kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 3eb1bba007c88ed0e45f9234967602b1f2d56b8c
parent 9a3a508a459a3d8d8d75530d66b988b68da42c43
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 22 May 2026 12:34:13 -0700

Add Windows PE/COFF support

Diffstat:
MREADME.md | 3+++
Mdoc/CTOOLCHAIN.md | 33+++++++++++++++++++++++++++++++++
Adoc/WINDOWS.md | 408+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdriver/cc.c | 186+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Mdriver/hosted.c | 70++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdriver/hosted.h | 2+-
Mdriver/ld.c | 199++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Mdriver/lib_resolve.c | 115++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
Mdriver/lib_resolve.h | 20++++++++++++++++++++
Mdriver/objdump.c | 635+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mdriver/runtime.c | 32++++++++++++++++++++++++++++++++
Minclude/cfree/link.h | 7+++++++
Minclude/cfree/object.h | 18++++++++++++++++++
Mlang/c/parse/parse.c | 15++++++++++++---
Mlang/c/parse/parse_expr.c | 100++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
Mlang/c/parse/parse_priv.h | 7+++++++
Mlang/c/parse/parse_type.c | 6++++--
Mlang/c/pp/pp.c | 197++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
Mlang/c/pp/pp_directive.c | 16+++++++++++++++-
Mlang/c/pp/pp_expand.c | 34++++++++++++++++++++++++++++++++++
Mlang/c/pp/pp_priv.h | 23+++++++++++++++++++++++
Mlang/c/type/type.c | 2++
Mrt/Makefile | 18+++++++++++++++++-
Art/include/emmintrin.h | 3+++
Art/include/mm_malloc.h | 5+++++
Art/include/x86intrin.h | 12++++++++++++
Mrt/lib/impl/fp_compare_impl.inc | 2+-
Art/lib/stack/chkstk_x86_64_win.c | 32++++++++++++++++++++++++++++++++
Asrc/abi/abi_aapcs64_windows.c | 67+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/abi/abi_internal.h | 3+++
Asrc/abi/abi_win64_x64.c | 178+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/api/link.c | 15++++++++++++++-
Msrc/api/object_detect.c | 28++++++++++++++++++++++++++++
Msrc/api/object_file.c | 14++++++++++++++
Msrc/api/stubs.c | 14++------------
Msrc/arch/aa64/arch.c | 10++++++++++
Msrc/arch/aa64/emit.c | 21+++++++++++++++++----
Msrc/arch/aa64/link.c | 28++++++++++++++++++++++++++++
Msrc/arch/aa64/ops.c | 156++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Msrc/arch/aa64/opt_coord.c | 10+++++++++-
Msrc/arch/registry.c | 13+++++++++++++
Msrc/arch/x64/alloc.c | 48++++++++++++++++++++++++++++++++++++------------
Msrc/arch/x64/arch.c | 10++++++++++
Msrc/arch/x64/asm.c | 116+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
Msrc/arch/x64/emit.c | 350+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
Msrc/arch/x64/internal.h | 58+++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Msrc/arch/x64/isa.c | 5+++++
Msrc/arch/x64/link.c | 19+++++++++++++++++++
Msrc/arch/x64/ops.c | 303+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Msrc/arch/x64/opt_coord.c | 90+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Msrc/link/link.c | 377++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Msrc/link/link.h | 1+
Msrc/link/link_arch.h | 14++++++++++++++
Asrc/link/link_coff.c | 1741+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/link/link_internal.h | 17+++++++++++++++++
Msrc/link/link_layout.c | 25++++++++++++++++++++++---
Msrc/link/link_reloc_layout.c | 7+++++++
Msrc/link/link_resolve.c | 237++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
Asrc/obj/coff.h | 598+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/coff_emit.c | 731+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/coff_read.c | 714+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/coff_read_dso.c | 235+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/coff_reloc_aarch64.c | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/coff_reloc_x86_64.c | 76++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/obj/obj.c | 21+++++++++++++++++++++
Msrc/obj/obj.h | 42++++++++++++++++++++++++++++++++++++++++++
Msrc/obj/obj_secnames.c | 24++++++++++++++++++++++++
Mtest/api/abi_classify_test.c | 320+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
Atest/coff/CORPUS.md | 199+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/coff/README.md | 50++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/coff/cfree-roundtrip-coff.c | 1404+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/coff/pe-dso-forwarder.c | 303+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/coff/pe-import-mingw.c | 377+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/coff/pe-import-smoke.c | 435+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/coff/pe-mixed-archive.c | 336+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/coff/windows-system-dlls-smoke.sh | 444+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/coff/windows-ucrt-hosted-smoke.sh | 450+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/objdump/aarch64-windows/cases/01-coff-file-header.expected | 5+++++
Atest/objdump/aarch64-windows/cases/01-coff-file-header.sh | 14++++++++++++++
Atest/objdump/x86_64-windows/cases/01-coff-characteristics.expected | 4++++
Atest/objdump/x86_64-windows/cases/01-coff-characteristics.sh | 13+++++++++++++
Atest/objdump/x86_64-windows/cases/02-coff-file-header.expected | 5+++++
Atest/objdump/x86_64-windows/cases/02-coff-file-header.sh | 14++++++++++++++
Atest/objdump/x86_64-windows/cases/03-pe-private-headers.expected | 8++++++++
Atest/objdump/x86_64-windows/cases/03-pe-private-headers.sh | 38++++++++++++++++++++++++++++++++++++++
Atest/objdump/x86_64-windows/cases/04-pe-sections.expected | 4++++
Atest/objdump/x86_64-windows/cases/04-pe-sections.sh | 34++++++++++++++++++++++++++++++++++
Mtest/test.mk | 71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
88 files changed, 12910 insertions(+), 330 deletions(-)

diff --git a/README.md b/README.md @@ -12,6 +12,9 @@ It features: - A cross-compiling backend, supporting 32-bit and 64-bit RISC-V, x86, ARM, and WASM. - Support for object files and executables (PE/COFF, ELF, Mach-O) +- Primary tested targets: x86_64-linux, aarch64-linux, x86_64-macos, + aarch64-macos, x86_64-windows, aarch64-windows, plus freestanding + variants of the same architectures. - An archiver - An assembler, standalone and inline - Basic linker script support diff --git a/doc/CTOOLCHAIN.md b/doc/CTOOLCHAIN.md @@ -248,6 +248,39 @@ gaps that exist equally in `cc -E`: - **C++ demangling.** `-C`, `--demangle` — N/A for C; can land as a silent no-op once it's needed. +## Windows (PE/COFF) target + +Cross-compilation to Windows requires the mingw-w64 sysroot for system +libraries and CRT bits. Set `CFREE_MINGW_SYSROOT` to the +`<toolchain>/x86_64-w64-mingw32` directory (or pass `-isysroot` / +`--sysroot`) so the `cc` driver appends `$SYSROOT/lib` to the library +search path. Both `cc -lFOO` and `ld -lFOO` resolve Windows libraries +using the suffix list `libFOO.dll.a` → `libFOO.a` → `FOO.lib` → +`FOO.dll.a` (mingw-canonical first, MSVC-style fallback). + +Example invocations: + +```sh +export CFREE_MINGW_SYSROOT=/opt/homebrew/opt/mingw-w64/toolchain-x86_64/x86_64-w64-mingw32 + +# Compile-only: produces hello.obj (note .obj suffix on Windows targets). +cfree cc -target x86_64-windows -c hello.c + +# Inspect a PE32+ image. -p prints the optional header, data +# directories, and per-DLL import lists. +cfree objdump -p hello.exe + +# Link via MSVC-style flag surface (opt-in via --ms-link-driver): +cfree ld --ms-link-driver /OUT:hello.exe /SUBSYSTEM:CONSOLE \ + /DEFAULTLIB:kernel32 hello.obj +``` + +Windows predefined macros emitted by `cc -target x86_64-windows`: +`_WIN32`, `_WIN64`, `WIN32`, `__MINGW32__`, `__MINGW64__`, `_M_X64`, +`_M_AMD64`. `aarch64-windows` substitutes `_M_ARM64` for the +x64-specific names. 
`_MSC_VER` is deliberately not set — cfree targets +the mingw flavor on Windows (DWARF debug info, mingwex CRT), not MSVC. + ## Recommended next moves 1. **Add to `cc` first**: `-rdynamic`, `-print-search-dirs`, diff --git a/doc/WINDOWS.md b/doc/WINDOWS.md @@ -0,0 +1,408 @@ +# Windows / PE-COFF support + +This document describes the Windows target support in `cfree` as it +exists now. It is no longer a bring-up plan: x64 and aarch64 PE/COFF +object emission, PE executable linking, mingw import library ingestion, +Windows ABI selection, and llvm-mingw UCRT hosted links are implemented. + +## Scope + +Supported targets: + +- `x86_64-windows` +- `aarch64-windows` + +The Windows path is 64-bit only. i386 Win32 is out of scope because +`cfree` has no 32-bit x86 backend. + +The intended hosted profile is mingw/llvm-mingw UCRT, not MSVC. cfree +links against llvm-mingw's CRT and import archives and emits PE32+ +executables that import UCRT API-set DLLs and system DLLs such as +`KERNEL32.dll`. + +Non-goals for the current Windows path: + +- SEH unwind metadata and C++ exception interop through cfree frames +- `.pdata` / `.xdata` emission for cfree-generated functions +- PDB, CodeView, windbg integration, and MSVC object/debug parity +- ARM64EC ABI support +- legacy MSVCRT as a separately selectable hosted profile + +## Current Status + +The implemented path can: + +- compile C to relocatable PE/COFF objects for x64 and aarch64 +- read and write COFF objects, including COMDAT, weak externals, + common symbols, section aux records, and per-arch relocations +- link PE32+ executables directly with `cfree ld` / `cfree cc` +- ingest mingw import archives and synthesize PE import tables +- link llvm-mingw UCRT startup objects, CRT archives, and system import + libraries +- run trivial x64 and aarch64 Windows executables under Wine through + Debian podman containers +- select the Win64 x64 and Windows AArch64 ABI through the normal + `(arch, os)` ABI dispatch +- emit Windows 
driver defaults such as `.obj`, `.exe`, Windows + predefined macros, subsystem selection, and sysroot library search + +Validated smoke coverage includes: + +- COFF round-trip: 22 hand-built ObjBuilder cases, byte-stable +- PE import unit smoke: synthetic short import to linked `.exe` +- PE import mingw smoke: real `libkernel32.a` to linked `.exe` +- llvm-mingw UCRT hosted x64 and aarch64 console executables +- x64 and aarch64 `windows.h` console and `-mwindows` GUI links +- Debian podman + Wine execution for x64 and aarch64 return-code + propagation +- x64 and aarch64 `Sleep` smoke execution through `KERNEL32.dll` +- x64 and aarch64 `windows.h` coverage for handles, callback typedefs, + wide APIs, `winbase`, `processthreadsapi`, `synchapi`, `fileapi`, + `errhandlingapi`, `winuser`, inline helpers, and macro-heavy declarations +- x64 and aarch64 Wine runtime coverage for `argc` / `argv` / `envp`, + stdout/stderr handles, heap allocation, file I/O, error codes, and + callback execution through `qsort` +- x64 and aarch64 Wine runtime coverage for UCRT stdio entry points and + imported data reads through `__dcrt_initial_narrow_environment` +- cfree-emitted TLS variables on x64 and aarch64, including PE TLS + directory presence and Wine runtime execution when matching Wine + containers are available +- system-DLL coverage for `user32` + `gdi32` GUI links, `gdi32` drawing + via memory DC + stock objects, `advapi32` registry open/query, + `ws2_32` Winsock startup/socket/closesocket/cleanup, `ole32` + CoInitializeEx / CoUninitialize, `shell32` `CommandLineToArgvW`, + `comctl32` `InitCommonControls(Ex)`, and a mixed-member `libucrt.a` + case that pulls in both an `api-ms-win-crt-*` short-import and a + real `lib64_libmingwex_a-*.o` stdio helper — x64 and aarch64, + link-level imports verified via `cfree objdump -p` and exit code + checked under Debian podman Wine when the matching container is + available +- ABI classifier tests for x64/aa64 Windows alongside Linux/macOS + 
+The remaining work is coverage and polish, not first-link bring-up. See +the checklist at the end of this file. + +## Compile, Link, And Run Under Wine + +Use llvm-mingw UCRT for run-on-Wine validation on both architectures. +The Homebrew `mingw-w64` x64 sysroot is still useful for object and +import-library tests, but its legacy MSVCRT profile can import CRT entry +points that Debian bookworm Wine does not implement +(`msvcrt.dll.__acrt_iob_func`). Prefer llvm-mingw UCRT for executable +runtime checks. + +On this host, `podman --arch ...` is the reliable way to select the +container architecture. `--platform linux/amd64` has not consistently +selected amd64 under the qemu-backed podman setup. The minimal Debian +Wine package exposes the launcher as `/usr/lib/wine/wine64`. + +Build one trivial return-code executable per Windows target: + +```sh +UCRT_ROOT=/private/tmp/llvm-mingw/llvm-mingw-20260519-ucrt-macos-universal + +cat >/tmp/ret7.c <<'SRC' +int main(void) { return 7; } +SRC + +build/cfree cc -target x86_64-windows \ + --sysroot "$UCRT_ROOT/x86_64-w64-mingw32" \ + /tmp/ret7.c -o build/test/ret7-x64-ucrt-windows.exe + +build/cfree cc -target aarch64-windows \ + --sysroot "$UCRT_ROOT/aarch64-w64-mingw32" \ + /tmp/ret7.c -o build/test/ret7-arm64-windows.exe +``` + +Build the Debian Wine containers once: + +```sh +podman build --arch amd64 -t localhost/cfree-wine-amd64 - <<'EOF' +FROM docker.io/library/debian:bookworm +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends wine64 \ + && rm -rf /var/lib/apt/lists/* +EOF + +podman build --arch arm64 -t localhost/cfree-wine-arm64 - <<'EOF' +FROM docker.io/library/debian:bookworm +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends wine64 \ + && rm -rf /var/lib/apt/lists/* +EOF +``` + +Run the executables through Wine and assert the process exit code: + +```sh +podman run --rm --arch amd64 -v "$PWD:/work:ro" \ + 
localhost/cfree-wine-amd64 \ + bash -lc ' + export WINEDEBUG=-all WINEPREFIX=/tmp/wineprefix + /usr/lib/wine/wine64 /work/build/test/ret7-x64-ucrt-windows.exe + rc=$? + echo "x64 exit=$rc" + test "$rc" -eq 7 + ' + +podman run --rm --arch arm64 -v "$PWD:/work:ro" \ + localhost/cfree-wine-arm64 \ + bash -lc ' + export WINEDEBUG=-all WINEPREFIX=/tmp/wineprefix + /usr/lib/wine/wine64 /work/build/test/ret7-arm64-windows.exe + rc=$? + echo "arm64 exit=$rc" + test "$rc" -eq 7 + ' +``` + +## Design + +### Target And Driver + +`driver/target.c` recognizes `x86_64-windows` and +`aarch64-windows`, sets `CFREE_OS_WINDOWS`, and selects `CFREE_OBJ_COFF`. +Windows targets use `.obj` for relocatable output and `.exe` for linked +programs. + +The driver defines the mingw-style Windows macros: + +- `_WIN32` +- `_WIN64` +- `__MINGW32__` +- `__MINGW64__` +- `_M_X64` / `_M_AMD64` for x64 +- `_M_ARM64` for aarch64 + +For `cfree cc --sysroot <mingw-target-sysroot>`, the driver adds the +target sysroot library directory and links the mingw CRT startup and +runtime archives around user objects. The hosted UCRT profile uses: + +- `crt2.o` +- `crtbegin.o` +- `libmingw32.a` +- `libmoldname.a` +- `libmingwex.a` +- `libmsvcrt.a` +- system import libraries such as `libkernel32.a` +- `crtend.o` + +In llvm-mingw UCRT, `libmsvcrt.a` is a compatibility/import archive. +The final PE should import `api-ms-win-crt-*.dll` API-set DLLs, not +literal `msvcrt.dll` or `ucrt.dll`. + +The linker supports console and GUI subsystem selection through +`ld --subsystem=windows`, `ld --ms-link-driver /SUBSYSTEM:WINDOWS`, +`cc -mwindows`, and `cc -Wl,/SUBSYSTEM:WINDOWS`. Console is the default. + +### ABI And Code Generation + +The ABI vtable is selected by `(arch, os)`. 
+ +x64 Windows uses the Win64 calling convention: + +- integer/pointer args in `RCX`, `RDX`, `R8`, `R9` +- floating args in `XMM0`-`XMM3` +- 32-byte caller shadow space +- `RBX`, `RBP`, `RDI`, `RSI`, `RSP`, and `R12`-`R15` callee-saved +- `XMM6`-`XMM15` callee-saved when used +- varargs duplicate floating-point arguments into the paired integer + argument registers +- `va_list` is pointer-shaped + +Large Win64 stack frames emit `__chkstk` probes. The prologue loads the +frame size into `EAX`, calls `__chkstk`, then subtracts the probed size +from `RSP`. mingw's `libmingwex.a` supplies the hosted symbol. + +AArch64 Windows mostly follows AAPCS64, with the Windows `va_list` +layout handled through the target ABI path. Variadic functions use a +pointer-shaped `va_list`; floating-point arguments to variadic functions +are carried in integer argument slots, matching llvm-mingw/Clang and the +UCRT stdio wrappers. cfree also accepts ARM64EC COFF machine values as +AArch64 where the object encoding is identical; ARM64EC ABI interop is +still out of scope. + +`long double` is 64-bit `double` on Windows. `__int128` follows the +mingw/GCC split into two GPR slots on Win64 rather than MSVC's +pass-by-reference rule. + +### PE/COFF Objects + +`emit_coff` and `read_coff` implement relocatable COFF object support +through the normal `ObjBuilder` API. 
+ +The object path handles: + +- file, section, symbol, auxiliary symbol, relocation, and string-table + wire records +- AMD64 and ARM64 machine types +- COMDAT section groups and SELECTANY-style deduplication +- weak externals and mingw alias fallback +- common symbols +- COFF section characteristics and alignment +- section-relative and architecture-specific relocations +- long section names through the COFF string table +- short import records and long-form mingw import members + +The reader preserves enough COFF-specific metadata in object extension +fields for round-trip stability while normalizing the information the +linker needs. + +### PE Linker + +`link_emit_image_writer` dispatches COFF targets to the PE writer. The +PE writer emits: + +- DOS stub and PE/COFF headers +- PE32+ optional header +- loadable sections with Windows alignment +- `.idata` import descriptors, ILT/IAT blocks, hint/name tables, and + per-DLL grouping +- per-architecture import call stubs +- `.reloc` base relocation blocks for absolute addresses +- TLS directory records and relocations for the directory fields +- subsystem and entry-point selection +- image identifiers through the shared `link_image_id_compute` path + +The default console entry point resolves to `mainCRTStartup`. GUI links +default to `WinMainCRTStartup` when the subsystem is Windows GUI. + +### Imports, DLLs, And IAT + +COFF has no ELF-style GOT/PLT model. cfree emits direct references in +the object and the linker resolves imported functions through IAT +slots and import stubs. + +The import reader handles both: + +- short import records (`Sig1=0`, `Sig2=0xffff`) +- long-form mingw import archive members such as those in + `libkernel32.a` + +Archive ingestion classifies import members as DSO shims, preserves +per-member DLL names, and skips head/trailer members. The PE linker then +builds one import descriptor per DLL. 
+ +Imported data aliases such as `__imp_<name>` are object-like IAT data +slots, not callable function imports. The PE hint/name table strips the +`__imp_` prefix only for the exported symbol name. + +`read_coff_dso` can walk raw PE DLL export directories for named +exports. Forwarder ENT entries (EAT RVA inside the export directory's +own range, contents `OTHERDLL.OtherSym`) are surfaced as defined +symbols so the linker can satisfy imports against them; cfree does +not chase the chain at link time — the OS loader follows it at +runtime, which is how `api-ms-win-crt-*.dll` resolves to +`ucrtbase.dll`. The contract is pinned by +`test/coff/pe-dso-forwarder.c`. Ordinal-only exports (entries present +in the EAT but absent from the ENT) and ordinal-only short imports +(`NameType=IMPORT_OBJECT_ORDINAL` in a short-import archive member) +are not yet implemented: the latter fails with a clean diagnostic +naming the offending archive member and ordinal rather than an +internal panic. No mingw / llvm-mingw sysroot archive on the +supported targets uses either shape. + +Mixed-member archives — where one `.a` file contains both short-import +members and full long-form COFF object members — are ingested in a +single pass: each member is classified independently, short-import +records route through `read_coff_short_import` and become DSO inputs +keyed by the embedded DLL name, while long-form members fall through +to `read_coff` as regular objects. `libucrt.a` uses exactly this shape +(`api-ms-win-crt-*.dll` short imports alongside +`lib64_libucrt_extra_a-*.o` helpers). The composition is pinned by +`test/coff/pe-mixed-archive.c`. + +### TLS + +COFF TLS data is materialized into `.tls$` sections. 
Code generation +uses Windows TLS access: + +- x64: `gs:[0x58] + _tls_index * 8 + SECREL(sym)` +- aarch64: `x18` (TEB), then the Windows TLS slot at `+0x58`, then + `_tls_index * 8 + SECREL(sym)` + +The PE writer emits a TLS directory in `.rdata` and base relocations for +the directory's absolute fields. The optional-header TLS data directory +and the mingw-visible `_tls_used` symbol both name that same record. The +hosted UCRT smoke compiles and runs cfree-emitted TLS variables on both +x64 and aarch64 under Wine when the matching podman images are +installed. + +### Tooling + +Windows support is wired into the existing tools: + +- `objdump -p` prints PE image headers, data directories, and imports +- `objdump -h` decodes raw `IMAGE_SECTION_HEADER.Characteristics` into + GNU-objdump-style tags (`LINK_ONCE`, `DISCARDABLE`, `LINK_REMOVE`, + `SHARED`, `GPREL`, ...) for both COFF .obj inputs and PE images +- `objdump -f` summarizes architecture, format, section/symbol counts, + and (for PE images) image base / entry point / subsystem +- `objdump -h` also prints COMDAT group membership immediately after + the section table +- `objcopy` and `strip` accept COFF inputs +- object detection distinguishes COFF objects from PE images +- `ld --ms-link-driver` accepts common MS-link spellings such as + `/OUT`, `/ENTRY`, `/LIBPATH`, `/DEFAULTLIB`, and `/SUBSYSTEM` + +## Test Expectations + +Current test layers: + +- **COFF unit**: hand-built ObjBuilder to emit/read round-trip, + byte-stable +- **mingw fixtures**: mingw-built `.obj` inputs read and re-emitted +- **cfree codegen**: `cfree cc -target <arch>-windows -c` objects linked by + external mingw tools where useful +- **cfree linker**: `cfree cc` / `cfree ld` emits PE executables +- **Wine execution**: produced `.exe` files run under Debian podman + Wine containers for amd64 and arm64 +- **bad inputs**: malformed PE/COFF inputs should diagnose cleanly +- **header ingestion**: `cfree cc` against llvm-mingw headers +- **DLL/import 
reader**: raw PE DLL and import-library absorption + +The harness should skip Windows-target tests with a clear +`SKIP: no mingw` message when the required toolchain is unavailable. +Set `CFREE_TEST_HAS_MINGW=1` to require mingw/llvm-mingw test inputs. +Wine execution should remain gated so normal local test runs do not +require podman or Wine. + +## Remaining Checklist + +- [x] Broaden `windows.h` coverage beyond the current trivial smoke. + Add focused cases for handles, structs, callback typedefs, wide + APIs, selected `winbase`, `processthreadsapi`, `synchapi`, + `fileapi`, `errhandlingapi`, `winuser`, inline helpers, and + macro-heavy declarations. +- [x] Broaden runtime execution under Wine on both x64 and aarch64. + Covered `argc` / `argv` / `envp`, stdout/stderr handles, heap + allocation, file I/O, error codes, callbacks, GUI `WinMain` links, + and cfree-emitted TLS variables. +- [x] Continue broadening runtime execution under Wine for UCRT stdio + on aarch64 and imported data reads. +- [x] Fix aarch64 Windows variadic UCRT stdio calls such as + `printf("x:%d\n", 42)` and floating-point printf arguments. + cfree now matches llvm-mingw/Clang argument lowering; Debian Wine + 8.0's aarch64 UCRT still crashes on formatted `printf` arguments + for clang-built binaries too, so Wine runtime coverage remains on + non-formatted UCRT stdio calls there. +- [x] Add runtime tests for cfree-emitted TLS variables on both + architectures. +- [x] Finish `_tls_used` symbol synthesis for the PE TLS contract. +- [x] Expand DLL/import-library coverage for forwarded exports, + ordinal-only exports, mixed import-library members, and larger + system libraries. 
Forwarders pinned by + `test/coff/pe-dso-forwarder.c`; ordinal-only short imports + currently diagnose cleanly rather than panic; mixed-member + archives covered by `test/coff/pe-mixed-archive.c`; broader + system-DLL link + Wine coverage in + `test/coff/windows-system-dlls-smoke.sh` (user32/gdi32, + advapi32, ws2_32, ole32, shell32, comctl32, mixed UCRT). +- [ ] Implement ordinal-only short imports if a real consumer surfaces + (today the path is a clean diagnostic, not a panic). +- [ ] Optionally walk forwarder chains at link time (today the OS + loader chases them at runtime). +- [x] Expand x64 hosted `windows.h` execution coverage to match the + aarch64 `Sleep` and GUI subsystem smokes. +- [x] Optionally expose richer COFF section characteristics in + `objdump` output. diff --git a/driver/cc.c b/driver/cc.c @@ -125,6 +125,7 @@ typedef struct CcOptions { size_t owned_output_path_size; const char* entry; /* -e */ const char* linker_script; /* -T path */ + uint16_t pe_subsystem; /* CfreePeSubsystem */ const char* sysroot; /* --sysroot / -isysroot */ const char* support_dir; /* --support-dir */ int probe_kind; /* CcProbeKind */ @@ -163,9 +164,14 @@ typedef struct CcOptions { uint32_t narchives; CcDsoInput* dsos; uint32_t ndsos; - /* -L search paths (argv-borrowed). */ + /* -L search paths (argv-borrowed; last slot may be owned, see + * owned_sysroot_lib_dir). */ const char** lib_search_paths; uint32_t nlib_search_paths; + /* Owned `<sysroot>/lib` slot appended for Windows targets when a + * sysroot is in effect (cmdline or CFREE_MINGW_SYSROOT). */ + char* owned_sysroot_lib_dir; + size_t owned_sysroot_lib_dir_size; /* Pending -l names (resolved at end-of-parse). 
*/ CcPendingLib* pending_libs; uint32_t npending_libs; @@ -298,6 +304,9 @@ static void cc_options_release(CcOptions* o) { driver_free(o->env, o->build_id_bytes, o->build_id_len); if (o->owned_output_path) driver_free(o->env, o->owned_output_path, o->owned_output_path_size); + if (o->owned_sysroot_lib_dir) + driver_free(o->env, o->owned_sysroot_lib_dir, + o->owned_sysroot_lib_dir_size); driver_hosted_plan_fini(o->env, &o->hosted); driver_cflags_fini(&o->cf, o->env); driver_free(o->env, o->source_files, bound * sizeof(*o->source_files)); @@ -330,19 +339,51 @@ static char* cc_dup_span(DriverEnv* env, const char* s, size_t n) { static int cc_record_build_id(CcOptions* o, const char* val); static int cc_apply_hosted_profile(CcOptions* o); +static int cc_subsystem_value_eq(const char* val, size_t n, + const char* want) { + size_t i; + for (i = 0; want[i]; ++i) { + char a; + char b; + if (i >= n) return 0; + a = val[i]; + b = want[i]; + if (a >= 'a' && a <= 'z') a = (char)(a - 'a' + 'A'); + if (b >= 'a' && b <= 'z') b = (char)(b - 'a' + 'A'); + if (a != b) return 0; + } + return i == n || val[i] == ','; +} + +static int cc_record_pe_subsystem(CcOptions* o, const char* val, size_t n) { + if (cc_subsystem_value_eq(val, n, "CONSOLE") || + cc_subsystem_value_eq(val, n, "CUI")) { + o->pe_subsystem = CFREE_PE_SUBSYSTEM_WINDOWS_CUI; + return 0; + } + if (cc_subsystem_value_eq(val, n, "WINDOWS") || + cc_subsystem_value_eq(val, n, "GUI")) { + o->pe_subsystem = CFREE_PE_SUBSYSTEM_WINDOWS_GUI; + return 0; + } + driver_errf(CC_TOOL, "unsupported subsystem: %.*s", (int)n, val); + return 1; +} + /* Parse a single GCC-style -Wl,X[,Y...] pass-through argument. */ static int cc_record_wl(CcOptions* o, const char* arg) { const char* p = arg; int expect_rpath = 0; int expect_soname = 0; int expect_interp = 0; + int expect_subsystem = 0; while (*p) { const char* tok = p; size_t n = 0; while (p[n] && p[n] != ',') ++n; p = tok + n + (tok[n] == ',' ? 
1 : 0); - if (expect_rpath || expect_soname || expect_interp) { + if (expect_rpath || expect_soname || expect_interp || expect_subsystem) { char* buf = cc_dup_span(o->env, tok, n); if (!buf) { driver_errf(CC_TOOL, "out of memory"); @@ -351,7 +392,12 @@ static int cc_record_wl(CcOptions* o, const char* arg) { if (expect_rpath) o->rpaths[o->nrpaths++] = buf; if (expect_soname) o->soname = buf; if (expect_interp) o->interp_path = buf; - expect_rpath = expect_soname = expect_interp = 0; + if (expect_subsystem) { + int rc = cc_record_pe_subsystem(o, buf, driver_strlen(buf)); + driver_free(o->env, buf, n + 1u); + if (rc != 0) return 1; + } + expect_rpath = expect_soname = expect_interp = expect_subsystem = 0; continue; } @@ -433,11 +479,23 @@ static int cc_record_wl(CcOptions* o, const char* arg) { o->build_id_mode = CFREE_BUILDID_SHA256; continue; } + if (n >= 12 && driver_strneq(tok, "--subsystem=", 12)) { + if (cc_record_pe_subsystem(o, tok + 12, n - 12) != 0) return 1; + continue; + } + if (n == 11 && driver_strneq(tok, "--subsystem", 11)) { + expect_subsystem = 1; + continue; + } + if (n >= 11 && driver_strneq(tok, "/SUBSYSTEM:", 11)) { + if (cc_record_pe_subsystem(o, tok + 11, n - 11) != 0) return 1; + continue; + } driver_errf(CC_TOOL, "unsupported -Wl, token: %.*s", (int)n, tok); return 1; } - if (expect_rpath || expect_soname || expect_interp) { + if (expect_rpath || expect_soname || expect_interp || expect_subsystem) { driver_errf(CC_TOOL, "-Wl option requires another comma argument"); return 1; } @@ -779,8 +837,12 @@ static int cc_resolve_pending_libs(CcOptions* o) { LibResolveMode mode = (o->static_link || pl->link_mode == CFREE_LM_STATIC) ? LIB_RESOLVE_STATIC_ONLY : LIB_RESOLVE_DYNAMIC_PREFER; - if (driver_lib_resolve(o->env, pl->name, mode, o->lib_search_paths, - o->nlib_search_paths, &p, &sz, &kind) != 0) { + LibResolveOS resolve_os = (o->target.os == CFREE_OS_WINDOWS) + ? 
LIB_RESOLVE_OS_WINDOWS + : LIB_RESOLVE_OS_POSIX; + if (driver_lib_resolve_for_os(o->env, pl->name, mode, resolve_os, + o->lib_search_paths, o->nlib_search_paths, + &p, &sz, &kind) != 0) { driver_errf(CC_TOOL, "library not found: -l%s", pl->name); return 1; } @@ -865,6 +927,75 @@ static int cc_apply_env(CcOptions* o) { return 0; } +/* Append a default `<sysroot>/lib` to the library search path for + * Windows targets. The llvm-mingw UCRT sysroot ships import archives + * such as libkernel32.a, libmsvcrt.a, and the UCRT API-set archives + * under <sysroot>/lib; the user-supplied -L list is searched first, + * then this appended default. In this profile libmsvcrt.a is the + * UCRT-flavoured mingw compatibility archive, not a request to import + * literal msvcrt.dll. Sysroot resolution order: + * 1. -isysroot / --sysroot on the command line (already in + * o->sysroot at this point); + * 2. CFREE_MINGW_SYSROOT env var (e.g. .../x86_64-w64-mingw32). + * + * No-op for non-Windows targets and for Windows when neither source + * provides a sysroot — keeps existing tests untouched. The appended + * path is a driver_alloc'd buffer tracked in o->owned_sysroot_lib_dir + * and freed by cc_options_release, so the lib_search_paths slot stays + * valid for the whole driver run. */ +static int cc_append_windows_lib_dirs(CcOptions* o) { + const char* sysroot = o->sysroot; + char* joined = NULL; + size_t srlen; + size_t need_slash; + size_t bytes; + size_t off = 0; + if (o->target.os != CFREE_OS_WINDOWS) return 0; + if (!sysroot || !sysroot[0]) { + sysroot = driver_getenv("CFREE_MINGW_SYSROOT"); + if (!sysroot || !sysroot[0]) return 0; + o->sysroot = sysroot; + } + srlen = driver_strlen(sysroot); + need_slash = (srlen > 0 && sysroot[srlen - 1] != '/') ? 1u : 0u; + /* "<sysroot>" + "/"? 
+ "lib" + NUL */ + bytes = srlen + need_slash + 3u + 1u; + joined = driver_alloc(o->env, bytes); + if (!joined) { + driver_errf(CC_TOOL, "out of memory"); + return 1; + } + driver_memcpy(joined + off, sysroot, srlen); + off += srlen; + if (need_slash) joined[off++] = '/'; + driver_memcpy(joined + off, "lib", 3); + off += 3; + joined[off] = '\0'; + if (o->owned_sysroot_lib_dir) { + driver_free(o->env, o->owned_sysroot_lib_dir, + o->owned_sysroot_lib_dir_size); + } + o->owned_sysroot_lib_dir = joined; + o->owned_sysroot_lib_dir_size = bytes; + o->lib_search_paths[o->nlib_search_paths++] = joined; + return 0; +} + +static int cc_has_link_action(const CcOptions* o) { + return !o->compile_only && !o->preprocess_only && !o->dump_tokens && + o->dep_mode != CC_DEP_M && o->dep_mode != CC_DEP_MM; +} + +static void cc_apply_default_hosted_profile(CcOptions* o) { + if (o->target.os != CFREE_OS_WINDOWS || o->target.obj != CFREE_OBJ_COFF) + return; + if (o->no_stdlib || o->no_defaultlibs || o->wants_hosted_libc) return; + if (!o->sysroot || !o->sysroot[0]) return; + if (!cc_has_link_action(o) && o->nsource_files + o->nsource_memory == 0) + return; + o->wants_hosted_libc = 1; +} + static char* cc_dep_default_target(DriverEnv* env, const CcOptions* o, size_t* out_size); @@ -1131,6 +1262,14 @@ static int cc_parse(int argc, char** argv, CcOptions* o) { if (o->target.pic == CFREE_PIC_NONE) o->target.pic = CFREE_PIC_PIC; continue; } + if (driver_streq(a, "-mwindows")) { + o->pe_subsystem = CFREE_PE_SUBSYSTEM_WINDOWS_GUI; + continue; + } + if (driver_streq(a, "-mconsole")) { + o->pe_subsystem = CFREE_PE_SUBSYSTEM_WINDOWS_CUI; + continue; + } if (driver_strneq(a, "-Wl,", 4)) { if (cc_record_wl(o, a + 4) != 0) return 1; continue; @@ -1339,6 +1478,7 @@ static int cc_parse(int argc, char** argv, CcOptions* o) { if (o->probe_kind != CC_PROBE_NONE) return 0; if (cc_apply_env(o) != 0) return 1; + if (cc_append_windows_lib_dirs(o) != 0) return 1; if (cc_resolve_pending_libs(o) != 0) return 1; 
{ @@ -1432,11 +1572,13 @@ static int cc_parse(int argc, char** argv, CcOptions* o) { } else if (o->preprocess_only) { /* stdout */ } else { - o->output_path = "a.out"; + o->output_path = + (o->target.os == CFREE_OS_WINDOWS) ? "a.exe" : "a.out"; } } } } + cc_apply_default_hosted_profile(o); if (cc_apply_hosted_profile(o) != 0) return 1; return 0; } @@ -1661,22 +1803,32 @@ static char* cc_dep_default_target(DriverEnv* env, const CcOptions* o, } } { + int win = (o && o->target.os == CFREE_OS_WINDOWS); + size_t ext_len = win ? 4u : 2u; + const char* ext = win ? ".obj" : ".o"; size_t name_len = dot - slash; - size_t bufsz = name_len + 3; + size_t bufsz = name_len + ext_len + 1u; buf = driver_alloc(env, bufsz); if (!buf) return NULL; driver_memcpy(buf, src + slash, name_len); - buf[name_len] = '.'; - buf[name_len + 1] = 'o'; - buf[name_len + 2] = '\0'; + driver_memcpy(buf + name_len, ext, ext_len); + buf[name_len + ext_len] = '\0'; *out_size = bufsz; return buf; } } } -static char* cc_default_obj_path_for_name(DriverEnv* env, const char* src, +static char* cc_default_obj_path_for_name(DriverEnv* env, + const CcOptions* o, const char* src, size_t* out_size) { + /* Windows targets default to a `.obj` suffix; everyone else `.o`. + * Drivers accept both spellings as inputs (driver/inputs.c), but + * tooling that scrapes default outputs expects the canonical + * platform extension. */ + int win = (o && o->target.os == CFREE_OS_WINDOWS); + size_t ext_len = win ? 4u : 2u; /* ".obj" or ".o" */ + const char* ext = win ? 
".obj" : ".o"; size_t srclen = driver_strlen(src); size_t dot = srclen; size_t slash = 0; @@ -1697,13 +1849,12 @@ static char* cc_default_obj_path_for_name(DriverEnv* env, const char* src, } { size_t name_len = dot - slash; - size_t bufsz = name_len + 3; + size_t bufsz = name_len + ext_len + 1u; buf = driver_alloc(env, bufsz); if (!buf) return NULL; driver_memcpy(buf, src + slash, name_len); - buf[name_len] = '.'; - buf[name_len + 1] = 'o'; - buf[name_len + 2] = '\0'; + driver_memcpy(buf + name_len, ext, ext_len); + buf[name_len + ext_len] = '\0'; *out_size = bufsz; return buf; } @@ -2031,7 +2182,7 @@ static int cc_run_compile_objs(DriverEnv* env, const CcOptions* o, for (i = 0; i < o->nsource_files; ++i) { size_t out_size = 0; char* out = - cc_default_obj_path_for_name(env, o->source_files[i], &out_size); + cc_default_obj_path_for_name(env, o, o->source_files[i], &out_size); int rc; if (!out) { driver_errf(CC_TOOL, "out of memory"); @@ -2204,6 +2355,7 @@ static int cc_run_link_exe(DriverEnv* env, const CcOptions* o, lopts.build_id_len = o->build_id_len; lopts.gc_sections = o->gc_sections; lopts.pie = o->pie; + lopts.pe_subsystem = o->pe_subsystem; lopts.interp_path = o->interp_path; lopts.soname = o->soname; if (o->new_dtags) { diff --git a/driver/hosted.c b/driver/hosted.c @@ -371,6 +371,73 @@ static int hosted_resolve_linux(const DriverHostedRequest* req, return 1; } +static int hosted_resolve_windows_mingw(const DriverHostedRequest* req, + DriverHostedPlan* plan) { + if (!req->sysroot || !req->sysroot[0]) { + driver_errf(req->tool, "Windows hosted profile requires --sysroot"); + return 1; + } + plan->profile_name = "windows-mingw"; + if (hosted_add_existing_include(plan, req->env, req->sysroot, "include") != + 0) { + driver_errf(req->tool, "out of memory"); + return 1; + } + if (!req->link_inputs) return 0; + if (hosted_add_required(plan->before, &plan->nbefore, + DRIVER_HOSTED_MAX_BEFORE, req, req->sysroot, + "lib/crt2.o", DRIVER_HOSTED_INPUT_OBJECT) != 0 || + 
hosted_add_required(plan->before, &plan->nbefore, + DRIVER_HOSTED_MAX_BEFORE, req, req->sysroot, + "lib/crtbegin.o", DRIVER_HOSTED_INPUT_OBJECT) != 0) + return 1; + if (hosted_add_required(plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, + req, req->sysroot, "lib/libmingw32.a", + DRIVER_HOSTED_INPUT_ARCHIVE) != 0 || + hosted_add_required(plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, + req, req->sysroot, "lib/libmoldname.a", + DRIVER_HOSTED_INPUT_ARCHIVE) != 0 || + hosted_add_required(plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, + req, req->sysroot, "lib/libmingwex.a", + DRIVER_HOSTED_INPUT_ARCHIVE) != 0 || + hosted_add_required(plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, + req, req->sysroot, "lib/libmsvcrt.a", + DRIVER_HOSTED_INPUT_ARCHIVE) != 0 || + hosted_add_required(plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, + req, req->sysroot, "lib/libadvapi32.a", + DRIVER_HOSTED_INPUT_ARCHIVE) != 0 || + hosted_add_required(plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, + req, req->sysroot, "lib/libshell32.a", + DRIVER_HOSTED_INPUT_ARCHIVE) != 0 || + hosted_add_required(plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, + req, req->sysroot, "lib/libuser32.a", + DRIVER_HOSTED_INPUT_ARCHIVE) != 0 || + hosted_add_required(plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, + req, req->sysroot, "lib/libkernel32.a", + DRIVER_HOSTED_INPUT_ARCHIVE) != 0 || + hosted_add_required(plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, + req, req->sysroot, "lib/libmingw32.a", + DRIVER_HOSTED_INPUT_ARCHIVE) != 0 || + hosted_add_required(plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, + req, req->sysroot, "lib/libmoldname.a", + DRIVER_HOSTED_INPUT_ARCHIVE) != 0 || + hosted_add_required(plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, + req, req->sysroot, "lib/libmingwex.a", + DRIVER_HOSTED_INPUT_ARCHIVE) != 0 || + hosted_add_required(plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, + req, req->sysroot, "lib/libmsvcrt.a", + 
DRIVER_HOSTED_INPUT_ARCHIVE) != 0 || + hosted_add_required(plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, + req, req->sysroot, "lib/libkernel32.a", + DRIVER_HOSTED_INPUT_ARCHIVE) != 0) + return 1; + if (hosted_add_required(plan->final, &plan->nfinal, DRIVER_HOSTED_MAX_FINAL, + req, req->sysroot, "lib/crtend.o", + DRIVER_HOSTED_INPUT_OBJECT) != 0) + return 1; + return 0; +} + int driver_hosted_resolve(const DriverHostedRequest* req, DriverHostedPlan* out) { DriverHostedPlan zero = {0}; @@ -382,6 +449,9 @@ int driver_hosted_resolve(const DriverHostedRequest* req, } else if (req->target.os == CFREE_OS_LINUX && req->target.obj == CFREE_OBJ_ELF) { rc = hosted_resolve_linux(req, out); + } else if (req->target.os == CFREE_OS_WINDOWS && + req->target.obj == CFREE_OBJ_COFF) { + rc = hosted_resolve_windows_mingw(req, out); } else { driver_errf(req->tool, "no hosted libc profile for target"); rc = 1; diff --git a/driver/hosted.h b/driver/hosted.h @@ -22,7 +22,7 @@ typedef struct DriverHostedInput { } DriverHostedInput; #define DRIVER_HOSTED_MAX_BEFORE 4 -#define DRIVER_HOSTED_MAX_AFTER 6 +#define DRIVER_HOSTED_MAX_AFTER 16 #define DRIVER_HOSTED_MAX_FINAL 2 #define DRIVER_HOSTED_MAX_INCLUDES 4 #define DRIVER_HOSTED_MAX_DEFINES 20 diff --git a/driver/ld.c b/driver/ld.c @@ -74,6 +74,7 @@ typedef struct LdOptions { int output_seen; const char* entry; /* -e */ const char* script_path; /* -T */ + uint16_t pe_subsystem; /* CfreePeSubsystem */ /* PT_INTERP path. NULL means "let libcfree pick the target default * (e.g. /lib/ld-musl-aarch64.so.1)". Set by -dynamic-linker / * --dynamic-linker. */ @@ -121,6 +122,10 @@ typedef struct LdOptions { uint8_t cur_link_mode; /* CfreeLinkMode for following inputs */ uint8_t cur_group_id; /* 0 outside any --start-group */ uint8_t next_group_id; /* increments on --start-group */ + + /* Opt-in: treat `/...` arguments as MSVC link.exe flags. Off by + * default so legacy paths like `/usr/lib/foo.o` remain inputs. 
*/ + int ms_link_driver; } LdOptions; static void ld_usage(void) { @@ -154,6 +159,7 @@ void driver_help_ld(void) { "\n" "ENTRY / SCRIPT\n" " -e SYMBOL Entry symbol\n" + " --subsystem NAME PE subsystem: console or windows\n" " -T SCRIPT.ld Use a linker script (parsed, not raw)\n" "\n" "TARGET\n" @@ -365,15 +371,171 @@ static const char* arg_eq_value(const char* arg, const char* prefix) { return arg + n + 1; } +/* Compare an MSVC-style flag against `arg`, case-insensitive on the + * key. MSVC accepts both `/` and `-` as the lead char and is + * case-insensitive in the key part — we accept `/KEY:val`, `/key:val`, + * `-KEY:val`. Returns the tail past the colon, or NULL on mismatch. */ +static const char* ms_flag_value(const char* arg, const char* key) { + size_t klen = driver_strlen(key); + size_t i; + if (arg[0] != '/' && arg[0] != '-') return NULL; + for (i = 0; i < klen; ++i) { + char a = arg[1 + i]; + char k = key[i]; + if (a >= 'a' && a <= 'z') a = (char)(a - 'a' + 'A'); + if (k >= 'a' && k <= 'z') k = (char)(k - 'a' + 'A'); + if (a != k) return NULL; + } + if (arg[1 + klen] != ':') return NULL; + return arg + 1 + klen + 1; +} + +/* Same shape as ms_flag_value but for bare flags (no `:value`). 
*/ +static int ms_flag_match(const char* arg, const char* key) { + size_t klen = driver_strlen(key); + size_t i; + if (arg[0] != '/' && arg[0] != '-') return 0; + for (i = 0; i < klen; ++i) { + char a = arg[1 + i]; + char k = key[i]; + if (a >= 'a' && a <= 'z') a = (char)(a - 'a' + 'A'); + if (k >= 'a' && k <= 'z') k = (char)(k - 'a' + 'A'); + if (a != k) return 0; + } + return arg[1 + klen] == '\0'; +} + +static int ld_subsystem_value_eq(const char* val, const char* want) { + size_t i; + for (i = 0; want[i]; ++i) { + char a = val[i]; + char b = want[i]; + if (a >= 'a' && a <= 'z') a = (char)(a - 'a' + 'A'); + if (b >= 'a' && b <= 'z') b = (char)(b - 'a' + 'A'); + if (a != b) return 0; + } + return val[i] == '\0' || val[i] == ','; +} + +static int ld_parse_pe_subsystem(LdOptions* o, const char* val) { + if (ld_subsystem_value_eq(val, "CONSOLE") || + ld_subsystem_value_eq(val, "CUI")) { + o->pe_subsystem = CFREE_PE_SUBSYSTEM_WINDOWS_CUI; + return 0; + } + if (ld_subsystem_value_eq(val, "WINDOWS") || + ld_subsystem_value_eq(val, "GUI")) { + o->pe_subsystem = CFREE_PE_SUBSYSTEM_WINDOWS_GUI; + return 0; + } + driver_errf(LD_TOOL, "unsupported subsystem: %s", val); + return 1; +} + +/* Parse one MSVC-style argument. Recognized subset (others warn and + * skip — match-but-no-op so legacy build scripts pass cleanly): + * /OUT:path → o->output_path + * /ENTRY:sym → o->entry + * /SUBSYSTEM:CONSOLE|WINDOWS → PE optional-header subsystem; WINDOWS + * also defaults entry to WinMainCRTStartup. + * /DEFAULTLIB:name → equivalent to -l<name>; resolved + * lazily in the same path as -l. + * /LIBPATH:dir → equivalent to -L dir + * + * Returns 1 if consumed, 0 if not a recognized MS flag (caller falls + * through to its existing behaviour), -1 on hard error. 
*/ +static int ld_try_ms_flag(LdOptions* o, const char* a) { + const char* val; + if (!o->ms_link_driver) return 0; + if (a[0] != '/' && a[0] != '-') return 0; + + if ((val = ms_flag_value(a, "OUT")) != NULL) { + if (o->output_seen) { + driver_errf(LD_TOOL, "/OUT specified after -o"); + return -1; + } + o->output_path = val; + o->output_seen = 1; + return 1; + } + if ((val = ms_flag_value(a, "ENTRY")) != NULL) { + o->entry = val; + return 1; + } + if ((val = ms_flag_value(a, "LIBPATH")) != NULL) { + o->lib_dirs[o->nlib_dirs++] = val; + return 1; + } + if ((val = ms_flag_value(a, "DEFAULTLIB")) != NULL) { + /* Resolve eagerly like -l does, using whatever current link-mode + * state is in effect. Windows mode triggers the .lib/.dll.a/.a + * suffix list. */ + char* resolved; + size_t resolved_size; + LibResolveKind kind; + LibResolveMode mode = (o->cur_link_mode == CFREE_LM_STATIC) + ? LIB_RESOLVE_STATIC_ONLY + : LIB_RESOLVE_DYNAMIC_PREFER; + if (driver_lib_resolve_for_os(o->env, val, mode, LIB_RESOLVE_OS_WINDOWS, + o->lib_dirs, o->nlib_dirs, &resolved, + &resolved_size, &kind) != 0) { + driver_errf(LD_TOOL, "/DEFAULTLIB: cannot find %s", val); + return -1; + } + if (kind == LIB_RESOLVE_KIND_SHARED || kind == LIB_RESOLVE_KIND_TBD) { + ld_push_dso(o, resolved, 1, resolved_size); + } else { + ld_push_archive(o, resolved, 1, resolved_size); + } + return 1; + } + if ((val = ms_flag_value(a, "SUBSYSTEM")) != NULL) { + if (ld_parse_pe_subsystem(o, val) != 0) return -1; + return 1; + } + if (ms_flag_match(a, "NOLOGO") || ms_flag_match(a, "VERBOSE") || + ms_flag_match(a, "INCREMENTAL") || ms_flag_match(a, "DEBUG") || + ms_flag_match(a, "DYNAMICBASE") || ms_flag_match(a, "NXCOMPAT")) { + /* Common flags every Windows build script sets; silently accept. */ + return 1; + } + + /* Any other `/key[:val]` shape under --ms-link-driver: warn + skip. + * We treat the entire arg as consumed so it doesn't fall through to + * the positional path and try to open a file. 
*/ + driver_errf(LD_TOOL, "ignoring unsupported MS-style flag: %s", a); + return 1; +} + static int ld_parse(int argc, char** argv, LdOptions* o) { int i; if (ld_alloc_arrays(o, argc) != 0) return 1; o->target = driver_host_target(); + /* First pass: detect --ms-link-driver up front so the option can + * appear anywhere on the command line and still affect earlier + * `/...` tokens. (Matches how `link.exe` treats option order as + * non-positional.) */ + for (i = 1; i < argc; ++i) { + if (driver_streq(argv[i], "--ms-link-driver")) { + o->ms_link_driver = 1; + break; + } + } + for (i = 1; i < argc; ++i) { const char* a = argv[i]; const char* val; + int ms_rc; + + if (driver_streq(a, "--ms-link-driver")) { + o->ms_link_driver = 1; + continue; + } + ms_rc = ld_try_ms_flag(o, a); + if (ms_rc < 0) return 1; + if (ms_rc > 0) continue; if (driver_streq(a, "-o")) { if (++i >= argc) { @@ -430,6 +592,18 @@ static int ld_parse(int argc, char** argv, LdOptions* o) { o->entry = argv[i]; continue; } + if ((val = arg_eq_value(a, "--subsystem")) != NULL) { + if (ld_parse_pe_subsystem(o, val) != 0) return 1; + continue; + } + if (driver_streq(a, "--subsystem")) { + if (++i >= argc) { + driver_errf(LD_TOOL, "--subsystem requires an argument"); + return 1; + } + if (ld_parse_pe_subsystem(o, argv[i]) != 0) return 1; + continue; + } if (driver_streq(a, "-T")) { if (++i >= argc) { driver_errf(LD_TOOL, "-T requires an argument"); @@ -479,6 +653,7 @@ static int ld_parse(int argc, char** argv, LdOptions* o) { size_t resolved_size; LibResolveKind kind; LibResolveMode mode; + LibResolveOS resolve_os; if (!name) { driver_errf(LD_TOOL, "-l requires an argument"); return 1; @@ -487,8 +662,11 @@ static int ld_parse(int argc, char** argv, LdOptions* o) { * -Bdynamic, --as-needed) prefers .so but falls back to .a. */ mode = (o->cur_link_mode == CFREE_LM_STATIC) ? 
LIB_RESOLVE_STATIC_ONLY : LIB_RESOLVE_DYNAMIC_PREFER; - if (driver_lib_resolve(o->env, name, mode, o->lib_dirs, o->nlib_dirs, - &resolved, &resolved_size, &kind) != 0) { + resolve_os = (o->target.os == CFREE_OS_WINDOWS) ? LIB_RESOLVE_OS_WINDOWS + : LIB_RESOLVE_OS_POSIX; + if (driver_lib_resolve_for_os(o->env, name, mode, resolve_os, o->lib_dirs, + o->nlib_dirs, &resolved, &resolved_size, + &kind) != 0) { driver_errf(LD_TOOL, "cannot find -l%s", name); return 1; } @@ -506,8 +684,12 @@ static int ld_parse(int argc, char** argv, LdOptions* o) { LibResolveMode mode = (o->cur_link_mode == CFREE_LM_STATIC) ? LIB_RESOLVE_STATIC_ONLY : LIB_RESOLVE_DYNAMIC_PREFER; - if (driver_lib_resolve(o->env, val, mode, o->lib_dirs, o->nlib_dirs, - &resolved, &resolved_size, &kind) != 0) { + LibResolveOS resolve_os = (o->target.os == CFREE_OS_WINDOWS) + ? LIB_RESOLVE_OS_WINDOWS + : LIB_RESOLVE_OS_POSIX; + if (driver_lib_resolve_for_os(o->env, val, mode, resolve_os, o->lib_dirs, + o->nlib_dirs, &resolved, &resolved_size, + &kind) != 0) { driver_errf(LD_TOOL, "cannot find -l%s", val); return 1; } @@ -523,14 +705,18 @@ static int ld_parse(int argc, char** argv, LdOptions* o) { size_t resolved_size; LibResolveKind kind; LibResolveMode mode; + LibResolveOS resolve_os; if (++i >= argc) { driver_errf(LD_TOOL, "--library requires an argument"); return 1; } mode = (o->cur_link_mode == CFREE_LM_STATIC) ? LIB_RESOLVE_STATIC_ONLY : LIB_RESOLVE_DYNAMIC_PREFER; - if (driver_lib_resolve(o->env, argv[i], mode, o->lib_dirs, o->nlib_dirs, - &resolved, &resolved_size, &kind) != 0) { + resolve_os = (o->target.os == CFREE_OS_WINDOWS) ? 
LIB_RESOLVE_OS_WINDOWS + : LIB_RESOLVE_OS_POSIX; + if (driver_lib_resolve_for_os(o->env, argv[i], mode, resolve_os, + o->lib_dirs, o->nlib_dirs, &resolved, + &resolved_size, &kind) != 0) { driver_errf(LD_TOOL, "cannot find -l%s", argv[i]); return 1; } @@ -982,6 +1168,7 @@ static int ld_run_link(LdOptions* o) { lopts.build_id_len = o->build_id_len; lopts.gc_sections = o->gc_sections; lopts.pie = o->pie; + lopts.pe_subsystem = o->pe_subsystem; lopts.interp_path = o->interp_path; lopts.soname = o->soname; /* Per --enable-new-dtags / --disable-new-dtags: when new_dtags is diff --git a/driver/lib_resolve.c b/driver/lib_resolve.c @@ -3,19 +3,22 @@ #include <stddef.h> #include <stdint.h> -/* Compose `<dir>/lib<name><suffix>` into a fresh heap buffer. Inserts - * a separating '/' iff `dir` does not already end in one. Empty `dir` - * is treated as the current directory: the path becomes - * `lib<name><suffix>`. `suffix` is e.g. ".a" or ".so" — caller-owned, - * NUL-terminated. */ -static char* compose_path(DriverEnv* env, const char* dir, const char* name, - const char* suffix, size_t* out_size) { +/* Compose `<dir>/<prefix><name><suffix>` into a fresh heap buffer. + * Inserts a separating '/' iff `dir` does not already end in one. + * Empty `dir` is treated as the current directory: the path becomes + * `<prefix><name><suffix>`. `prefix` is "lib" or "" (Windows MSVC- + * style libs ship without the prefix); `suffix` is e.g. ".a" or ".so" + * — both caller-owned, NUL-terminated. */ +static char* compose_path(DriverEnv* env, const char* dir, const char* prefix, + const char* name, const char* suffix, + size_t* out_size) { size_t dlen = driver_strlen(dir); + size_t plen = driver_strlen(prefix); size_t nlen = driver_strlen(name); size_t slen = driver_strlen(suffix); size_t need_slash = (dlen > 0 && dir[dlen - 1] != '/') ? 1 : 0; - /* "<dir>" + "/"? + "lib" + "<name>" + "<suffix>" + NUL */ - size_t bytes = dlen + need_slash + 3 + nlen + slen + 1; + /* "<dir>" + "/"? 
+ "<prefix>" + "<name>" + "<suffix>" + NUL */ + size_t bytes = dlen + need_slash + plen + nlen + slen + 1; char* buf = driver_alloc(env, bytes); size_t off = 0; if (!buf) return NULL; @@ -26,8 +29,10 @@ static char* compose_path(DriverEnv* env, const char* dir, const char* name, if (need_slash) { buf[off++] = '/'; } - driver_memcpy(buf + off, "lib", 3); - off += 3; + if (plen) { + driver_memcpy(buf + off, prefix, plen); + off += plen; + } if (nlen) { driver_memcpy(buf + off, name, nlen); off += nlen; @@ -41,16 +46,18 @@ static char* compose_path(DriverEnv* env, const char* dir, const char* name, return buf; } -/* Try one (suffix, kind) pair across every search dir; return 0 on +/* Try one (prefix, suffix) pair across every search dir; return 0 on * the first hit. Allocations for non-matching candidates are freed * before the next attempt. */ -static int try_suffix(DriverEnv* env, const char* name, const char* suffix, - const char* const* search_dirs, uint32_t nsearch_dirs, - char** out_path, size_t* out_size) { +static int try_variant(DriverEnv* env, const char* prefix, const char* name, + const char* suffix, const char* const* search_dirs, + uint32_t nsearch_dirs, char** out_path, + size_t* out_size) { uint32_t i; for (i = 0; i < nsearch_dirs; ++i) { size_t bytes; - char* cand = compose_path(env, search_dirs[i], name, suffix, &bytes); + char* cand = + compose_path(env, search_dirs[i], prefix, name, suffix, &bytes); if (!cand) return 1; if (driver_path_exists(cand)) { *out_path = cand; @@ -62,12 +69,18 @@ static int try_suffix(DriverEnv* env, const char* name, const char* suffix, return 1; } -int driver_lib_resolve(DriverEnv* env, const char* name, LibResolveMode mode, - const char* const* search_dirs, uint32_t nsearch_dirs, - char** out_path, size_t* out_size, - LibResolveKind* out_kind) { - if (!env || !name) return 1; +/* POSIX-suffix `lib<name><suffix>` convenience wrapper. 
*/ +static int try_suffix(DriverEnv* env, const char* name, const char* suffix, + const char* const* search_dirs, uint32_t nsearch_dirs, + char** out_path, size_t* out_size) { + return try_variant(env, "lib", name, suffix, search_dirs, nsearch_dirs, + out_path, out_size); +} +static int resolve_posix(DriverEnv* env, const char* name, LibResolveMode mode, + const char* const* search_dirs, uint32_t nsearch_dirs, + char** out_path, size_t* out_size, + LibResolveKind* out_kind) { /* GNU-ld order: under dynamic mode prefer .so over .a within the * same search dir. In practice that means we still iterate dirs in * order, but for each dir try .so first when applicable. To keep @@ -103,3 +116,63 @@ int driver_lib_resolve(DriverEnv* env, const char* name, LibResolveMode mode, } return 1; } + +static int resolve_windows(DriverEnv* env, const char* name, + LibResolveMode mode, + const char* const* search_dirs, + uint32_t nsearch_dirs, char** out_path, + size_t* out_size, LibResolveKind* out_kind) { + /* Windows / mingw layout. Try the mingw-canonical names first + * (lib<n>.dll.a, lib<n>.a) then the MSVC `<n>.lib` / `<n>.dll.a` + * variants. We feed every match to the linker as a static archive + * input — short-form import libraries (lib<n>.dll.a) are AR + * archives whose members are COFF .obj files plus IDATA stubs, so + * the existing archive ingestion path handles them. Long-form + * import libraries are tracked separately (a parallel Windows + * task; not yet wired here). 
*/ + (void)mode; + if (try_variant(env, "lib", name, ".dll.a", search_dirs, nsearch_dirs, + out_path, out_size) == 0) { + if (out_kind) *out_kind = LIB_RESOLVE_KIND_ARCHIVE; + return 0; + } + if (try_variant(env, "lib", name, ".a", search_dirs, nsearch_dirs, out_path, + out_size) == 0) { + if (out_kind) *out_kind = LIB_RESOLVE_KIND_ARCHIVE; + return 0; + } + if (try_variant(env, "", name, ".lib", search_dirs, nsearch_dirs, out_path, + out_size) == 0) { + if (out_kind) *out_kind = LIB_RESOLVE_KIND_ARCHIVE; + return 0; + } + if (try_variant(env, "", name, ".dll.a", search_dirs, nsearch_dirs, out_path, + out_size) == 0) { + if (out_kind) *out_kind = LIB_RESOLVE_KIND_ARCHIVE; + return 0; + } + return 1; +} + +int driver_lib_resolve_for_os(DriverEnv* env, const char* name, + LibResolveMode mode, LibResolveOS os, + const char* const* search_dirs, + uint32_t nsearch_dirs, char** out_path, + size_t* out_size, LibResolveKind* out_kind) { + if (!env || !name) return 1; + if (os == LIB_RESOLVE_OS_WINDOWS) { + return resolve_windows(env, name, mode, search_dirs, nsearch_dirs, out_path, + out_size, out_kind); + } + return resolve_posix(env, name, mode, search_dirs, nsearch_dirs, out_path, + out_size, out_kind); +} + +int driver_lib_resolve(DriverEnv* env, const char* name, LibResolveMode mode, + const char* const* search_dirs, uint32_t nsearch_dirs, + char** out_path, size_t* out_size, + LibResolveKind* out_kind) { + return driver_lib_resolve_for_os(env, name, mode, LIB_RESOLVE_OS_POSIX, + search_dirs, nsearch_dirs, out_path, + out_size, out_kind); +} diff --git a/driver/lib_resolve.h b/driver/lib_resolve.h @@ -27,6 +27,14 @@ typedef enum LibResolveKind { LIB_RESOLVE_KIND_TBD = 2, } LibResolveKind; +/* Target-OS hint for the suffix list. Windows uses the mingw / MSVC + * naming variants (`lib<name>.dll.a`, `lib<name>.a`, `<name>.lib`, + * `<name>.dll.a`); everything else uses the POSIX `lib<name>.*` set. 
*/ +typedef enum LibResolveOS { + LIB_RESOLVE_OS_POSIX = 0, + LIB_RESOLVE_OS_WINDOWS = 1, +} LibResolveOS; + /* Resolve `-l<name>` against a list of `-L`-style search directories. * * On success, returns 0 and writes a heap-allocated, NUL-terminated @@ -36,6 +44,12 @@ typedef enum LibResolveKind { * matched file is a `.so` (LIB_RESOLVE_KIND_SHARED) or a `.a` * (LIB_RESOLVE_KIND_ARCHIVE). * + * The legacy entry point `driver_lib_resolve` defaults to POSIX + * naming. `driver_lib_resolve_for_os` is the same function with an + * explicit target-OS hint so the caller can switch the suffix list + * for cross-compilation (Windows targets need .lib / .dll.a in + * addition to .a). The OS hint is independent of the host OS. + * * On failure, returns nonzero with `*out_path` unchanged. Failure * cases: * - no candidate exists in any of the search directories @@ -45,4 +59,10 @@ int driver_lib_resolve(DriverEnv* env, const char* name, LibResolveMode mode, char** out_path, size_t* out_size, LibResolveKind* out_kind); +int driver_lib_resolve_for_os(DriverEnv* env, const char* name, + LibResolveMode mode, LibResolveOS os, + const char* const* search_dirs, + uint32_t nsearch_dirs, char** out_path, + size_t* out_size, LibResolveKind* out_kind); + #endif diff --git a/driver/objdump.c b/driver/objdump.c @@ -24,6 +24,7 @@ typedef struct ObjdumpOpts { int D; /* -D: disasm all sections */ int r; /* -r: relocations */ int s; /* -s: hex section contents */ + int p; /* -p / --private-headers: PE optional header + data dirs */ const char* j[MAX_J_FILTERS]; int nj; } ObjdumpOpts; @@ -53,15 +54,25 @@ void driver_help_objdump(void) { " symbol table), matching GNU objdump's default-ish behaviour.\n" "\n" "OPERATIONS (any combination)\n" - " -f Print the file header\n" + " -f Print the file header: architecture, format,\n" + " section / symbol counts, HAS_RELOC / HAS_SYMS\n" + " flags, and (for PE images) image base,\n" + " entry point, and subsystem.\n" " -h Print section headers (idx, name, 
size, align,\n" - " flags). NOTE: this is the GNU objdump meaning of\n" - " -h — it does NOT print this help; use --help.\n" + " flags). For COFF inputs the raw\n" + " IMAGE_SCN_* Characteristics value is appended\n" + " on a continuation line and COMDAT groups are\n" + " printed after the section table. NOTE: this is\n" + " the GNU objdump meaning of -h — it does NOT\n" + " print this help; use --help.\n" " -t Print the symbol table\n" " -d Disassemble executable sections\n" " -D Disassemble all sections\n" " -r Print relocation records\n" " -s Print section contents as a hex+ASCII dump\n" + " -p, --private-headers\n" + " Print PE optional header, data directories,\n" + " and per-DLL import lists (PE images only)\n" " -x Aggregate: -f -h -r -t\n" "\n" "FILTERS\n" @@ -86,6 +97,463 @@ void driver_help_objdump(void) { "usage\n"); } +/* ---- PE/COFF private-header walker (used by `-p`) ---- + * + * The objdump driver currently relies on the high-level CfreeObjFile + * interface for section/symbol/disasm output. For PE images that hides + * a lot of useful structure: the optional header, data directories, + * and per-DLL import lists. The walker below operates on the raw input + * bytes so we can print this view without piping the data through + * libcfree. It does the strict minimum needed for a `-p` style dump + * and bails out on malformed offsets — diagnostic, not security-grade. + * + * RVA-to-file resolution: each section header records VirtualAddress + * (RVA) and PointerToRawData (file offset). A target RVA lands inside + * a section iff RVA in [VA, VA + VirtualSize). The file offset of the + * RVA inside the section's raw bytes is PointerToRawData + (RVA - VA). + * `pe_rva_to_file` returns -1 when no section covers the RVA. 
*/ +#define PE_DOS_E_LFANEW_OFFSET 60u +#define PE_FILE_HEADER_SIZE 20u +#define PE_OPT_HDR64_MAGIC 0x020Bu +#define PE_NUM_DATA_DIRS 16u +#define PE_DATA_DIRECTORY_SIZE 8u +#define PE_SECTION_HEADER_SIZE 40u +#define PE_DIR_EXPORT 0u +#define PE_DIR_IMPORT 1u +#define PE_DIR_RESOURCE 2u +#define PE_DIR_EXCEPTION 3u +#define PE_DIR_BASERELOC 5u +#define PE_DIR_DEBUG 6u +#define PE_DIR_TLS 9u +#define PE_DIR_IAT 12u +#define PE_IMPORT_DESCRIPTOR_SIZE 20u +#define PE_THUNK_SIZE 8u +#define PE_ORDINAL_FLAG64 0x8000000000000000ull + +/* COFF-specific Characteristics bits we surface as tags. Kept in sync + * with src/obj/coff.h's IMAGE_SCN_* values; objdump only needs the + * diagnostic-visible subset. */ +#define OBJDUMP_IMAGE_SCN_LNK_INFO 0x00000200u +#define OBJDUMP_IMAGE_SCN_LNK_REMOVE 0x00000800u +#define OBJDUMP_IMAGE_SCN_LNK_COMDAT 0x00001000u +#define OBJDUMP_IMAGE_SCN_GPREL 0x00008000u +#define OBJDUMP_IMAGE_SCN_MEM_DISCARDABLE 0x02000000u +#define OBJDUMP_IMAGE_SCN_MEM_SHARED 0x10000000u + +static int j_match(const ObjdumpOpts* o, const char* name); + +static uint16_t pe_rd_u16(const uint8_t* p) { + return (uint16_t)(p[0] | ((uint32_t)p[1] << 8)); +} +static uint32_t pe_rd_u32(const uint8_t* p) { + return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | + ((uint32_t)p[3] << 24); +} +static uint64_t pe_rd_u64(const uint8_t* p) { + return (uint64_t)pe_rd_u32(p) | ((uint64_t)pe_rd_u32(p + 4) << 32); +} + +/* Names match the IMAGE_DIRECTORY_ENTRY_* index. Keep aligned with the + * order in coff.h to avoid drift. 
*/ +static const char* pe_dir_name(uint32_t i) { + switch (i) { + case 0: + return "EXPORT"; + case 1: + return "IMPORT"; + case 2: + return "RESOURCE"; + case 3: + return "EXCEPTION"; + case 4: + return "SECURITY"; + case 5: + return "BASERELOC"; + case 6: + return "DEBUG"; + case 7: + return "ARCHITECTURE"; + case 8: + return "GLOBALPTR"; + case 9: + return "TLS"; + case 10: + return "LOAD_CONFIG"; + case 11: + return "BOUND_IMPORT"; + case 12: + return "IAT"; + case 13: + return "DELAY_IMPORT"; + case 14: + return "COM_DESCRIPTOR"; + case 15: + return "RESERVED"; + default: + return "?"; + } +} + +static const char* pe_subsystem_name(uint16_t s) { + switch (s) { + case 1: + return "NATIVE"; + case 2: + return "WINDOWS_GUI"; + case 3: + return "WINDOWS_CUI"; + case 5: + return "OS2_CUI"; + case 7: + return "POSIX_CUI"; + case 9: + return "WINDOWS_CE_GUI"; + case 10: + return "EFI_APPLICATION"; + case 11: + return "EFI_BOOT_SERVICE_DRIVER"; + case 12: + return "EFI_RUNTIME_DRIVER"; + case 13: + return "EFI_ROM"; + case 14: + return "XBOX"; + case 16: + return "WINDOWS_BOOT_APPLICATION"; + default: + return "UNKNOWN"; + } +} + +/* Find file offset for an RVA by scanning the section headers. Returns + * -1 if the RVA is outside every section's covered range. */ +static long pe_rva_to_file(const uint8_t* buf, size_t buf_len, size_t sec_off, + uint16_t nsec, uint32_t rva) { + uint16_t i; + for (i = 0; i < nsec; ++i) { + size_t sh = sec_off + (size_t)i * PE_SECTION_HEADER_SIZE; + uint32_t va; + uint32_t vsize; + uint32_t raw_off; + uint32_t raw_size; + if (sh + PE_SECTION_HEADER_SIZE > buf_len) return -1; + vsize = pe_rd_u32(buf + sh + 8); + va = pe_rd_u32(buf + sh + 12); + raw_size = pe_rd_u32(buf + sh + 16); + raw_off = pe_rd_u32(buf + sh + 20); + /* VirtualSize is sometimes 0 in object files; fall back to raw size. 
*/ + if (vsize == 0) vsize = raw_size; + if (rva >= va && rva < va + vsize) { + uint32_t delta = rva - va; + if (delta >= raw_size) return -1; + return (long)(raw_off + delta); + } + } + return -1; +} + +/* Read a NUL-terminated ASCII string starting at `off`, capped to + * 256 bytes. Writes a copy into `dst` (size `dstcap`) and returns + * 0 on success, 1 if the offset is out of bounds. */ +static int pe_read_cstr(const uint8_t* buf, size_t buf_len, size_t off, + char* dst, size_t dstcap) { + size_t i; + if (off >= buf_len) { + if (dstcap) dst[0] = '\0'; + return 1; + } + for (i = 0; i + 1 < dstcap && off + i < buf_len && buf[off + i]; ++i) { + dst[i] = (char)buf[off + i]; + } + dst[i] = '\0'; + return 0; +} + +static void pe_dump_imports(const uint8_t* buf, size_t buf_len, size_t sec_off, + uint16_t nsec, uint32_t import_rva, + uint32_t import_size) { + long desc_off; + uint32_t consumed; + (void)import_size; + desc_off = pe_rva_to_file(buf, buf_len, sec_off, nsec, import_rva); + if (desc_off < 0) { + driver_printf(" (import directory RVA not covered by any section)\n"); + return; + } + driver_printf("\nThe Import Tables:\n"); + for (consumed = 0;; consumed += PE_IMPORT_DESCRIPTOR_SIZE) { + size_t off = (size_t)desc_off + consumed; + uint32_t ilt_rva; + uint32_t name_rva; + uint32_t iat_rva; + char dll[256]; + long thunk_off; + uint32_t i; + if (off + PE_IMPORT_DESCRIPTOR_SIZE > buf_len) break; + ilt_rva = pe_rd_u32(buf + off + 0); + name_rva = pe_rd_u32(buf + off + 12); + iat_rva = pe_rd_u32(buf + off + 16); + /* All-zero descriptor terminates the chain. */ + if (ilt_rva == 0 && name_rva == 0 && iat_rva == 0) break; + { + long name_off = pe_rva_to_file(buf, buf_len, sec_off, nsec, name_rva); + if (name_off < 0 || + pe_read_cstr(buf, buf_len, (size_t)name_off, dll, sizeof dll) != 0) { + dll[0] = '\0'; + } + } + driver_printf(" DLL Name: %s\n", dll[0] ? 
dll : "(unreadable)"); + driver_printf(" ILT RVA: 0x%x IAT RVA: 0x%x\n", ilt_rva, iat_rva); + /* Prefer walking the original first thunk (ILT) for names. Some + * mingw-emitted images zero the ILT and only ship the IAT; fall + * back to the IAT in that case. */ + { + uint32_t walk_rva = ilt_rva ? ilt_rva : iat_rva; + thunk_off = + walk_rva ? pe_rva_to_file(buf, buf_len, sec_off, nsec, walk_rva) : -1; + } + if (thunk_off < 0) continue; + for (i = 0;; ++i) { + size_t toff = (size_t)thunk_off + (size_t)i * PE_THUNK_SIZE; + uint64_t t; + if (toff + PE_THUNK_SIZE > buf_len) break; + t = pe_rd_u64(buf + toff); + if (t == 0) break; + if (t & PE_ORDINAL_FLAG64) { + driver_printf(" Ordinal: %u\n", (unsigned)(t & 0xFFFFu)); + } else { + long hint_off = + pe_rva_to_file(buf, buf_len, sec_off, nsec, (uint32_t)t); + char name[256]; + if (hint_off < 0 || + pe_read_cstr(buf, buf_len, (size_t)hint_off + 2u, name, + sizeof name) != 0) { + continue; + } + driver_printf(" Name: %s\n", name); + } + } + } + driver_printf("\n"); +} + +/* Parsed view of a PE32+ image's headers. Populated by pe_parse_image; + * callers check `valid` before reading other fields. Avoids the prior + * pattern of every PE-walker re-validating the DOS/PE/optional header + * triplet from scratch. 
*/ +typedef struct PeImage { + int valid; + uint16_t machine; + uint16_t file_chars; + uint16_t nsec; + uint16_t opt_magic; + uint16_t subsystem; + uint16_t dllchars; + uint64_t image_base; + uint32_t entry_rva; + size_t sec_off; + size_t dir_off; +} PeImage; + +static int pe_parse_image(const uint8_t* buf, size_t buf_len, PeImage* out) { + uint32_t e_lfanew; + size_t coff_off; + size_t opt_off; + uint16_t opt_size; + + out->valid = 0; + if (buf_len < PE_DOS_E_LFANEW_OFFSET + 4u) return 0; + if (pe_rd_u16(buf) != 0x5A4Du) return 0; + e_lfanew = pe_rd_u32(buf + PE_DOS_E_LFANEW_OFFSET); + if ((size_t)e_lfanew + 4u + PE_FILE_HEADER_SIZE > buf_len) return 0; + if (pe_rd_u32(buf + e_lfanew) != 0x00004550u) return 0; + coff_off = (size_t)e_lfanew + 4u; + out->machine = pe_rd_u16(buf + coff_off + 0); + out->nsec = pe_rd_u16(buf + coff_off + 2); + out->file_chars = pe_rd_u16(buf + coff_off + 18); + opt_size = pe_rd_u16(buf + coff_off + 16); + opt_off = coff_off + PE_FILE_HEADER_SIZE; + if (opt_size == 0 || opt_off + opt_size > buf_len) return 0; + out->opt_magic = pe_rd_u16(buf + opt_off); + if (out->opt_magic != PE_OPT_HDR64_MAGIC) { + /* PE32 (0x10B) is out of scope but the caller may still want to + * report what it found, so we return a "valid header, unsupported + * subset" view rather than failing. 
*/ + out->valid = 1; + return 1; + } + out->entry_rva = pe_rd_u32(buf + opt_off + 16); + out->image_base = pe_rd_u64(buf + opt_off + 24); + out->subsystem = pe_rd_u16(buf + opt_off + 68); + out->dllchars = pe_rd_u16(buf + opt_off + 70); + out->sec_off = opt_off + opt_size; + out->dir_off = opt_off + 112u; + out->valid = 1; + return 1; +} + +static const char* pe_machine_name(uint16_t m) { + switch (m) { + case 0x8664u: return "x86_64 (AMD64)"; + case 0xAA64u: return "aarch64 (ARM64)"; + case 0xA641u: return "aarch64 (ARM64EC)"; + case 0x014Cu: return "i386"; + case 0x01C0u: return "arm"; + case 0x01C4u: return "armnt"; + case 0x0200u: return "ia64"; + case 0x5064u: return "riscv64"; + default: return "unknown"; + } +} + +/* PE-image `-f`: architecture, image base, entry point, subsystem. + * Counterpart to dump_file_header for inputs that cfree_obj_open can't + * parse yet (PE executables / DLLs vs .obj). */ +static void dump_pe_file_header(const char* label, const PeImage* pe) { + driver_printf("%s:\tfile format pei-%s\n\n", label, + pe_machine_name(pe->machine)); + driver_printf("architecture: %s, flags 0x%04x\n", + pe_machine_name(pe->machine), (unsigned)pe->file_chars); + if (pe->opt_magic == PE_OPT_HDR64_MAGIC) { + driver_printf("start address 0x%016llx\n", + (unsigned long long)(pe->image_base + pe->entry_rva)); + driver_printf( + "image base: 0x%llx, entry rva: 0x%x, subsystem: %u (%s)\n\n", + (unsigned long long)pe->image_base, pe->entry_rva, + (unsigned)pe->subsystem, pe_subsystem_name(pe->subsystem)); + } else { + driver_printf("PE32 (magic 0x%x) — only PE32+ inspection is implemented\n\n", + (unsigned)pe->opt_magic); + } +} + +/* Decode IMAGE_SECTION_HEADER.Characteristics into the GNU objdump tag + * style used for COFF .obj inputs. Used by both dump_sections (via + * render_sec_flags) and the PE section walker. 
*/ +static void render_pe_sec_flags(uint32_t ch, char* buf, size_t cap) { + size_t n = 0; + const char* tags[16]; + int nt = 0; + int i; + /* Bit layout shared with render_sec_flags; PE images don't carry + * BSS / TLS-by-name detection so we go straight from raw flags to + * tags. */ + if (ch & 0x00000020u) tags[nt++] = "CODE"; + if (ch & 0x00000040u) tags[nt++] = "DATA"; + if (ch & 0x00000080u) tags[nt++] = "BSS"; + if (ch & OBJDUMP_IMAGE_SCN_LNK_INFO) tags[nt++] = "LINK_INFO"; + if (ch & OBJDUMP_IMAGE_SCN_LNK_REMOVE) tags[nt++] = "LINK_REMOVE"; + if (ch & OBJDUMP_IMAGE_SCN_LNK_COMDAT) tags[nt++] = "LINK_ONCE"; + if (ch & OBJDUMP_IMAGE_SCN_GPREL) tags[nt++] = "GPREL"; + if (ch & OBJDUMP_IMAGE_SCN_MEM_DISCARDABLE) tags[nt++] = "DISCARDABLE"; + if (ch & OBJDUMP_IMAGE_SCN_MEM_SHARED) tags[nt++] = "SHARED"; + if (ch & 0x20000000u) tags[nt++] = "EXEC"; + if (ch & 0x40000000u) tags[nt++] = "READ"; + if (ch & 0x80000000u) tags[nt++] = "WRITE"; + for (i = 0; i < nt && n + 1 < cap; ++i) { + const char* t = tags[i]; + if (i > 0 && n + 1 < cap) buf[n++] = ','; + while (*t && n + 1 < cap) buf[n++] = *t++; + } + buf[n] = '\0'; +} + +/* PE-image `-h`: walks IMAGE_SECTION_HEADER table directly from raw + * bytes (cfree_obj_open doesn't yet parse PE executables). Output + * shape mirrors dump_sections for .obj inputs so postprocessing + * grep-fu doesn't care which path produced the line. 
*/ +static void dump_pe_sections(const char* label, const PeImage* pe, + const uint8_t* buf, size_t buf_len, + const ObjdumpOpts* opts) { + uint16_t i; + char flagbuf[160]; + char name[9]; + driver_printf("%s:\tSections (PE image):\n", label); + driver_printf("Idx Name VMA Size " + "FileOff Align Flags\n"); + for (i = 0; i < pe->nsec; ++i) { + size_t sh = pe->sec_off + (size_t)i * PE_SECTION_HEADER_SIZE; + uint32_t vsize; + uint32_t va; + uint32_t raw_size; + uint32_t raw_off; + uint32_t ch; + uint32_t align_field; + unsigned align_log2; + int j; + if (sh + PE_SECTION_HEADER_SIZE > buf_len) break; + for (j = 0; j < 8; ++j) name[j] = (char)buf[sh + (size_t)j]; + name[8] = '\0'; + vsize = pe_rd_u32(buf + sh + 8); + va = pe_rd_u32(buf + sh + 12); + raw_size = pe_rd_u32(buf + sh + 16); + raw_off = pe_rd_u32(buf + sh + 20); + ch = pe_rd_u32(buf + sh + 36); + align_field = (ch >> 20) & 0xFu; + align_log2 = align_field ? (align_field - 1u) : 0u; + if (!j_match(opts, name)) continue; + render_pe_sec_flags(ch, flagbuf, sizeof(flagbuf)); + driver_printf("%3u %-16s %016llx %08x %08x 2**%-2u %s\n", (unsigned)i, + name, (unsigned long long)(pe->image_base + va), + vsize ? vsize : raw_size, raw_off, align_log2, flagbuf); + driver_printf(" Characteristics: 0x%08x\n", + ch); + } + driver_printf("\n"); +} + +/* Walk a PE image (DOS → "PE\0\0" → COFF file header → optional header + * → data directories) and print the highlights GNU objdump's `-p` + * shows. Bails silently on any header that doesn't validate — leaves + * the basic dump_obj() output untouched. 
*/ +static void dump_pe_private(const char* label, const uint8_t* buf, + size_t buf_len) { + PeImage pe; + uint32_t import_rva = 0; + uint32_t import_size = 0; + uint32_t i; + if (!pe_parse_image(buf, buf_len, &pe) || !pe.valid) return; + if (pe.opt_magic != PE_OPT_HDR64_MAGIC) { + driver_printf("%s:\tPE optional header magic 0x%x (PE32) — skipping\n", + label, (unsigned)pe.opt_magic); + return; + } + + driver_printf("\n%s:\tPE32+ private headers\n", label); + driver_printf(" Magic: 0x%x (PE32+)\n", pe.opt_magic); + driver_printf(" Machine: 0x%04x (%s)\n", (unsigned)pe.machine, + pe_machine_name(pe.machine)); + driver_printf(" Characteristics: 0x%04x\n", (unsigned)pe.file_chars); + driver_printf(" ImageBase: 0x%llx\n", + (unsigned long long)pe.image_base); + driver_printf(" AddressOfEntryPoint: 0x%x\n", pe.entry_rva); + driver_printf(" Subsystem: %u (%s)\n", (unsigned)pe.subsystem, + pe_subsystem_name(pe.subsystem)); + driver_printf(" DllCharacteristics: 0x%04x\n", (unsigned)pe.dllchars); + driver_printf(" NumberOfSections: %u\n", (unsigned)pe.nsec); + + if (pe.dir_off + PE_NUM_DATA_DIRS * PE_DATA_DIRECTORY_SIZE > buf_len) return; + driver_printf("\nData Directories:\n"); + driver_printf(" Idx Name RVA Size\n"); + for (i = 0; i < PE_NUM_DATA_DIRS; ++i) { + uint32_t rva = + pe_rd_u32(buf + pe.dir_off + i * PE_DATA_DIRECTORY_SIZE); + uint32_t sz = + pe_rd_u32(buf + pe.dir_off + i * PE_DATA_DIRECTORY_SIZE + 4); + if (rva == 0 && sz == 0) continue; + driver_printf(" %2u %-14s 0x%08x 0x%08x\n", i, pe_dir_name(i), rva, sz); + if (i == PE_DIR_IMPORT) { + import_rva = rva; + import_size = sz; + } + } + + if (import_rva && import_size) { + pe_dump_imports(buf, buf_len, pe.sec_off, pe.nsec, import_rva, + import_size); + } +} + static const char* fmt_str(CfreeObjFmt fmt, uint8_t ptr_size) { switch (fmt) { case CFREE_OBJ_ELF: @@ -167,11 +635,13 @@ static int j_match(const ObjdumpOpts* o, const char* name) { return 0; } -/* Compose the comma-separated flag tag list GNU objdump 
prints in -h. */ -static void render_sec_flags(const CfreeObjSecInfo* sec, char* buf, - size_t cap) { +/* Compose the comma-separated flag tag list GNU objdump prints in -h. + * For COFF inputs, `coff_chars` is the raw IMAGE_SECTION_HEADER.Characteristics + * value; for other formats it should be 0. */ +static void render_sec_flags(const CfreeObjSecInfo* sec, CfreeObjFmt fmt, + uint32_t coff_chars, char* buf, size_t cap) { size_t n = 0; - const char* tags[12]; + const char* tags[16]; int nt = 0; int i; int is_bss = (sec->kind == CFREE_SEC_BSS); @@ -190,6 +660,16 @@ static void render_sec_flags(const CfreeObjSecInfo* sec, char* buf, if (sec->flags & CFREE_SF_STRINGS) tags[nt++] = "STRINGS"; if (sec->kind == CFREE_SEC_DEBUG) tags[nt++] = "DEBUGGING"; + if (fmt == CFREE_OBJ_COFF) { + if (coff_chars & OBJDUMP_IMAGE_SCN_LNK_COMDAT) tags[nt++] = "LINK_ONCE"; + if (coff_chars & OBJDUMP_IMAGE_SCN_LNK_INFO) tags[nt++] = "LINK_INFO"; + if (coff_chars & OBJDUMP_IMAGE_SCN_LNK_REMOVE) tags[nt++] = "LINK_REMOVE"; + if (coff_chars & OBJDUMP_IMAGE_SCN_MEM_DISCARDABLE) + tags[nt++] = "DISCARDABLE"; + if (coff_chars & OBJDUMP_IMAGE_SCN_MEM_SHARED) tags[nt++] = "SHARED"; + if (coff_chars & OBJDUMP_IMAGE_SCN_GPREL) tags[nt++] = "GPREL"; + } + for (i = 0; i < nt && n + 1 < cap; ++i) { const char* t = tags[i]; if (i > 0 && n + 1 < cap) buf[n++] = ','; @@ -208,25 +688,71 @@ static void render_sec_flags(const CfreeObjSecInfo* sec, char* buf, static void dump_sections(CfreeObjFile* f, const ObjdumpOpts* opts) { uint32_t nsec = cfree_obj_nsections(f); + CfreeObjFmt fmt = cfree_obj_fmt(f); uint32_t i; - char flagbuf[128]; + char flagbuf[160]; driver_printf("Sections:\n"); driver_printf("Idx Name Size Align Flags\n"); for (i = 0; i < nsec; ++i) { CfreeObjSecInfo sec; + uint32_t raw_type = 0; if (cfree_obj_section(f, i, &sec) != CFREE_OK) continue; if (!j_match(opts, sec.name)) continue; - render_sec_flags(&sec, flagbuf, sizeof(flagbuf)); + cfree_obj_section_format_flags(f, i, &raw_type, 
NULL); + render_sec_flags(&sec, fmt, raw_type, flagbuf, sizeof(flagbuf)); driver_printf( "%3u %-20s %08llx 2**%-2u %s\n", i, sec.name[0] ? sec.name : "(anon)", (unsigned long long)sec.size, sec.align ? (unsigned)__builtin_ctz(sec.align ? sec.align : 1) : 0, flagbuf); + /* Show the raw IMAGE_SCN_* value on a continuation line for COFF + * inputs — useful when diagnosing why a section ended up with the + * tags it did. The hex is much shorter than printing every set bit + * by name, and the tag list above already covers the bits that + * change behaviour at link time. */ + if (fmt == CFREE_OBJ_COFF && raw_type) { + driver_printf(" Characteristics: 0x%08x\n", + raw_type); + } } driver_printf("\n"); } +/* GNU objdump prints COMDAT group membership immediately after the + * section header table. The reader exposes groups uniformly across + * formats (ELF SHT_GROUP and COFF COMDAT both arrive here) so we just + * iterate. Output is silent when the object carries no groups. */ +static void dump_groups(CfreeObjFile* f, const ObjdumpOpts* opts) { + CfreeObjGroupIter* it = NULL; + CfreeObjGroupInfo g; + int printed_header = 0; + (void)opts; + + if (cfree_obj_groupiter_new(f, &it) != CFREE_OK) return; + while (cfree_obj_groupiter_next(it, &g) == CFREE_ITER_ITEM) { + uint32_t k; + if (!printed_header) { + driver_printf("COMDAT groups:\n"); + printed_header = 1; + } + driver_printf(" group %s (signature sym #%u, %u section%s)\n", + g.name && g.name[0] ? g.name : "(anon)", + (unsigned)g.signature, (unsigned)g.nsections, + g.nsections == 1 ? "" : "s"); + for (k = 0; k < g.nsections; ++k) { + CfreeObjSection sid = g.sections[k]; + CfreeObjSecInfo si; + if (sid == CFREE_SECTION_NONE) continue; + if (cfree_obj_section(f, sid, &si) != CFREE_OK) continue; + driver_printf(" [%3u] %s\n", (unsigned)sid, + si.name && si.name[0] ? 
si.name : "(anon)"); + } + } + cfree_obj_groupiter_free(it); + if (printed_header) driver_printf("\n"); +} + static void dump_symbols(CfreeObjFile* f, const ObjdumpOpts* opts) { CfreeObjSymIter* it = NULL; CfreeObjSymInfo sym; @@ -412,6 +938,48 @@ static void dump_disasm(const CfreeDisasmContext* dctx, CfreeObjFile* f, } } +/* `-f`: GNU objdump-style file header summary. Object files have no + * meaningful entry point so start address is always 0; PE images are + * handled separately by dump_pe_private. The flags line summarizes + * whether the input has symbols and relocations so it's clear at a + * glance whether further -t / -r work is going to be productive. */ +static void dump_file_header(CfreeObjFile* f, const char* label) { + CfreeTarget target = cfree_obj_target(f); + CfreeObjFmt fmt = cfree_obj_fmt(f); + CfreeObjSymIter* sit = NULL; + CfreeObjRelocIter* rit = NULL; + uint32_t nsec = cfree_obj_nsections(f); + uint32_t nsym = 0; + int has_relocs = 0; + unsigned flags = 0; + + if (cfree_obj_symiter_new(f, &sit) == CFREE_OK) { + CfreeObjSymInfo s; + while (cfree_obj_symiter_next(sit, &s) == CFREE_ITER_ITEM) nsym++; + cfree_obj_symiter_free(sit); + } + if (cfree_obj_reliter_new(f, &rit) == CFREE_OK) { + CfreeObjReloc r; + if (cfree_obj_reliter_next(rit, &r) == CFREE_ITER_ITEM) has_relocs = 1; + cfree_obj_reliter_free(rit); + } + /* GNU objdump's flag bits: 0x01 HAS_RELOC, 0x10 HAS_SYMS, 0x40 D_PAGED. + * cfree's reader handles relocatable .o-shaped inputs only, so we + * never see EXEC_P here; D_PAGED is irrelevant. 
*/ + if (has_relocs) flags |= 0x0001u; + if (nsym) flags |= 0x0010u; + + driver_printf("architecture: %s, flags 0x%08x:\n", arch_str(target.arch), + flags); + if (has_relocs) driver_printf("HAS_RELOC, "); + if (nsym) driver_printf("HAS_SYMS"); + if (has_relocs || nsym) driver_printf("\n"); + driver_printf("start address 0x%016llx\n", 0ull); + driver_printf("format: %s, sections: %u, symbols: %u\n\n", + fmt_str(fmt, target.ptr_size), nsec, nsym); + (void)label; +} + static void dump_obj(const CfreeDisasmContext* dctx, const char* label, CfreeObjFile* f, const ObjdumpOpts* opts) { CfreeTarget target = cfree_obj_target(f); @@ -420,7 +988,9 @@ static void dump_obj(const CfreeDisasmContext* dctx, const char* label, driver_printf("%s:\tfile format %s-%s\n\n", label, fmt_str(fmt, target.ptr_size), arch_str(target.arch)); + if (opts->f) dump_file_header(f, label); if (opts->h) dump_sections(f, opts); + if (opts->h) dump_groups(f, opts); if (opts->t) dump_symbols(f, opts); if (opts->s) dump_hex(f, opts); if (opts->d || opts->D) dump_disasm(dctx, f, opts); @@ -500,6 +1070,9 @@ static int parse_short_flags(const char* arg, ObjdumpOpts* o) { case 's': o->s = 1; break; + case 'p': + o->p = 1; + break; case 'x': o->f = 1; o->h = 1; @@ -546,6 +1119,10 @@ static int parse_long_flag(const char* arg, ObjdumpOpts* o) { o->t = 1; return 1; } + if (driver_streq(arg, "--private-headers")) { + o->p = 1; + return 1; + } return 0; } @@ -596,7 +1173,8 @@ int driver_objdump(int argc, char** argv) { } } - saw_op = opts.f || opts.h || opts.t || opts.d || opts.D || opts.r || opts.s; + saw_op = opts.f || opts.h || opts.t || opts.d || opts.D || opts.r || opts.s || + opts.p; if (!saw_op) { /* Default = -h -t (matches the prior behavior). */ opts.h = 1; opts.t = 1; @@ -644,11 +1222,44 @@ int driver_objdump(int argc, char** argv) { case CFREE_BIN_MACHO: case CFREE_BIN_WASM: { CfreeObjFile* f = NULL; + /* PE executables aren't yet readable via cfree_obj_open (the + * obj reader is .obj-shaped only). 
For PE inputs we serve -f / + * -h / -p by walking the raw image bytes; -t / -d / -r / -s + * still need an ObjFile and are skipped with a soft error so + * the other ops don't get swallowed. */ if (cfree_obj_open(&ctx, &input, &f) != CFREE_OK) { - driver_errf(OBJDUMP_TOOL, "failed to parse: %s", a); - rc = 1; + if (bin == CFREE_BIN_PE) { + PeImage pe; + int parsed = pe_parse_image(input.data, input.len, &pe) && pe.valid; + int handled = 0; + if (parsed && opts.f) { + dump_pe_file_header(a, &pe); + handled = 1; + } + if (parsed && opts.h && pe.opt_magic == PE_OPT_HDR64_MAGIC) { + dump_pe_sections(a, &pe, input.data, input.len, &opts); + handled = 1; + } + if (opts.p) { + dump_pe_private(a, input.data, input.len); + handled = 1; + } + if (!handled) { + driver_errf(OBJDUMP_TOOL, + "%s: PE images support only -f / -h / -p; " + "use -p for image details", + a); + rc = 1; + } + } else { + driver_errf(OBJDUMP_TOOL, "failed to parse: %s", a); + rc = 1; + } } else { dump_obj(dctx_p, a, f, &opts); + if (opts.p && bin == CFREE_BIN_PE) { + dump_pe_private(a, input.data, input.len); + } cfree_obj_free(f); } break; diff --git a/driver/runtime.c b/driver/runtime.c @@ -35,6 +35,19 @@ static const char* const kRtSrcX64[] = { "coro/coro.c", }; +static const char* const kRtSrcX64Windows[] = { + "int/int.c", + "int/si_div.c", + "fp/fp.c", + "atomic/atomic_freestanding.c", + "cache/clear_cache.c", + "cfree/ifunc_init.c", + "int64/int64.c", + "stack/chkstk_x86_64_win.c", + "coro/x86_64_win.c", + "coro/coro.c", +}; + static const char* const kRtSrcAarch64Linux[] = { "int/int.c", "fp/fp.c", "mem/mem.c", "atomic/atomic_freestanding.c", @@ -56,6 +69,18 @@ static const char* const kRtSrcAarch64Darwin[] = { "coro/aarch64_macho.s", }; +static const char* const kRtSrcAarch64Windows[] = { + "int/int.c", + "int/si_div.c", + "fp/fp.c", + "atomic/atomic_freestanding.c", + "cache/clear_cache.c", + "cfree/ifunc_init.c", + "int64/int64.c", + "coro/aarch64.c", + "coro/coro.c", +}; + static const char* 
const kRtSrcRv64Linux[] = { /* fp_tf and fp_ti are bundled with LDBL128 in the host rt * Makefile; mirror that here. long double = double on rv64 per @@ -80,12 +105,19 @@ static const RuntimeVariant kRtVariants[] = { {"x86_64-apple-darwin", CFREE_ARCH_X86_64, CFREE_OS_MACOS, CFREE_OBJ_MACHO, 8, 8, "lib/include/lp64_le", 1, 0, kRtSrcX64, (uint32_t)(sizeof(kRtSrcX64) / sizeof(kRtSrcX64[0]))}, + {"x86_64-pc-windows", CFREE_ARCH_X86_64, CFREE_OS_WINDOWS, CFREE_OBJ_COFF, + 8, 8, "lib/include/llp64_le", 1, 0, kRtSrcX64Windows, + (uint32_t)(sizeof(kRtSrcX64Windows) / sizeof(kRtSrcX64Windows[0]))}, {"aarch64-linux", CFREE_ARCH_ARM_64, CFREE_OS_LINUX, CFREE_OBJ_ELF, 8, 8, "lib/include/lp64_le", 1, 1, kRtSrcAarch64Linux, (uint32_t)(sizeof(kRtSrcAarch64Linux) / sizeof(kRtSrcAarch64Linux[0]))}, {"aarch64-apple-darwin", CFREE_ARCH_ARM_64, CFREE_OS_MACOS, CFREE_OBJ_MACHO, 8, 8, "lib/include/lp64_le", 1, 0, kRtSrcAarch64Darwin, (uint32_t)(sizeof(kRtSrcAarch64Darwin) / sizeof(kRtSrcAarch64Darwin[0]))}, + {"aarch64-windows", CFREE_ARCH_ARM_64, CFREE_OS_WINDOWS, CFREE_OBJ_COFF, 8, + 8, "lib/include/llp64_le", 1, 0, kRtSrcAarch64Windows, + (uint32_t)(sizeof(kRtSrcAarch64Windows) / + sizeof(kRtSrcAarch64Windows[0]))}, /* rv64 long double = double per the locked decision (matches RV64 * musl/glibc default and avoids the binary128 soft-float tail). 
*/ {"riscv64-linux", CFREE_ARCH_RV64, CFREE_OS_LINUX, CFREE_OBJ_ELF, 8, 8, diff --git a/include/cfree/link.h b/include/cfree/link.h @@ -151,10 +151,17 @@ typedef enum CfreeLinkOutputKind { CFREE_LINK_OUTPUT_JIT, } CfreeLinkOutputKind; +typedef enum CfreePeSubsystem { + CFREE_PE_SUBSYSTEM_DEFAULT = 0, + CFREE_PE_SUBSYSTEM_WINDOWS_GUI = 2, + CFREE_PE_SUBSYSTEM_WINDOWS_CUI = 3, +} CfreePeSubsystem; + typedef struct CfreeLinkSessionOptions { uint8_t output_kind; /* CfreeLinkOutputKind */ bool gc_sections; bool pie; + uint16_t pe_subsystem; /* CfreePeSubsystem; 0 => target default */ const char* interp_path; const char* entry; const CfreeLinkScript* linker_script; diff --git a/include/cfree/object.h b/include/cfree/object.h @@ -252,6 +252,24 @@ CfreeStatus cfree_obj_section_data(const CfreeObjFile *, CfreeObjSection idx, CfreeStatus cfree_obj_section_by_name(const CfreeObjFile *, const char *name, CfreeObjSection *out); +/* Format-specific raw section attributes preserved by the reader. + * + * COFF : *raw_type_out = IMAGE_SECTION_HEADER.Characteristics + * *raw_flags_out = 0 + * ELF : *raw_type_out = sh_type override (when the canonical SecSem + * mapping is lossy, e.g. SHT_LLVM_ADDRSIG); 0 otherwise + * *raw_flags_out = sh_flags bits not represented in SecFlag + * (e.g. SHF_EXCLUDE) + * Mach-O/Wasm : both zero (reader does not preserve a raw view here). + * + * Use when canonical SecFlag/SecKind isn't enough — e.g. objdump + * decoding `IMAGE_SCN_LNK_COMDAT` / `_MEM_DISCARDABLE` for diagnostic + * display. NULL output pointers are ignored. 
*/ +CfreeStatus cfree_obj_section_format_flags(const CfreeObjFile *, + CfreeObjSection idx, + uint32_t *raw_type_out, + uint32_t *raw_flags_out); + CfreeStatus cfree_obj_symbol_by_name(const CfreeObjFile *, const char *name, CfreeObjSymInfo *out); diff --git a/lang/c/parse/parse.c b/lang/c/parse/parse.c @@ -1217,8 +1217,7 @@ static void parse_external_decl(Parser* p) { if (!parse_decl_specs(p, &specs)) { perr(p, "expected declaration"); } - if (specs.storage == DS_REGISTER || - (specs.storage == DS_AUTO && specs.storage_explicit)) { + if (specs.storage == DS_AUTO && specs.storage_explicit) { perr(p, "invalid storage-class specifier at file scope"); } @@ -1275,6 +1274,9 @@ static void parse_external_decl(Parser* p) { attr_list_append(&fent->attrs, dattrs); if (is_punct(&p->cur, '{')) { + int suppress_body_codegen = + specs.storage == DS_EXTERN && + ((specs.flags | fn_decl_flags) & DF_INLINE); if (fent->defined) perr(p, "redefinition of function"); fent->defined = 1; fent->decl_state = DSTATE_FUNC_DEFINED; @@ -1282,8 +1284,10 @@ static void parse_external_decl(Parser* p) { const Type* saved_func_ret = p->cur_func_ret; p->cur_func_name = name; p->cur_func_ret = fn_ty->fn.ret; + if (suppress_body_codegen) pcg_codegen_suppress_push(p); parse_function_body(p, fent->v.sym, fn_ty, abi, infos, nparams, loc, fn_section_id, fn_decl_flags); + if (suppress_body_codegen) pcg_codegen_suppress_pop(p); p->cur_func_name = saved_func_name; p->cur_func_ret = saved_func_ret; return; @@ -1320,7 +1324,9 @@ static void parse_external_decl(Parser* p) { /* Global object declaration. 
*/ for (;;) { int has_init = is_punct(&p->cur, '='); - int is_pure_extern = (specs.storage == DS_EXTERN) && !has_init; + int is_pure_extern = + (specs.storage == DS_EXTERN || specs.storage == DS_REGISTER) && + !has_init; SymEntry* existing = scope_lookup_current(p, name); ObjSymId sym = OBJ_SYM_NONE; ObjSecId section_id = OBJ_SEC_NONE; @@ -1457,6 +1463,9 @@ static void parse_translation_unit(Parser* p) { parse_file_scope_asm(p); continue; } + if (accept_punct(p, ';')) { + continue; + } parse_external_decl(p); } } diff --git a/lang/c/parse/parse_expr.c b/lang/c/parse/parse_expr.c @@ -48,6 +48,11 @@ static const Type* ty_char16(Parser* p) { static const Type* ty_char32(Parser* p) { return type_prim(p->pool, TY_UINT); } +static const Type* ty_wchar(Parser* p) { + CfreeTarget target = cfree_compiler_target(p->c); + return target.os == CFREE_OS_WINDOWS ? ty_char16(p) : ty_int(p); +} + static int pointer_pointees_compatible(Parser* p, const Type* lhs, const Type* rhs) { const Type* lp; @@ -338,7 +343,7 @@ static CLitStringEnc literal_string_encoding(const Tok* t) { } const Type* string_literal_elem_type(Parser* p, const Tok* t) { - if (t->flags & TF_STR_WIDE) return ty_int(p); + if (t->flags & TF_STR_WIDE) return ty_wchar(p); if (t->flags & TF_STR_U16) return ty_char16(p); if (t->flags & TF_STR_U32) return ty_char32(p); return type_prim(p->pool, TY_CHAR); @@ -1262,6 +1267,40 @@ static MemOrder parse_atomic_mem_order(Parser* p) { * Builtin call handling * ============================================================ */ +static int offsetof_find_member(Parser* p, const Type* rec_ty, Sym mname, + const Type** out_ty, u32* out_off) { + const ABIRecordLayout* L; + rec_ty = type_unqual(p->pool, rec_ty); + if (!rec_ty || (rec_ty->kind != TY_STRUCT && rec_ty->kind != TY_UNION)) + return 0; + L = c_abi_record_layout(p->abi, p->pool, rec_ty); + if (!L) return 0; + for (u16 i = 0; i < rec_ty->rec.nfields; ++i) { + const Field* f = &rec_ty->rec.fields[i]; + if (f->name == mname && 
mname != 0) { + *out_ty = f->type; + *out_off = L->fields[i].offset; + return 1; + } + } + for (u16 i = 0; i < rec_ty->rec.nfields; ++i) { + const Field* f = &rec_ty->rec.fields[i]; + const Type* fty = type_unqual(p->pool, f->type); + const Type* nested_ty = NULL; + u32 nested_off = 0; + if (!((f->flags & FIELD_ANON) && + (fty->kind == TY_STRUCT || fty->kind == TY_UNION))) { + continue; + } + if (offsetof_find_member(p, fty, mname, &nested_ty, &nested_off)) { + *out_ty = nested_ty; + *out_off = L->fields[i].offset + nested_off; + return 1; + } + } + return 0; +} + static const Type* offsetof_designator(Parser* p, const Type* base, u32* off) { const Type* cur = base; if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { @@ -1272,24 +1311,8 @@ static const Type* offsetof_designator(Parser* p, const Type* base, u32* off) { Sym mname = p->cur.v.ident; const Type* mty = NULL; u32 moff = 0; - const Field* mf = NULL; - /* find_field is static in parse_type.c; we need it here. - * We call c_abi_record_layout directly inline. 
*/ - const ABIRecordLayout* L = c_abi_record_layout(p->abi, p->pool, cur); - if (!L) perr(p, "no such member in __builtin_offsetof"); - int found = 0; - for (u16 i = 0; i < cur->rec.nfields; ++i) { - const Field* f = &cur->rec.fields[i]; - if (f->name == mname && mname != 0) { - mty = f->type; - moff = L->fields[i].offset; - mf = f; - found = 1; - break; - } - } - (void)mf; - if (!found) perr(p, "no such member in __builtin_offsetof"); + if (!offsetof_find_member(p, cur, mname, &mty, &moff)) + perr(p, "no such member in __builtin_offsetof"); advance(p); *off += moff; cur = mty; @@ -1550,6 +1573,44 @@ static int parse_builtin_fabs_call(Parser* p, Sym name, SrcLoc loc) { return 1; } +static int parse_builtin_abs_call(Parser* p, Sym name, SrcLoc loc) { + size_t nlen = 0; + const char* nm = pool_str(p->pool, name, &nlen); + const char* libname = NULL; + const Type* int_ty = NULL; + const Type* params[1]; + const Type* fn_ty; + CfreeCgSym sym; + + if (nm && nlen == 13u && memcmp(nm, "__builtin_abs", 13u) == 0) { + libname = "abs"; + int_ty = type_prim(p->pool, TY_INT); + } else if (nm && nlen == 14u && memcmp(nm, "__builtin_labs", 14u) == 0) { + libname = "labs"; + int_ty = type_prim(p->pool, TY_LONG); + } else if (nm && nlen == 15u && memcmp(nm, "__builtin_llabs", 15u) == 0) { + libname = "llabs"; + int_ty = type_prim(p->pool, TY_LLONG); + } else { + return 0; + } + + advance(p); /* IDENT */ + expect_punct(p, '(', "'(' after abs builtin"); + parse_assign_expr(p); + to_rvalue(p); + coerce_top_to_type(p, int_ty); + expect_punct(p, ')', "')' after abs builtin"); + + params[0] = int_ty; + fn_ty = type_func(p->pool, int_ty, params, 1, 0); + sym = pcg_emit_enabled(p) ? 
builtin_libcall_sym(p, libname, fn_ty) + : CFREE_CG_SYM_NONE; + cg_set_loc(p->cg, loc); + pcg_call_symbol(p, sym, 1, fn_ty); + return 1; +} + static int try_parse_builtin_call(Parser* p) { Sym name = p->cur.v.ident; SrcLoc loc = p->cur.loc; @@ -1563,6 +1624,7 @@ static int try_parse_builtin_call(Parser* p) { if (parse_builtin_isnan_call(p, name, loc)) return 1; if (parse_builtin_inf_call(p, name, loc)) return 1; if (parse_builtin_fabs_call(p, name, loc)) return 1; + if (parse_builtin_abs_call(p, name, loc)) return 1; if (parse_builtin_clear_cache_call(p, name, loc)) return 1; if (name != p->sym_b_alloca && name != p->sym_b_ctz && diff --git a/lang/c/parse/parse_priv.h b/lang/c/parse/parse_priv.h @@ -242,6 +242,13 @@ typedef struct Parser { Sym sym_b_expect; Sym sym_b_offsetof; Sym sym_b_va_list; + /* Cached singleton for __builtin_va_list — built lazily on first + * mention so every occurrence resolves to the same Type* (and the + * same TagId where applicable). Without the cache, c_abi_va_list_type + * mints a fresh struct type per occurrence and headers that pass + * locally-declared __builtin_va_list values to functions taking + * va_list (e.g. mingw's sec_api/stdio_s.h) fail type-equality. 
*/ + const Type* type_b_va_list; Sym sym_b_va_start; Sym sym_b_va_arg; Sym sym_b_va_end; diff --git a/lang/c/parse/parse_type.c b/lang/c/parse/parse_type.c @@ -447,7 +447,7 @@ void validate_decl_type_constraints(Parser* p, const DeclSpecs* specs, if (specs->flags & DF_THREAD) perr(p, "_Thread_local is invalid for struct member"); } - if (u->kind == TY_VOID && !is_function) { + if (u->kind == TY_VOID && !is_function && specs->storage != DS_TYPEDEF) { perr(p, "object may not have void type"); } if ((specs->flags & DF_INLINE) && !is_function) { @@ -771,7 +771,9 @@ int parse_decl_specs(Parser* p, DeclSpecs* out) { } else if (!acc.saw_explicit_type && !tagged_ty && t.kind == TOK_IDENT && ident_kw(p, t.v.ident) == KW_NONE) { if (t.v.ident == p->sym_b_va_list) { - tagged_ty = c_abi_va_list_type(p->abi, p->pool); + if (!p->type_b_va_list) + p->type_b_va_list = c_abi_va_list_type(p->abi, p->pool); + tagged_ty = p->type_b_va_list; acc.saw_explicit_type = 1; advance(p); seen = 1; diff --git a/lang/c/pp/pp.c b/lang/c/pp/pp.c @@ -315,6 +315,10 @@ static void compute_date_time(Pp* pp) { } static void pp_register_static_predefined(Pp* pp) { + pp_define(pp, "__cfree__", "1"); + pp_define(pp, "__cfree_major__", "0"); + pp_define(pp, "__cfree_minor__", "0"); + pp_define(pp, "__cfree_patchlevel__", "0"); pp_define(pp, "__STDC__", "1"); pp_define(pp, "__STDC_HOSTED__", "0"); pp_define(pp, "__STDC_VERSION__", "201112L"); @@ -332,15 +336,17 @@ static void pp_register_static_predefined(Pp* pp) { /* Target-dependent predefined macros consumed by rt/include/stddef.h and * rt/include/stdint.h. The set mirrors the subset of GCC/Clang's __*_TYPE__ * / __*_MAX__ namespace that those headers reference. We split only on - * pointer width: ptr_size == 8 picks the LP64 model (every 64-bit target - * cfree supports), ptr_size == 4 picks ILP32. LLP64 (Windows x86-64) is - * not yet a supported target, so `long` always tracks pointer width here. 
*/ + * pointer width plus the target data model: LP64 for Unix-like 64-bit targets, + * LLP64 for 64-bit Windows, and ILP32 for 32-bit targets. */ static void pp_register_target_predefined(Pp* pp) { CfreeTarget target = cfree_compiler_target(pp->c); const CfreePredefinedMacro* arch_defs = NULL; uint32_t narch_defs = cfree_compiler_arch_predefines(pp->c, &arch_defs); uint32_t i; - int lp64 = (target.ptr_size == 8); + int ptr64 = (target.ptr_size == 8); + int win = (target.os == CFREE_OS_WINDOWS); + int lp64 = ptr64 && !win; + int wchar16 = win; for (i = 0; i < narch_defs; ++i) { pp_define(pp, arch_defs[i].name, arch_defs[i].body); @@ -360,19 +366,140 @@ static void pp_register_target_predefined(Pp* pp) { pp_define(pp, "__SIZEOF_INT__", "4"); pp_define(pp, "__SIZEOF_LONG__", lp64 ? "8" : "4"); pp_define(pp, "__SIZEOF_LONG_LONG__", "8"); - pp_define(pp, "__SIZEOF_POINTER__", lp64 ? "8" : "4"); - pp_define(pp, "__SIZEOF_SIZE_T__", lp64 ? "8" : "4"); - pp_define(pp, "__SIZEOF_PTRDIFF_T__", lp64 ? "8" : "4"); - pp_define(pp, "__SIZEOF_WCHAR_T__", "4"); + pp_define(pp, "__SIZEOF_POINTER__", ptr64 ? "8" : "4"); + pp_define(pp, "__SIZEOF_SIZE_T__", ptr64 ? "8" : "4"); + pp_define(pp, "__SIZEOF_PTRDIFF_T__", ptr64 ? "8" : "4"); + pp_define(pp, "__SIZEOF_WCHAR_T__", wchar16 ? "2" : "4"); pp_define(pp, "__SIZEOF_WINT_T__", "4"); pp_define(pp, "__SIZEOF_FLOAT__", "4"); pp_define(pp, "__SIZEOF_DOUBLE__", "8"); pp_define(pp, "__SIZEOF_LONG_DOUBLE__", "8"); + /* Windows / mingw predefined macros. cfree targets the mingw + * flavor (DWARF debug info, mingwex CRT) rather than MSVC, so we + * advertise __MINGW{32,64}__ and friends but never set _MSC_VER. + * Both _WIN32 and the legacy unprefixed WIN32 are defined; _WIN64 + * is set on 64-bit targets only. The MSVC-compat machine macros + * (_M_X64 / _M_AMD64 / _M_ARM64) are useful for headers that gate + * on them but harmless to set everywhere — mingw's own headers + * tolerate them. 
 */ + if (target.os == CFREE_OS_WINDOWS) { + pp_define(pp, "_WIN32", "1"); + pp_define(pp, "WIN32", "1"); + pp_define(pp, "__MINGW32__", "1"); + if (target.ptr_size == 8) { + pp_define(pp, "_WIN64", "1"); + pp_define(pp, "__MINGW64__", "1"); + } + if (target.arch == CFREE_ARCH_X86_64) { + pp_define(pp, "_M_X64", "100"); + pp_define(pp, "_M_AMD64", "100"); + } else if (target.arch == CFREE_ARCH_ARM_64) { + pp_define(pp, "_M_ARM64", "1"); + } + /* mingw's <vadefs.h> / many CRT headers gate __builtin_va_list / + * __gnuc_va_list on __GNUC__. cfree implements the va_* builtins + * and __builtin_va_list with the GCC contract, so impersonating a + * conservative GCC vintage lets the mingw header tree compile. + * We pick 4.0 — old enough that no header expects GCC-specific + * extensions cfree doesn't implement (e.g. transactional memory, + * GIMPLE plugins), but new enough to clear every __GNUC__ >= N + * gate we've seen in practice. */ + pp_define(pp, "__GNUC__", "4"); + pp_define(pp, "__GNUC_MINOR__", "0"); + pp_define(pp, "__GNUC_PATCHLEVEL__", "0"); + /* __has_builtin / __has_feature / __has_attribute: clang/GCC + * preprocessor extensions. mingw's _mingw.h gates inline-asm + * intrinsic definitions on whether the compiler claims to have + * them as builtins (e.g. __debugbreak, __fastfail, __prefetch). + * cfree doesn't model individual lookups: __has_builtin says "yes" + * uniformly so mingw skips its inline-asm fallbacks (intel/{$}-form + * asm cfree's parser doesn't accept); the other two say "no". */ + pp_define(pp, "__has_builtin(x)", "1"); + pp_define(pp, "__has_feature(x)", "0"); + pp_define(pp, "__has_attribute(x)", "0"); + /* MSVC fixed-width integer types. mingw's corecrt.h uses these + * directly (e.g. `typedef unsigned __int64 size_t;`). Map to the + * C standard equivalents. 
*/ + pp_define(pp, "__int8", "char"); + pp_define(pp, "__int16", "short"); + pp_define(pp, "__int32", "int"); + pp_define(pp, "__int64", "long long"); + /* mingw's psdk_inc/intrin-impl.h emits an inline implementation + * for every MSVC intrinsic (_lrotl, _BitScanForward, ...) and + * gates them with __INTRINSIC_PROLOG, which uses ## to paste the + * intrinsic's name into a `defined(__INTRINSIC_DEFINED_<name>)` + * test. Once an intrinsic gets defined, a later re-invocation of + * the same gate macro hits a cfree pp bug where a *defined* + * symbol referenced inside `defined()` gets expanded before the + * `defined` operator captures it. Predefining + * __INTRINSIC_ONLYSPECIAL flips the gate's second clause so + * none of the inline intrinsics are emitted (mingw expects this + * idiom for non-special builds; the linker pulls them from + * libmingwex/libmsvcrt instead). This sidesteps the pp bug + * entirely. */ + pp_define(pp, "__INTRINSIC_ONLYSPECIAL", "1"); + /* __declspec(...) is the MSVC syntax for attributes. mingw uses + * it in headers for dllimport/dllexport, alignment, noreturn, + * etc. cfree's COFF linker routes externs through the IAT + * regardless of the dllimport hint and doesn't yet model + * dllexport via this attribute — so we erase it as a no-op + * macro. (Note: this is at the preprocessor layer; the parser + * still needs to handle the syntax if/when the macro is removed.) + */ + pp_define(pp, "__declspec(x)", ""); + /* GNU `__extension__` is a pedantic-quiet wrapper around + * non-standard constructs (statement exprs, anonymous structs). + * cfree's parser is permissive about those already; the keyword + * has no effect on parsing, so we erase it. */ + pp_define(pp, "__extension__", ""); + /* __restrict / __restrict__: GCC-flavored alternates to the C99 + * `restrict` keyword. cfree parses `restrict` already; map the + * GCC spellings onto it. 
*/ + pp_define(pp, "__restrict", "restrict"); + pp_define(pp, "__restrict__", "restrict"); + pp_define(pp, "__volatile__", "volatile"); + pp_define(pp, "__const__", "const"); + pp_define(pp, "__signed__", "signed"); + /* MSVC calling-convention attributes. On x86_64 they're no-ops + * (every function uses the Win64 ABI) and on ARM64 likewise; on + * i386 they actually mean something but cfree doesn't target it. + * Defining them as empty macros lets mingw headers that say + * `void __cdecl foo(void)` parse correctly. Same posture mingw's + * own GCC takes: __MINGW_USYMBOL((__cdecl__)). */ + /* MSVC calling-convention attributes — no-ops on Win64. cfree + * pre-defines them empty *only when* mingw's headers don't + * themselves redefine them; we use the __MINGW_<x>_REDEFINE form + * via `#undef` first to play nicely with mingw's own + * redefinitions (mingw's _mingw.h does `#define __cdecl + * __attribute__((__cdecl__))` further down). Setting them empty + * here is safe because cfree's parser will see the redefinition + * before any header uses them. */ + pp_define(pp, "__cdecl", ""); + pp_define(pp, "__stdcall", ""); + pp_define(pp, "__fastcall", ""); + pp_define(pp, "__thiscall", ""); + pp_define(pp, "__vectorcall", ""); + pp_define(pp, "_cdecl", ""); + pp_define(pp, "_stdcall", ""); + pp_define(pp, "_fastcall", ""); + /* __forceinline / __inline / __w64: mingw's _mingw.h redefines + * them itself when __GNUC__ is set, so we leave them alone here + * to avoid a redefinition-with-different-replacement error. */ + } + /* stddef.h base aliases */ - pp_define(pp, "__SIZE_TYPE__", lp64 ? "unsigned long" : "unsigned int"); - pp_define(pp, "__PTRDIFF_TYPE__", lp64 ? 
"long" : "int"); - pp_define(pp, "__WCHAR_TYPE__", "int"); + if (lp64) { + pp_define(pp, "__SIZE_TYPE__", "unsigned long"); + pp_define(pp, "__PTRDIFF_TYPE__", "long"); + } else if (ptr64) { + pp_define(pp, "__SIZE_TYPE__", "unsigned long long"); + pp_define(pp, "__PTRDIFF_TYPE__", "long long"); + } else { + pp_define(pp, "__SIZE_TYPE__", "unsigned int"); + pp_define(pp, "__PTRDIFF_TYPE__", "int"); + } + pp_define(pp, "__WCHAR_TYPE__", wchar16 ? "unsigned short" : "int"); pp_define(pp, "__CHAR16_TYPE__", "unsigned short"); pp_define(pp, "__CHAR32_TYPE__", "unsigned int"); @@ -404,19 +531,28 @@ static void pp_register_target_predefined(Pp* pp) { pp_define(pp, "__UINT_FAST8_TYPE__", "unsigned char"); pp_define(pp, "__INT_FAST8_MAX__", "127"); pp_define(pp, "__UINT_FAST8_MAX__", "255"); - if (lp64) { - pp_define(pp, "__INT_FAST16_TYPE__", "long"); - pp_define(pp, "__INT_FAST32_TYPE__", "long"); - pp_define(pp, "__INT_FAST64_TYPE__", "long"); - pp_define(pp, "__UINT_FAST16_TYPE__", "unsigned long"); - pp_define(pp, "__UINT_FAST32_TYPE__", "unsigned long"); - pp_define(pp, "__UINT_FAST64_TYPE__", "unsigned long"); - pp_define(pp, "__INT_FAST16_MAX__", "9223372036854775807L"); - pp_define(pp, "__INT_FAST32_MAX__", "9223372036854775807L"); - pp_define(pp, "__INT_FAST64_MAX__", "9223372036854775807L"); - pp_define(pp, "__UINT_FAST16_MAX__", "18446744073709551615UL"); - pp_define(pp, "__UINT_FAST32_MAX__", "18446744073709551615UL"); - pp_define(pp, "__UINT_FAST64_MAX__", "18446744073709551615UL"); + if (ptr64) { + pp_define(pp, "__INT_FAST16_TYPE__", lp64 ? "long" : "long long"); + pp_define(pp, "__INT_FAST32_TYPE__", lp64 ? "long" : "long long"); + pp_define(pp, "__INT_FAST64_TYPE__", lp64 ? "long" : "long long"); + pp_define(pp, "__UINT_FAST16_TYPE__", + lp64 ? "unsigned long" : "unsigned long long"); + pp_define(pp, "__UINT_FAST32_TYPE__", + lp64 ? "unsigned long" : "unsigned long long"); + pp_define(pp, "__UINT_FAST64_TYPE__", + lp64 ? 
"unsigned long" : "unsigned long long"); + pp_define(pp, "__INT_FAST16_MAX__", + lp64 ? "9223372036854775807L" : "9223372036854775807LL"); + pp_define(pp, "__INT_FAST32_MAX__", + lp64 ? "9223372036854775807L" : "9223372036854775807LL"); + pp_define(pp, "__INT_FAST64_MAX__", + lp64 ? "9223372036854775807L" : "9223372036854775807LL"); + pp_define(pp, "__UINT_FAST16_MAX__", + lp64 ? "18446744073709551615UL" : "18446744073709551615ULL"); + pp_define(pp, "__UINT_FAST32_MAX__", + lp64 ? "18446744073709551615UL" : "18446744073709551615ULL"); + pp_define(pp, "__UINT_FAST64_MAX__", + lp64 ? "18446744073709551615UL" : "18446744073709551615ULL"); } else { pp_define(pp, "__INT_FAST16_TYPE__", "int"); pp_define(pp, "__INT_FAST32_TYPE__", "int"); @@ -441,6 +577,14 @@ static void pp_register_target_predefined(Pp* pp) { pp_define(pp, "__UINTPTR_MAX__", "18446744073709551615UL"); pp_define(pp, "__PTRDIFF_MAX__", "9223372036854775807L"); pp_define(pp, "__SIZE_MAX__", "18446744073709551615UL"); + } else if (ptr64) { + pp_define(pp, "__LONG_MAX__", "2147483647L"); + pp_define(pp, "__INTPTR_TYPE__", "long long"); + pp_define(pp, "__UINTPTR_TYPE__", "unsigned long long"); + pp_define(pp, "__INTPTR_MAX__", "9223372036854775807LL"); + pp_define(pp, "__UINTPTR_MAX__", "18446744073709551615ULL"); + pp_define(pp, "__PTRDIFF_MAX__", "9223372036854775807LL"); + pp_define(pp, "__SIZE_MAX__", "18446744073709551615ULL"); } else { pp_define(pp, "__LONG_MAX__", "2147483647L"); pp_define(pp, "__INTPTR_TYPE__", "int"); @@ -472,9 +616,8 @@ static void pp_register_target_predefined(Pp* pp) { pp_define(pp, "__UINTMAX_C(c)", "c ## ULL"); } - /* wchar_t / wint_t / sig_atomic_t are all `int` in cfree's model */ - pp_define(pp, "__WCHAR_MAX__", "2147483647"); - pp_define(pp, "__WCHAR_MIN__", "(-__WCHAR_MAX__ - 1)"); + pp_define(pp, "__WCHAR_MAX__", wchar16 ? "65535" : "2147483647"); + pp_define(pp, "__WCHAR_MIN__", wchar16 ? 
"0" : "(-__WCHAR_MAX__ - 1)"); pp_define(pp, "__WINT_MAX__", "2147483647"); pp_define(pp, "__WINT_MIN__", "(-__WINT_MAX__ - 1)"); pp_define(pp, "__SIG_ATOMIC_MAX__", "2147483647"); diff --git a/lang/c/pp/pp_directive.c b/lang/c/pp/pp_directive.c @@ -130,13 +130,27 @@ static void prepass_defined(Pp* pp, const Tok* in, u32 nin, TokVec* out) { } } -/* Macro-expand a sequence of pre-#if tokens to completion. */ +/* Macro-expand a sequence of pre-#if tokens to completion. + * + * Sets pp->in_if_expansion for the duration so pp_next_raw can keep + * `defined`-operator operands raw even when they ride out of a macro + * body via the ## operator. Without this flag a macro body like + * #define G(x) (!defined(__G_DEFINED_ ## x)) + * would have the pasted operand expanded if it happens to name an + * already-defined macro, leaving the second prepass to choke on + * `defined()`. */ static void expand_for_if(Pp* pp, const Tok* in, u32 nin, TokVec* out) { Tok* slice; + u8 saved; if (nin == 0) return; slice = arena_array(pp->arena, Tok, nin); memcpy(slice, in, sizeof(Tok) * nin); + saved = pp->in_if_expansion; + pp->in_if_expansion = 1; + pp->defined_skip = 0; expand_arg_to_eof(pp, slice, NULL, nin, out); + pp->in_if_expansion = saved; + pp->defined_skip = 0; } /* Replace remaining identifiers with `0` per §6.10.1 ¶4, after `defined` diff --git a/lang/c/pp/pp_expand.c b/lang/c/pp/pp_expand.c @@ -933,6 +933,40 @@ Tok pp_next_raw(Pp* pp) { * iteration picks up. */ continue; } + /* While expanding an #if condition, suppress macro expansion of + * `defined`-operator operands so a `defined(X)` produced by a + * macro body whose argument was pasted via ## doesn't accidentally + * expand an already-defined X to its body (typically empty). See + * the `defined_skip` field comment in pp_priv.h. 
 */ + if (pp->in_if_expansion) { + if (pp->defined_skip == 1 && t.kind == TOK_IDENT) { + t.flags |= TF_NO_EXPAND; + pp->defined_skip = 0; + } else if (pp->defined_skip == 2) { + if (t.kind == TOK_PUNCT && t.v.punct == '(') { + pp->defined_skip = 3; + } else if (t.kind == TOK_IDENT) { + /* `defined IDENT` (no parens) — same as the skip==1 case. */ + t.flags |= TF_NO_EXPAND; + pp->defined_skip = 0; + } else { + pp->defined_skip = 0; + } + } else if (pp->defined_skip == 3) { + if (t.kind == TOK_IDENT) { + t.flags |= TF_NO_EXPAND; + pp->defined_skip = 4; + } else if (t.kind == TOK_PUNCT && t.v.punct == ')') { + pp->defined_skip = 0; + } + } else if (pp->defined_skip == 4) { + if (t.kind == TOK_PUNCT && t.v.punct == ')') { + pp->defined_skip = 0; + } + } else if (t.kind == TOK_IDENT && t.v.ident == pp->sym_defined) { + pp->defined_skip = 2; + } + } if (t.kind == TOK_IDENT && (t.flags & TF_NO_EXPAND) == 0) { Sym id = t.v.ident; diff --git a/lang/c/pp/pp_priv.h b/lang/c/pp/pp_priv.h @@ -162,6 +162,29 @@ struct Pp { * time(NULL) if unset). */ Sym val_date_str; Sym val_time_str; + + /* Defined-operator handling during #if expansion. + * + * The first prepass in eval_if_expr replaces `defined X` / `defined + * (X)` literally found in the directive line, but `defined()` can + * also come from macro bodies (mingw's intrin-impl.h uses + * `defined(__INTRINSIC_DEFINED_ ## name)` inside a #define). When + * the expander processes such a body, the identifier inside + * `defined(...)` must NOT be macro-expanded — otherwise an empty + * macro X would turn `defined(X)` into `defined()` and the + * post-expansion prepass would reject it. + * + * This pair of fields tracks the state across `pp_next_raw` calls + * within `expand_for_if`: + * in_if_expansion: 1 inside an #if's expand_arg_to_eof call + * defined_skip: 0 idle; 1 mark the next IDENT then clear; + * 2 after `defined` (expect `(` or IDENT); + * 3 after `defined (` (expect IDENT); + * 4 after the operand (expect `)`). 
+ * The expander uses these to mark the operand IDENT TF_NO_EXPAND + * before the macro-expansion check at the head of pp_next_raw. */ + u8 in_if_expansion; + u8 defined_skip; }; /* ============================================================ diff --git a/lang/c/type/type.c b/lang/c/type/type.c @@ -502,6 +502,8 @@ static CfreeCgTypeId type_cg_builtin(CfreeCompiler* c, TypeKind kind) { return b.id[CFREE_CG_BUILTIN_I32]; case TY_LONG: case TY_ULONG: + if (target.os == CFREE_OS_WINDOWS) return b.id[CFREE_CG_BUILTIN_I32]; + return b.id[CFREE_CG_BUILTIN_I64]; case TY_LLONG: case TY_ULLONG: return b.id[CFREE_CG_BUILTIN_I64]; diff --git a/rt/Makefile b/rt/Makefile @@ -17,6 +17,7 @@ RT_VARIANTS = \ riscv64-linux \ riscv64-elf \ riscv64-elf-save-restore \ + aarch64-windows \ x86_64-pc-windows \ i386-linux \ wasm32 \ @@ -71,6 +72,12 @@ RT_aarch64-apple-darwin_INT128 = 1 RT_aarch64-apple-darwin_CORO = aarch64 RT_EXTRA_SRCS_aarch64-apple-darwin = rt/lib/coro/aarch64_macho.s +RT_aarch64-windows_TARGET = aarch64-w64-windows-gnu +RT_aarch64-windows_ABI = llp64 +RT_aarch64-windows_INT128 = 1 +RT_aarch64-windows_CORO = aarch64 +RT_aarch64-windows_HOSTED = 1 + RT_riscv64-linux_TARGET = riscv64-linux-gnu RT_riscv64-linux_ABI = lp64 RT_riscv64-linux_INT128 = 1 @@ -96,6 +103,7 @@ RT_x86_64-pc-windows_TARGET = x86_64-pc-windows-msvc RT_x86_64-pc-windows_ABI = llp64 RT_x86_64-pc-windows_INT128 = 1 RT_x86_64-pc-windows_CORO = x86_64_win +RT_x86_64-pc-windows_HOSTED = 1 RT_i386-linux_TARGET = i386-linux-gnu RT_i386-linux_ABI = ilp32 @@ -146,6 +154,14 @@ RT_BASE_SRCS = \ rt/lib/cache/clear_cache.c \ rt/lib/cfree/ifunc_init.c +RT_COMPILER_SRCS = \ + rt/lib/int/int.c \ + rt/lib/int/si_div.c \ + rt/lib/fp/fp.c \ + rt/lib/atomic/atomic_freestanding.c \ + rt/lib/cache/clear_cache.c \ + rt/lib/cfree/ifunc_init.c + RT_ABI_SRCS_lp64 = rt/lib/int64/int64.c RT_ABI_SRCS_llp64 = rt/lib/int64/int64.c RT_ABI_SRCS_ilp32 = rt/lib/int32/int32.c @@ -177,7 +193,7 @@ RT_AEABI_FLAGS_thumb1 = -march=armv6-m 
-mthumb -mfloat-abi=soft define RT_VARIANT_template RT_SRCS_$(1) := \ - $$(RT_BASE_SRCS) \ + $$(if $$(RT_$(1)_HOSTED),$$(RT_COMPILER_SRCS),$$(RT_BASE_SRCS)) \ $$(RT_ABI_SRCS_$$(RT_$(1)_ABI)) \ $$(RT_CORO_SRCS_$$(RT_$(1)_CORO)) \ $$(if $$(RT_$(1)_LDBL128),$$(RT_LDBL128_SRCS)) \ diff --git a/rt/include/emmintrin.h b/rt/include/emmintrin.h @@ -0,0 +1,3 @@ +#pragma once + +/* See x86intrin.h in this directory. */ diff --git a/rt/include/mm_malloc.h b/rt/include/mm_malloc.h @@ -0,0 +1,5 @@ +#pragma once + +/* Clang's x86 <malloc.h> companion declares aligned allocation helpers. + * llvm-mingw's CRT headers include it for x64; cfree uses the CRT prototypes + * from mingw headers and does not need Clang's intrinsic companion here. */ diff --git a/rt/include/x86intrin.h b/rt/include/x86intrin.h @@ -0,0 +1,12 @@ +#pragma once + +/* cfree does not implement Clang/GCC x86 vector intrinsic headers yet. + * llvm-mingw's <windows.h> includes this header while declaring WinNT + * processor helpers. The scalar helper declarations themselves come from + * mingw's psdk_inc/intrin-impl.h; this shim only prevents pulling in Clang's + * vector intrinsic header tree. */ + +void __stosb(unsigned char*, unsigned char, unsigned long long); +unsigned long long __readgsqword(unsigned long); +#define __INTRINSIC_DEFINED___stosb +#define __INTRINSIC_DEFINED___readgsqword diff --git a/rt/lib/impl/fp_compare_impl.inc b/rt/lib/impl/fp_compare_impl.inc @@ -32,7 +32,7 @@ typedef char CMP_RESULT; typedef long CMP_RESULT; #endif -#if !defined(__clang__) && defined(__GNUC__) +#if !defined(__clang__) && defined(__GNUC__) && !defined(__cfree__) // GCC uses a special __libgcc_cmp_return__ mode to define the return type, so // check that we are ABI-compatible when compiling the builtins with GCC. typedef int GCC_CMP_RESULT __attribute__((__mode__(__libgcc_cmp_return__))); diff --git a/rt/lib/stack/chkstk_x86_64_win.c b/rt/lib/stack/chkstk_x86_64_win.c @@ -0,0 +1,32 @@ +/* + * Win64 stack probes. 
+ * + * x64 callers pass the pending frame allocation size in rax, call the probe, + * then subtract rax from rsp. The probe touches each intervening page and + * returns with rax (and every register other than r10/r11) preserved. + */ + +__asm__( + ".text\n" + ".globl __chkstk\n" + "__chkstk:\n" + ".globl ___chkstk_ms\n" + "___chkstk_ms:\n" + " movq %rsp, %r10\n" + " addq $8, %r10\n" + " movq %rax, %r11\n" + " cmpq $4096, %r11\n" + " jb __cfree_chkstk_last\n" + "__cfree_chkstk_loop:\n" + " subq $4096, %r10\n" + " testq %r10, (%r10)\n" + " subq $4096, %r11\n" + " cmpq $4096, %r11\n" + " jae __cfree_chkstk_loop\n" + " testq %r11, %r11\n" + " je __cfree_chkstk_done\n" + "__cfree_chkstk_last:\n" + " subq %r11, %r10\n" + " testq %r10, (%r10)\n" + "__cfree_chkstk_done:\n" + " ret\n"); diff --git a/src/abi/abi_aapcs64_windows.c b/src/abi/abi_aapcs64_windows.c @@ -0,0 +1,67 @@ +/* Windows-on-ARM64 ABI dispatch. + * + * Vtable selection keys on (target.arch, target.os); (ARM_64, WINDOWS) + * lands here instead of AAPCS64. The two ABIs diverge in: + * + * 1. va_list shape — Windows-ARM64 `__builtin_va_list` is plain + * `void*`; AAPCS64 is a five-field struct. + * + * 2. long double — 64-bit double on Windows-ARM64 (AAPCS64: 128-bit). + * Assumed lowered by the front end before classification. + * + * Variadics still use registers (NOT all-on-stack like Apple ARM64), but + * FP arguments to variadic functions are routed through integer slots so + * `va_list` can remain a plain pointer. + * + * Classification starts from aapcs64_compute_func_info, then adjusts FP + * parameter parts for variadic functions. 
*/ + +#include <string.h> + +#include "abi/abi_internal.h" +#include "core/core.h" + +#include "core/arena.h" + +extern ABIFuncInfo* aapcs64_compute_func_info(TargetABI*, CfreeCgTypeId); + +static void remap_fp_parts_to_int(TargetABI* a, ABIArgInfo* ai) { + if (!ai || ai->kind != ABI_ARG_DIRECT || ai->nparts == 0) return; + int needs_copy = 0; + for (u16 i = 0; i < ai->nparts; ++i) { + if (ai->parts[i].cls == ABI_CLASS_FP) { + needs_copy = 1; + break; + } + } + if (!needs_copy) return; + + ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, ai->nparts); + memcpy(parts, ai->parts, sizeof(ABIArgPart) * ai->nparts); + for (u16 i = 0; i < ai->nparts; ++i) { + if (parts[i].cls == ABI_CLASS_FP) parts[i].cls = ABI_CLASS_INT; + } + ai->parts = parts; +} + +static ABIFuncInfo* aapcs64_windows_compute_func_info(TargetABI* a, + CfreeCgTypeId fn) { + ABIFuncInfo* info = aapcs64_compute_func_info(a, fn); + /* vararg_on_stack stays 0 — Windows-ARM64 variadics use registers, + * unlike Apple. + * + * In a variadic function, Windows ARM64 routes floating-point arguments + * through the integer argument slots. That applies to named FP parameters + * too; trailing `...` arguments are handled by the call lowering path. */ + if (info && info->variadic) { + for (u16 i = 0; i < info->nparams; ++i) { + remap_fp_parts_to_int(a, (ABIArgInfo*)&info->params[i]); + } + } + return info; +} + +const ABIVtable aapcs64_windows_vtable = { + .compute_func_info = aapcs64_windows_compute_func_info, + .va_list_info = {8, 8, ABI_SC_PTR, 0, 0, 0}, +}; diff --git a/src/abi/abi_internal.h b/src/abi/abi_internal.h @@ -24,6 +24,9 @@ extern const ABIVtable rv64_vtable; * abi.c::select_vtable. */ extern const ABIVtable apple_arm64_vtable; extern const ABIVtable apple_x64_vtable; +/* Windows variants — selected when os == CFREE_OS_WINDOWS. */ +extern const ABIVtable win64_x64_vtable; +extern const ABIVtable aapcs64_windows_vtable; /* Shared TargetABI internals. 
The struct definition is here so each ABI * TU can reach into the per-TU caches via TargetABI*. abi.c owns the diff --git a/src/abi/abi_win64_x64.c b/src/abi/abi_win64_x64.c @@ -0,0 +1,178 @@ +/* Win64 (Microsoft x86_64) ABI classifier. + * + * Selected when (target.arch == X86_64, target.os == WINDOWS). + * + * Win64 vs SysV-x64 deltas: + * - Arg slots: RCX/RDX/R8/R9 share index with XMM0..3 (codegen + * assigns by index; classifier emits per-slot INT or FP parts). + * - Aggregates: pass-by-value only for sizes in {1,2,4,8}; otherwise + * hidden-pointer (byval for args, sret for returns). + * - __int128: passed as two INTEGER eightbytes (mingw convention; + * differs from MSVC spec which says by reference). + * - long double: 64-bit double (no x87). + * - va_list: void* (single pointer; no struct). + * - varargs: still in regs; FP-args duplicated in matching GPR slot + * by the call-site codegen (not encoded here). + * + * Shadow-space (32 B above return addr) is a call-site reservation, + * not an ABI classifier concern -- see arch/x64/call.c. + */ + +#include <string.h> + +#include "abi/abi_internal.h" +#include "cg/type.h" +#include "core/arena.h" +#include "core/core.h" + +static void classify_void(ABIArgInfo* out) { + memset(out, 0, sizeof *out); + out->kind = ABI_ARG_IGNORE; +} + +static void classify_scalar(TargetABI* a, CfreeCgTypeId t, ABIArgInfo* out, + int is_return) { + ABITypeInfo ti = abi_internal_type_info(a, t); + (void)is_return; + /* __int128 / __uint128: mingw/GCC convention emits two INTEGER + * eightbytes (rcx+rdx for args, rax+rdx for return) -- same shape + * as SysV. MSVC's official spec says "by reference" for 16-byte + * aggregates, but mingw is cfree's interop target on Windows and + * mingw matches SysV here. 
*/ + if (ti.scalar_kind == ABI_SC_INT && ti.size == 16) { + ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, 2); + memset(parts, 0, sizeof(ABIArgPart) * 2); + for (u32 i = 0; i < 2; ++i) { + parts[i].cls = ABI_CLASS_INT; + parts[i].loc = ABI_LOC_REG; + parts[i].size = 8; + parts[i].align = 8; + parts[i].src_offset = i * 8; + } + out->kind = ABI_ARG_DIRECT; + out->flags = ABI_AF_NONE; + out->parts = parts; + out->nparts = 2; + out->indirect_align = 0; + return; + } + /* long double on Win64 is 64-bit double (both MSVC and mingw, unless + * mingw's -mlong-double-80 is in effect -- not supported in v1). + * The front end should already have lowered `long double` to a size-8 + * float for Windows targets. Defensive path: if a size-16 FP slips + * through, treat it as a size-8 double (one FP part) -- this stays + * register-passed, unlike SysV which routes long double through + * memory. */ + if (ti.scalar_kind == ABI_SC_FLOAT && ti.size == 16) { + ABIArgPart* parts = arena_new(a->c->tu, ABIArgPart); + memset(parts, 0, sizeof *parts); + parts->cls = ABI_CLASS_FP; + parts->loc = ABI_LOC_REG; + parts->size = 8; + parts->align = 8; + parts->src_offset = 0; + out->kind = ABI_ARG_DIRECT; + out->flags = ABI_AF_NONE; + out->parts = parts; + out->nparts = 1; + out->indirect_align = 0; + return; + } + + out->kind = ABI_ARG_DIRECT; + out->flags = ABI_AF_NONE; + out->indirect_align = 0; + + ABIArgPart* parts = arena_new(a->c->tu, ABIArgPart); + memset(parts, 0, sizeof *parts); + parts->cls = (ti.scalar_kind == ABI_SC_FLOAT) ? 
ABI_CLASS_FP : ABI_CLASS_INT; + parts->loc = ABI_LOC_REG; + parts->size = ti.size; + parts->align = ti.align; + parts->src_offset = 0; + + out->parts = parts; + out->nparts = 1; +} + +static void classify_aggregate(TargetABI* a, CfreeCgTypeId t, ABIArgInfo* out, + int is_return) { + ABITypeInfo ti = abi_internal_type_info(a, t); + if (ti.size == 0) { + classify_void(out); + return; + } + /* Win64: aggregates pass by value only when the size is exactly one + * of {1, 2, 4, 8}. A 3-byte struct is NOT a 3-byte INT part -- it + * goes by hidden pointer. A 16-byte struct is also hidden-pointer + * (no two-register pair, unlike SysV's <=16B path). */ + if (ti.size == 1 || ti.size == 2 || ti.size == 4 || ti.size == 8) { + ABIArgPart* parts = arena_new(a->c->tu, ABIArgPart); + memset(parts, 0, sizeof *parts); + parts->cls = ABI_CLASS_INT; + parts->loc = ABI_LOC_REG; + parts->size = ti.size; + parts->align = ti.align ? ti.align : ti.size; + parts->src_offset = 0; + out->kind = ABI_ARG_DIRECT; + out->flags = ABI_AF_NONE; + out->parts = parts; + out->nparts = 1; + out->indirect_align = 0; + } else { + out->kind = ABI_ARG_INDIRECT; + out->flags = is_return ? ABI_AF_SRET : ABI_AF_BYVAL; + out->indirect_align = ti.align ? 
ti.align : 8; + out->parts = NULL; + out->nparts = 0; + } +} + +static void classify_one(TargetABI* a, CfreeCgTypeId t, ABIArgInfo* out, + int is_return) { + const CgType* ty = cg_type_get(a->c, t); + if (!ty || ty->kind == CFREE_CG_TYPE_VOID) { + classify_void(out); + return; + } + switch (ty->kind) { + case CFREE_CG_TYPE_RECORD: + classify_aggregate(a, t, out, is_return); + return; + case CFREE_CG_TYPE_ALIAS: + classify_one(a, ty->alias.base, out, is_return); + return; + default: + classify_scalar(a, t, out, is_return); + return; + } +} + +static ABIFuncInfo* win64_x64_compute_func_info(TargetABI* a, + CfreeCgTypeId fn) { + ABIFuncInfo* info = arena_new(a->c->tu, ABIFuncInfo); + const CgType* fnty = cg_type_get(a->c, fn); + memset(info, 0, sizeof *info); + + classify_one(a, fnty->func.ret, &info->ret, /*is_return=*/1); + info->has_sret = (info->ret.kind == ABI_ARG_INDIRECT) ? 1 : 0; + info->variadic = fnty->func.abi_variadic; + + info->nparams = (u16)fnty->func.nparams; + if (fnty->func.nparams) { + ABIArgInfo* arr = arena_array(a->c->tu, ABIArgInfo, fnty->func.nparams); + memset(arr, 0, sizeof(ABIArgInfo) * fnty->func.nparams); + for (u32 i = 0; i < fnty->func.nparams; ++i) { + classify_one(a, fnty->func.params[i].type, &arr[i], /*is_return=*/0); + } + info->params = arr; + } else { + info->params = NULL; + } + return info; +} + +const ABIVtable win64_x64_vtable = { + .compute_func_info = win64_x64_compute_func_info, + .va_list_info = {8, 8, ABI_SC_PTR, 0, 0, 0}, +}; diff --git a/src/api/link.c b/src/api/link.c @@ -90,6 +90,14 @@ CfreeStatus cfree_link_session_new(CfreeCompiler* c, link_free(l); return CFREE_INVALID; } + if (opts->pe_subsystem != CFREE_PE_SUBSYSTEM_DEFAULT && + opts->pe_subsystem != CFREE_PE_SUBSYSTEM_WINDOWS_GUI && + opts->pe_subsystem != CFREE_PE_SUBSYSTEM_WINDOWS_CUI) { + h->free(h, s, sizeof(*s)); + link_free(l); + return CFREE_INVALID; + } + link_set_pe_subsystem(l, opts->pe_subsystem); switch ((CfreeLinkOutputKind)opts->output_kind) { case 
CFREE_LINK_OUTPUT_EXE: @@ -129,7 +137,12 @@ CfreeStatus cfree_link_session_new(CfreeCompiler* c, break; } if (opts->linker_script) link_set_script(l, opts->linker_script); - if (opts->entry) link_set_entry(l, opts->entry); + if (opts->entry) { + link_set_entry(l, opts->entry); + } else if (opts->pe_subsystem == CFREE_PE_SUBSYSTEM_WINDOWS_GUI && + !(opts->linker_script && opts->linker_script->entry)) { + link_set_entry(l, "WinMainCRTStartup"); + } (void)opts->build_id_mode; (void)opts->build_id_bytes; (void)opts->build_id_len; diff --git a/src/api/object_detect.c b/src/api/object_detect.c @@ -39,6 +39,34 @@ CfreeBinFmt cfree_detect_fmt(const uint8_t* data, size_t len) { case 0x8664: case 0x014C: case 0xAA64: + case 0xA641: /* ARM64EC — accept as a COFF flavour the ARM64 + * codegen / linker treat as plain AArch64 (the + * encoding is unchanged; only the ABI differs, + * and we link these objs into pure-AArch64 + * images). */ + case 0x01C4: + case 0x5032: + case 0x5064: + return CFREE_BIN_COFF; + } + } + /* Microsoft "short import" record: Sig1=0, Sig2=0xFFFF. Routed + * through read_coff (which dispatches to read_coff_short_import). + * The header continues with a Machine word, which we also sanity- + * check so a stray 00 00 FF FF prefix on some other format does + * not mis-route. */ + if (len >= 8 && data[0] == 0x00 && data[1] == 0x00 && data[2] == 0xFF && + data[3] == 0xFF) { + u16 mach = (u16)data[6] | ((u16)data[7] << 8); + switch (mach) { + case 0x8664: + case 0x014C: + case 0xAA64: + case 0xA641: /* ARM64EC — accept as a COFF flavour the ARM64 + * codegen / linker treat as plain AArch64 (the + * encoding is unchanged; only the ABI differs, + * and we link these objs into pure-AArch64 + * images). 
*/ case 0x01C4: case 0x5032: case 0x5064: diff --git a/src/api/object_file.c b/src/api/object_file.c @@ -179,6 +179,20 @@ CfreeStatus cfree_obj_section_data(const CfreeObjFile* cf, CfreeObjSection idx, return CFREE_OK; } +CfreeStatus cfree_obj_section_format_flags(const CfreeObjFile* f, + CfreeObjSection idx, + uint32_t* raw_type_out, + uint32_t* raw_flags_out) { + const Section* sec; + if (!f) return CFREE_INVALID; + if (idx >= obj_section_count(f->ob)) return CFREE_NOT_FOUND; + sec = obj_section_get(f->ob, (ObjSecId)(idx + 1)); + if (!sec) return CFREE_NOT_FOUND; + if (raw_type_out) *raw_type_out = sec->ext_type; + if (raw_flags_out) *raw_flags_out = sec->ext_flags; + return CFREE_OK; +} + CfreeStatus cfree_obj_section_by_name(const CfreeObjFile* f, const char* name, CfreeObjSection* out) { u32 n, i; diff --git a/src/api/stubs.c b/src/api/stubs.c @@ -8,25 +8,15 @@ static _Noreturn void unimplemented(Compiler* c, const char* what) { compiler_panic(c, loc, "subsystem not implemented: %s", what); } -/* COFF / WASM emit/read remain stubs until those writers/readers land. */ +/* WASM emit/read remain stubs until those writers/readers land. + * COFF emit/read are implemented in src/obj/coff_emit.c and coff_read.c. 
*/ -void emit_coff(Compiler* c, ObjBuilder* o, Writer* w) { - (void)o; - (void)w; - unimplemented(c, "emit_coff"); -} void emit_wasm(Compiler* c, ObjBuilder* o, Writer* w) { (void)o; (void)w; unimplemented(c, "emit_wasm"); } -ObjBuilder* read_coff(Compiler* c, const char* n, const u8* d, size_t l) { - (void)n; - (void)d; - (void)l; - unimplemented(c, "read_coff"); -} ObjBuilder* read_wasm(Compiler* c, const char* n, const u8* d, size_t l) { (void)n; (void)d; diff --git a/src/arch/aa64/arch.c b/src/arch/aa64/arch.c @@ -8,6 +8,7 @@ #include "arch/aa64/regs.h" #include "core/bytes.h" #include "link/link_arch.h" +#include "obj/coff.h" #include "obj/elf.h" #include "obj/macho.h" #include "obj/obj.h" @@ -20,6 +21,8 @@ static const ABIVtable* aa64_abi_vtable(Compiler* c, CfreeOSKind os) { switch (os) { case CFREE_OS_MACOS: return &apple_arm64_vtable; + case CFREE_OS_WINDOWS: + return &aapcs64_windows_vtable; default: return &aapcs64_vtable; } @@ -59,6 +62,12 @@ static const ArchDwarfOps aa64_dwarf_ops = { .max_ops_per_inst = 1u, }; +static const ArchCoffOps aa64_coff_ops = { + .machine = IMAGE_FILE_MACHINE_ARM64, + .reloc_to = coff_aarch64_reloc_to, + .reloc_from = coff_aarch64_reloc_from, +}; + static int aa64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) { const Section* s; u8 cur[4]; @@ -159,6 +168,7 @@ const ArchImpl arch_impl_aa64 = { .link = &link_arch_aa64, .elf = &aa64_elf_ops, .macho = &aa64_macho_ops, + .coff = &aa64_coff_ops, .dwarf = &aa64_dwarf_ops, .dbg = &aa64_dbg_ops, .predefined_macros = aa64_predefined_macros, diff --git a/src/arch/aa64/emit.c b/src/arch/aa64/emit.c @@ -640,13 +640,26 @@ CGLocalStorage aa_param(CGTarget *t, const CGParamDesc *p) { u32 dst = reg_num((Operand){.kind = OPK_REG, .v.reg = st.v.reg}); if (a->next_param_int < 8) { u32 src = a->next_param_int++; - u32 sf = (sz == 8) ? 
1u : 0u; - if (dst != src) aa64_emit32(t->mc, aa64_mov_reg(sf, dst, src)); + if (p->type == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F64)) { + aa64_emit32(t->mc, aa64_fmov_d_x(dst, src)); + } else if (p->type == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F32)) { + aa64_emit32(t->mc, aa64_fmov_s_w(dst, src)); + } else { + u32 sf = (sz == 8) ? 1u : 0u; + if (dst != src) aa64_emit32(t->mc, aa64_mov_reg(sf, dst, src)); + } } else { u32 caller_off = a->next_param_stack; a->next_param_stack += 8; - aa64_emit_ldur_off(t->mc, sidx, dst, incoming_stack_base, - incoming_stack_bias + (i32)caller_off, AA_TMP0); + if (p->type == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F64) || + p->type == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F32)) { + aa64_emit_ldur_fp_off(t->mc, sidx, dst, incoming_stack_base, + incoming_stack_bias + (i32)caller_off, + AA_TMP0); + } else { + aa64_emit_ldur_off(t->mc, sidx, dst, incoming_stack_base, + incoming_stack_bias + (i32)caller_off, AA_TMP0); + } } } else if (pt->cls == ABI_CLASS_FP) { u32 dst = reg_num((Operand){.kind = OPK_REG, .v.reg = st.v.reg}); diff --git a/src/arch/aa64/link.c b/src/arch/aa64/link.c @@ -135,6 +135,31 @@ static u32 aa64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr, return 2; } +/* PE/COFF IAT stub for aarch64 (12 B): + * + * adrp x16, iat_slot@PAGE ; x16 = page-aligned base + * ldr x16, [x16, #iat_off] ; x16 = *iat_slot (function ptr) + * br x16 ; tail-call + * + * Uses x16 (intra-procedure-call scratch) so the called function + * sees an unperturbed x30 / argument registers. Page+offset are + * baked from the post-shift IAT slot vaddr; no apply-time reloc + * needed because both ends move together under image-base shift. 
*/ +static void aa64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr, + u64 iat_slot_vaddr) { + u32 immlo, immhi; + aa64_adrp_imm_halves(stub_vaddr, iat_slot_vaddr, &immlo, &immhi); + u32 lo12 = (u32)(iat_slot_vaddr & AA64_PAGE_MASK); + /* IAT slots are 8-byte aligned (function pointers), so the low 3 + * bits of lo12 are always 0; LDR Xt scales the imm12 by 8. */ + u32 ldr_imm12 = (lo12 >> 3) & 0xfffu; + + wr_u32_le(dst + 0, aa64_adrp(AA64_PLT_SCRATCH_X16, immlo, immhi)); + wr_u32_le(dst + 4, aa64_ldr64_uimm12(AA64_PLT_SCRATCH_X16, + AA64_PLT_SCRATCH_X16, ldr_imm12)); + wr_u32_le(dst + 8, aa64_br(AA64_PLT_SCRATCH_X16)); +} + static void aa64_emit_macho_stub(u8* out, u64 stub_vaddr, u64 got_slot_vaddr) { i64 page_s = ((i64)got_slot_vaddr) & ~(i64)0xfff; i64 page_p = ((i64)stub_vaddr) & ~(i64)0xfff; @@ -200,6 +225,9 @@ const LinkArchDesc link_arch_aa64 = { .macho_stub_size = AA64_IPLT_STUB_SIZE, .emit_macho_stub = aa64_emit_macho_stub, + .coff_stub_size = AA64_IPLT_STUB_SIZE, + .emit_coff_iat_stub = aa64_emit_coff_iat_stub, + .is_branch_reloc = aa64_is_branch_reloc, .is_got_load_reloc = aa64_is_got_load_reloc, .is_tlvp_reloc = aa64_is_tlvp_reloc, diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c @@ -70,6 +70,18 @@ static void aa_load_const(CGTarget* t, Operand dst, ConstBytes cb) { } static void aa_copy(CGTarget* t, Operand dst, Operand src) { + if (dst.cls == RC_FP && src.cls == RC_INT) { + u32 sz = type_byte_size(dst.type); + aa64_emit32(t->mc, sz == 8 ? aa64_fmov_d_x(reg_num(dst), reg_num(src)) + : aa64_fmov_s_w(reg_num(dst), reg_num(src))); + return; + } + if (dst.cls == RC_INT && src.cls == RC_FP) { + u32 sz = type_byte_size(src.type); + aa64_emit32(t->mc, sz == 8 ? 
aa64_fmov_x_d(reg_num(dst), reg_num(src)) + : aa64_fmov_w_s(reg_num(dst), reg_num(src))); + return; + } if (dst.cls == RC_FP || src.cls == RC_FP) { if (type_byte_size(dst.type) == 16) { aa64_emit32(t->mc, aa64_mov_v16b(reg_num(dst), reg_num(src))); @@ -356,6 +368,66 @@ static void aa_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) { return; } + /* Windows-on-ARM64 TLS Local-Exec. + * + * ldr xd, [x18, #0x58] ; xd = TEB->TlsSlots (TLS array) + * adrp x16, _tls_index ; ADR_PREL_PG_HI21 + * ldr w16, [x16, :lo12:_tls_index] ; LDST32_ABS_LO12_NC + * add xd, xd, x16, lsl #3 ; xd += index*8 + * ldr xd, [xd] ; xd = per-image TLS block base + * add xd, xd, #:secrel_hi12:sym, lsl#12 ; SECREL_HIGH12A + * add xd, xd, #:secrel_lo12:sym ; SECREL_LOW12A + * + * x16 (IP0) is a caller-saved intra-procedure-call scratch reg, + * always safe to clobber inside a function body. The two ADD-imm12 + * SECREL fixups assume the merged .tls section is < 16 MiB; cfree + * panics with a clear diagnostic at link time if that ever fails. */ + if (t->c->target.os == CFREE_OS_WINDOWS) { + Sym idx_name = pool_intern_cstr(t->c->global, "_tls_index"); + ObjSymId idx_sym = obj_symbol_find(t->obj, idx_name); + if (idx_sym == 0) { + idx_sym = obj_symbol(t->obj, idx_name, SB_GLOBAL, SK_UNDEF, + OBJ_SEC_NONE, 0, 0); + } + /* Windows ARM64 reserves x18 as the TEB pointer. Do not read + * TPIDR_EL0 here; Wine and real Windows expose the TLS slots via + * x18 + 0x58, matching clang/llvm-mingw codegen. 
*/ + aa64_emit32(mc, aa64_ldr_uimm(/*size=*/3, rd, /*Rn=*/18, + /*byte_off=*/0x58)); + + u32 adrp_pos = mc->pos(mc); + aa64_emit32(mc, aa64_adrp_base(/*Rd=*/16)); + mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, + idx_sym, 0, 0, 0); + u32 ldr_pos = mc->pos(mc); + aa64_emit32(mc, + aa64_ldr_uimm(/*size=*/2, /*Rt=*/16, /*Rn=*/16, /*byte_off=*/0)); + mc->emit_reloc_at(mc, sec, ldr_pos, R_AARCH64_LDST32_ABS_LO12_NC, + idx_sym, 0, 0, 0); + + /* add xd, xd, x16, LSL #3: + * 0x8B000000 | (Rm << 16) | (3 << 10) | (Rn << 5) | Rd + * sf=1, shift=LSL (00), Rm=16. */ + u32 add_shr = + 0x8B000000u | (16u << 16) | (3u << 10) | ((rd & 0x1fu) << 5) | + (rd & 0x1fu); + aa64_emit32(mc, add_shr); + aa64_emit32(mc, aa64_ldr_uimm(/*size=*/3, rd, rd, /*byte_off=*/0)); + + /* add xd, xd, #(0 << 12), then patch HIGH12A: sh=1 in the encoding. */ + u32 hi_pos = mc->pos(mc); + aa64_emit32(mc, + aa64_add_imm(/*sf=*/1, rd, rd, /*imm12=*/0, /*sh=*/1)); + mc->emit_reloc_at(mc, sec, hi_pos, R_COFF_AARCH64_SECREL_HIGH12A, sym, + addend, 0, 0); + u32 lo_pos = mc->pos(mc); + aa64_emit32(mc, + aa64_add_imm(/*sf=*/1, rd, rd, /*imm12=*/0, /*sh=*/0)); + mc->emit_reloc_at(mc, sec, lo_pos, R_COFF_AARCH64_SECREL_LOW12A, sym, + addend, 0, 0); + return; + } + aa64_emit32(mc, aa64_mrs_tpidr_el0(AA_TMP0)); u32 hi_pos = mc->pos(mc); @@ -890,6 +962,19 @@ static void aa_store_stack_reg(CGTarget* t, u32 reg, RegClass cls, aa_store(t, addr, src, ma); } +static int aa_windows_fp_vararg(const CGTarget* t, const CGABIValue* av) { + return t->c->target.os == CFREE_OS_WINDOWS && av && av->abi == NULL && + av->storage.cls == RC_FP; +} + +static void aa_move_fp_to_int_reg(MCEmitter* mc, u32 dst_reg, Operand src, + u32 size) { + if (size == 8) + aa64_emit32(mc, aa64_fmov_x_d(dst_reg, reg_num(src))); + else + aa64_emit32(mc, aa64_fmov_w_s(dst_reg, reg_num(src))); +} + static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, const CGABIValue* av, u32* next_int, u32* next_fp, u32* stack_off, int tail) 
{ @@ -904,7 +989,10 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, va_ai.kind = ABI_ARG_DIRECT; va_ai.parts = &va_pt; va_ai.nparts = 1; - va_pt.cls = (av->storage.cls == RC_FP) ? ABI_CLASS_FP : ABI_CLASS_INT; + va_pt.cls = aa_windows_fp_vararg(t, av) + ? ABI_CLASS_INT + : ((av->storage.cls == RC_FP) ? ABI_CLASS_FP + : ABI_CLASS_INT); va_pt.size = sz; va_pt.align = sz; va_pt.src_offset = 0; @@ -961,7 +1049,11 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, } case OPK_REG: { u32 sf = (sz == 8) ? 1u : 0u; - aa64_emit32(t->mc, aa64_mov_reg(sf, dst_reg, reg_num(av->storage))); + if (av->storage.cls == RC_FP) + aa_move_fp_to_int_reg(t->mc, dst_reg, av->storage, sz); + else + aa64_emit32(t->mc, + aa64_mov_reg(sf, dst_reg, reg_num(av->storage))); break; } case OPK_LOCAL: { @@ -1072,7 +1164,8 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, } } -static void count_arg_stack(const ABIFuncInfo* fi, const CGABIValue* av, +static void count_arg_stack(CGTarget* t, const ABIFuncInfo* fi, + const CGABIValue* av, u32* next_int, u32* next_fp, u32* stack_off) { ABIArgInfo va_ai; ABIArgPart va_pt; @@ -1084,7 +1177,10 @@ static void count_arg_stack(const ABIFuncInfo* fi, const CGABIValue* av, va_ai.kind = ABI_ARG_DIRECT; va_ai.parts = &va_pt; va_ai.nparts = 1; - va_pt.cls = (av->storage.cls == RC_FP) ? ABI_CLASS_FP : ABI_CLASS_INT; + va_pt.cls = aa_windows_fp_vararg(t, av) + ? ABI_CLASS_INT + : ((av->storage.cls == RC_FP) ? 
ABI_CLASS_FP + : ABI_CLASS_INT); va_pt.size = sz; va_pt.align = sz; va_pt.src_offset = 0; @@ -1122,7 +1218,7 @@ static u32 aa_call_stack_size(CGTarget* t, const CGCallDesc* d) { (void)t; u32 next_int = 0, next_fp = 0, stack_off = 0; for (u32 i = 0; i < d->nargs; ++i) - count_arg_stack(d->abi, &d->args[i], &next_int, &next_fp, &stack_off); + count_arg_stack(t, d->abi, &d->args[i], &next_int, &next_fp, &stack_off); return (stack_off + 15u) & ~15u; } @@ -1644,6 +1740,23 @@ static void aa_va_start_(CGTarget* t, Operand ap_op) { aa64_emit32(mc, aa64_str_uimm(3, AA_TMP0, ap, 0)); return; } + if (t->c->target.os == CFREE_OS_WINDOWS) { + if (a->next_param_int < 8) { + AASlot* gs = aa64_slot_get(a, a->gp_save_slot); + emit_fp_off(mc, AA_TMP0, + -(i32)gs->off + (i32)(a->next_param_int * 8u)); + } else { + u32 ofs = 16u + a->next_param_stack; + if (ofs <= 0xfff) + aa64_emit32(mc, aa64_add_imm(1, AA_TMP0, 29, ofs, 0)); + else { + aa64_emit_load_imm(mc, 1, AA_TMP0, (i64)ofs); + aa64_emit32(mc, aa64_add(1, AA_TMP0, 29, AA_TMP0)); + } + } + aa64_emit32(mc, aa64_str_uimm(3, AA_TMP0, ap, 0)); + return; + } AASlot* gs = aa64_slot_get(a, a->gp_save_slot); AASlot* fs = aa64_slot_get(a, a->fp_save_slot); @@ -1671,6 +1784,7 @@ static void aa_va_start_(CGTarget* t, Operand ap_op) { static void aa_va_arg_(CGTarget* t, Operand dst, Operand ap_op, CfreeCgTypeId ty) { + AAImpl* a = impl_of(t); MCEmitter* mc = t->mc; u32 ap = reg_num(ap_op); int is_fp = (dst.cls == RC_FP); @@ -1690,6 +1804,33 @@ static void aa_va_arg_(CGTarget* t, Operand dst, Operand ap_op, aa64_emit32(mc, aa64_stur(3, AA_TMP1, ap, 0)); return; } + if (t->c->target.os == CFREE_OS_WINDOWS) { + MCLabel L_store = mc->label_new(mc); + aa64_emit32(mc, aa64_ldur(3, AA_TMP1, ap, 0)); + if (is_fp) + aa64_emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), AA_TMP1, 0)); + else + aa64_emit32(mc, aa64_ldur(sidx, reg_num(dst), AA_TMP1, 0)); + aa64_emit32(mc, aa64_add_imm(1, AA_TMP1, AA_TMP1, 8u, 0)); + + AASlot* gs = aa64_slot_get(a, 
a->gp_save_slot); + if (gs) { + emit_fp_off(mc, AA_TMP2, -(i32)gs->off + 64); + aa64_emit32(mc, aa64_subs_reg(1, 31u, AA_TMP1, AA_TMP2)); + aa64_emit32(mc, aa64_b_cond(0x1 /*NE*/)); + mc->emit_label_ref(mc, L_store, R_AARCH64_CONDBR19, 4, 0); + u32 ofs = 16u + a->next_param_stack; + if (ofs <= 0xfff) + aa64_emit32(mc, aa64_add_imm(1, AA_TMP1, 29, ofs, 0)); + else { + aa64_emit_load_imm(mc, 1, AA_TMP1, (i64)ofs); + aa64_emit32(mc, aa64_add(1, AA_TMP1, 29, AA_TMP1)); + } + } + mc->label_place(mc, L_store); + aa64_emit32(mc, aa64_stur(3, AA_TMP1, ap, 0)); + return; + } MCLabel L_stack = mc->label_new(mc); MCLabel L_done = mc->label_new(mc); @@ -1737,6 +1878,11 @@ static void aa_va_copy_(CGTarget* t, Operand d, Operand s) { aa64_emit32(mc, aa64_stur(3, AA_TMP0, dr, 0)); return; } + if (t->c->target.os == CFREE_OS_WINDOWS) { + aa64_emit32(mc, aa64_ldur(3, AA_TMP0, sr, 0)); + aa64_emit32(mc, aa64_stur(3, AA_TMP0, dr, 0)); + return; + } for (u32 i = 0; i < 32u; i += 8u) { aa64_emit32(mc, aa64_ldur(3, AA_TMP0, sr, (i32)i)); aa64_emit32(mc, aa64_stur(3, AA_TMP0, dr, (i32)i)); diff --git a/src/arch/aa64/opt_coord.c b/src/arch/aa64/opt_coord.c @@ -202,6 +202,11 @@ static u32 aa_return_reg_mask(CGTarget* t, const ABIFuncInfo* abi, return mask; } +static int aa_windows_fp_vararg_plan(CGTarget* t, const CGABIValue* av) { + return t->c->target.os == CFREE_OS_WINDOWS && av && av->abi == NULL && + av->storage.cls == RC_FP; +} + static void aa_plan_call(CGTarget* t, const CGCallDesc* d, CGCallPlan* out) { memset(out, 0, sizeof *out); out->callee = d->callee; @@ -236,7 +241,10 @@ static void aa_plan_call(CGTarget* t, const CGCallDesc* d, CGCallPlan* out) { if (!ai) { memset(&vai, 0, sizeof vai); memset(&vap, 0, sizeof vap); - vap.cls = av->storage.cls == RC_FP ? ABI_CLASS_FP : ABI_CLASS_INT; + vap.cls = aa_windows_fp_vararg_plan(t, av) + ? ABI_CLASS_INT + : (av->storage.cls == RC_FP ? 
ABI_CLASS_FP + : ABI_CLASS_INT); vap.size = type_byte_size(av->type); vai.kind = ABI_ARG_DIRECT; vai.nparts = 1; diff --git a/src/arch/registry.c b/src/arch/registry.c @@ -37,3 +37,16 @@ const ArchImpl* arch_lookup_macho_cputype(u32 cputype) { } return NULL; } + +const ArchImpl* arch_lookup_coff_machine(u16 machine) { + /* IMAGE_FILE_MACHINE_ARM64EC (0xA641) aliases to AArch64 — the + * instruction encoding is identical; only the ABI differs, and the + * linker treats both as a single image's worth of code on Windows + * targets. */ + if (machine == 0xA641u) machine = 0xAA64u; + for (u32 i = 0; i < (u32)(sizeof arch_impls / sizeof arch_impls[0]); ++i) { + const ArchImpl* impl = arch_impls[i]; + if (impl->coff && impl->coff->machine == machine) return impl; + } + return NULL; +} diff --git a/src/arch/x64/alloc.c b/src/arch/x64/alloc.c @@ -91,13 +91,27 @@ XSlot* x64_slot_get(XImpl* a, FrameSlot fs) { } /* ---- param: bind incoming arg(s) to the requested storage ---- */ + +/* Win64 shares one arg-slot counter across int and FP regs; the kth + * argument consumes either GPR-k or XMM-k but never both. Keep + * next_param_int and next_param_fp in lockstep so a later FP/int arg + * sees the same slot index. */ +static inline void x_param_sync_slot(XImpl* a) { + if (!a->abi->slot_shared_int_fp) return; + u32 m = a->next_param_int > a->next_param_fp ? 
a->next_param_int + : a->next_param_fp; + a->next_param_int = m; + a->next_param_fp = m; +} + static void x_consume_param_location(XImpl* a, const ABIArgInfo* ai) { if (!ai || ai->kind == ABI_ARG_IGNORE) return; if (ai->kind == ABI_ARG_INDIRECT) { - if (a->next_param_int < 6) + if (a->next_param_int < a->abi->n_int_args) ++a->next_param_int; else a->next_param_stack += 8; + x_param_sync_slot(a); return; } if (ai->kind == ABI_ARG_DIRECT && x64_abi_direct_to_stack( @@ -109,16 +123,17 @@ static void x_consume_param_location(XImpl* a, const ABIArgInfo* ai) { for (u16 i = 0; i < ai->nparts; ++i) { const ABIArgPart* pt = &ai->parts[i]; if (pt->cls == ABI_CLASS_INT) { - if (a->next_param_int < 6) + if (a->next_param_int < a->abi->n_int_args) ++a->next_param_int; else a->next_param_stack += 8; } else if (pt->cls == ABI_CLASS_FP) { - if (a->next_param_fp < 8) + if (a->next_param_fp < a->abi->n_fp_args) ++a->next_param_fp; else a->next_param_stack += 8; } + x_param_sync_slot(a); } } @@ -143,7 +158,13 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) { compiler_panic(t->c, a->loc, "x64 param: bad slot"); const ABIArgInfo* ai = p->abi; u32 incoming_stack_base = a->omit_frame ? X64_RSP : X64_RBP; - i32 incoming_stack_bias = a->omit_frame ? 8 : 16; + /* incoming_stack_bias is the offset from the base register to the + * first stack-passed argument. After `push rbp` we are at +0; +8 + * skips the saved RBP and +16 skips the saved return address. + * Win64 reserves 32 B of caller-provided "home space" for the 4 + * register arg slots immediately above the return address, so stack + * args start at [rbp + 16 + 32] = +48. SysV has no shadow space. */ + i32 incoming_stack_bias = a->omit_frame ? 
8 : (i32)(16u + a->abi->shadow_space); if (ai->kind == ABI_ARG_IGNORE) return st; if (st.kind == CG_LOCAL_STORAGE_REG && st.v.reg == (Reg)REG_NONE) { @@ -158,8 +179,8 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) { const ABIArgPart* pt = &ai->parts[0]; u32 sz = pt->size; if (pt->cls == ABI_CLASS_INT) { - if (a->next_param_int < 6) { - u32 src = g_int_arg_regs[a->next_param_int++]; + if (a->next_param_int < a->abi->n_int_args) { + u32 src = a->abi->int_args[a->next_param_int++]; u32 dst = st.v.reg & 0xFu; int w = (sz == 8) ? 1 : 0; if (dst != src) emit_mov_rr(t->mc, w, dst, src); @@ -172,7 +193,7 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) { } else if (pt->cls == ABI_CLASS_FP) { u8 prefix = (sz == 8) ? 0xF2 : 0xF3; u32 dst = st.v.reg & 0xFu; - if (a->next_param_fp < 8) { + if (a->next_param_fp < a->abi->n_fp_args) { u32 src = a->next_param_fp++; if (dst != src) emit_sse_rr(t->mc, prefix, 0x10, dst, src); } else { @@ -185,13 +206,14 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) { compiler_panic(t->c, a->loc, "x64 param: ABI class %d unimpl", (int)pt->cls); } + x_param_sync_slot(a); return st; } if (ai->kind == ABI_ARG_INDIRECT) { /* Incoming pointer to byval copy: load pointer, memcpy into slot. 
*/ u32 ptr_reg; - if (a->next_param_int < 6) { - ptr_reg = g_int_arg_regs[a->next_param_int++]; + if (a->next_param_int < a->abi->n_int_args) { + ptr_reg = a->abi->int_args[a->next_param_int++]; } else { u32 caller_off = a->next_param_stack; a->next_param_stack += 8; @@ -199,6 +221,7 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) { incoming_stack_bias + (i32)caller_off); ptr_reg = X64_R11; } + x_param_sync_slot(a); u32 nbytes = s->size; u32 i = 0; while (i + 8 <= nbytes) { @@ -250,8 +273,8 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) { u32 part_off = pt->src_offset; u32 sz = pt->size; if (pt->cls == ABI_CLASS_INT) { - if (a->next_param_int < 6) { - u32 reg = g_int_arg_regs[a->next_param_int++]; + if (a->next_param_int < a->abi->n_int_args) { + u32 reg = a->abi->int_args[a->next_param_int++]; emit_mov_store(t->mc, sz, reg, X64_RBP, -(i32)s->off + (i32)part_off); } else { u32 caller_off = a->next_param_stack; @@ -262,7 +285,7 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) { -(i32)s->off + (i32)part_off); } } else if (pt->cls == ABI_CLASS_FP) { - if (a->next_param_fp < 8) { + if (a->next_param_fp < a->abi->n_fp_args) { u32 xmm = a->next_param_fp++; u8 prefix = (sz == 8) ? 
0xF2 : 0xF3; emit_sse_store(t->mc, prefix, 0x11, xmm, X64_RBP, @@ -280,6 +303,7 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) { compiler_panic(t->c, a->loc, "x64 param: ABI class %d unimpl", (int)pt->cls); } + x_param_sync_slot(a); } return st; } diff --git a/src/arch/x64/arch.c b/src/arch/x64/arch.c @@ -7,6 +7,7 @@ #include "arch/x64/x64.h" #include "core/bytes.h" #include "link/link_arch.h" +#include "obj/coff.h" #include "obj/elf.h" #include "obj/macho.h" #include "obj/obj.h" @@ -19,6 +20,8 @@ static const ABIVtable* x64_abi_vtable(Compiler* c, CfreeOSKind os) { switch (os) { case CFREE_OS_MACOS: return &apple_x64_vtable; + case CFREE_OS_WINDOWS: + return &win64_x64_vtable; default: return &sysv_x64_vtable; } @@ -45,6 +48,12 @@ static const ArchDwarfOps x64_dwarf_ops = { .max_ops_per_inst = 1u, }; +static const ArchCoffOps x64_coff_ops = { + .machine = IMAGE_FILE_MACHINE_AMD64, + .reloc_to = coff_x86_64_reloc_to, + .reloc_from = coff_x86_64_reloc_from, +}; + static int x64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) { (void)c; if (!fx || fx->kind != R_PC32 || fx->width != 4) return 1; @@ -84,6 +93,7 @@ const ArchImpl arch_impl_x64 = { .link = &link_arch_x64, .elf = &x64_elf_ops, .macho = &x64_macho_ops, + .coff = &x64_coff_ops, .dwarf = &x64_dwarf_ops, .dbg = &x64_dbg_ops, .predefined_macros = x64_predefined_macros, diff --git a/src/arch/x64/asm.c b/src/arch/x64/asm.c @@ -37,7 +37,9 @@ typedef struct X64AsmOperand { u8 reg; u8 base; u8 high8; - u8 pad[3]; + u8 seg; + u8 no_base; + u8 pad[1]; i64 imm; i32 disp; } X64AsmOperand; @@ -101,6 +103,21 @@ static int x64_xmm_from_name(AsmDriver* d, Sym s, u32* reg_out) { return 1; } +static int x64_segment_prefix_from_name(AsmDriver* d, Sym s, u8* prefix_out) { + size_t n = 0; + const char* p = pool_str(asm_driver_pool(d), s, &n); + if (!p || n != 2) return 0; + if (p[0] == 'f' && p[1] == 's') { + if (prefix_out) *prefix_out = 0x64; + return 1; + } + if (p[0] == 'g' && p[1] == 's') { + if 
(prefix_out) *prefix_out = 0x65; + return 1; + } + return 0; +} + static u32 parse_reg(AsmDriver* d, u32* width_out, u32* high8_out) { AsmTok t; u32 reg; @@ -136,6 +153,19 @@ static X64AsmOperand parse_operand(AsmDriver* d) { (void)asm_driver_next(d); ident = asm_driver_next(d); if (ident.kind != ASM_TOK_IDENT) asm_driver_panic(d, "x64 asm: bad register"); + if (x64_segment_prefix_from_name(d, ident.v.ident, &op.seg)) { + asm_driver_expect_punct(d, ':', "':' after x64 segment register"); + op.kind = X64_ASM_OP_MEM; + if (!asm_driver_tok_is_punct(asm_driver_peek(d), '(')) + op.disp = (i32)asm_driver_parse_const(d); + if (asm_driver_eat_punct(d, '(')) { + op.base = (u8)parse_reg(d, NULL, NULL); + asm_driver_expect_punct(d, ')', "')' in x64 memory operand"); + } else { + op.no_base = 1; + } + return op; + } if (x64_xmm_from_name(d, ident.v.ident, &width)) { op.kind = X64_ASM_OP_XMM; op.reg = (u8)width; @@ -164,6 +194,48 @@ static X64AsmOperand parse_operand(AsmDriver* d) { return op; } +static u32 x64_pack_rex_mem_operand(u8* out, int w, u32 reg, + X64AsmOperand mem) { + return x64_pack_rex(out, w, reg, 0, mem.no_base ? 
0u : mem.base); +} + +static u32 x64_pack_mem_operand(u8* out, u32 reg, X64AsmOperand mem) { + if (mem.no_base) { + out[0] = x64_modrm(0u, reg, 4u); + out[1] = x64_sib(0u, 4u, 5u); + return 2u + x64_put_u32le(out + 2, (u32)mem.disp); + } + return x64_pack_mem(out, reg, mem.base, mem.disp); +} + +static void emit_mov_load_operand(MCEmitter* mc, u32 size, u32 dst, + X64AsmOperand src) { + u8 buf[16]; + u32 n = 0; + if (size == 2u) buf[n++] = X64_OPSIZE_PFX; + if (src.seg) buf[n++] = src.seg; + n += x64_pack_rex_mem_operand(buf + n, size == 8u, dst, src); + buf[n++] = X64_OPC_MOV_R_RM; + n += x64_pack_mem_operand(buf + n, dst, src); + mc->emit_bytes(mc, buf, n); +} + +static void emit_mov_store_operand(MCEmitter* mc, u32 size, u32 src, + X64AsmOperand dst, int force_rex) { + u8 buf[16]; + u32 n = 0; + if (size == 2u) buf[n++] = X64_OPSIZE_PFX; + if (dst.seg) buf[n++] = dst.seg; + if (force_rex) + n += x64_pack_rex_force(buf + n, size == 8u, src, 0, + dst.no_base ? 0u : dst.base); + else + n += x64_pack_rex_mem_operand(buf + n, size == 8u, src, dst); + buf[n++] = size == 1u ? 
X64_OPC_MOV_RM_R8 : X64_OPC_MOV_RM_R; + n += x64_pack_mem_operand(buf + n, src, dst); + mc->emit_bytes(mc, buf, n); +} + static void expect_comma(AsmDriver* d) { if (!asm_driver_eat_comma(d)) asm_driver_panic(d, "x64 asm: expected ','"); } @@ -213,12 +285,14 @@ static __attribute__((unused)) void emit_movb_store_operand( AsmDriver* d, MCEmitter* mc, X64AsmOperand src, X64AsmOperand dst) { if (src.high8) { u8 ob = 0x88; - if (dst.base >= 8u) asm_driver_panic(d, "x64 asm: high-byte register cannot use REX"); + if (dst.no_base || dst.base >= 8u) + asm_driver_panic(d, "x64 asm: high-byte register cannot use REX"); + if (dst.seg) mc->emit_bytes(mc, &dst.seg, 1); mc->emit_bytes(mc, &ob, 1); emit_mem_operand(mc, src.reg, dst.base, dst.disp); return; } - emit_mov_store(mc, 1, src.reg, dst.base, dst.disp); + emit_mov_store_operand(mc, 1, src.reg, dst, 1); } static __attribute__((unused)) void emit_rm_imm(AsmDriver* d, MCEmitter* mc, @@ -607,12 +681,12 @@ static void parse_alu_rr(X64ParseCtx* p) { if (p->width == 1u) emit_movb_store_operand(p->d, p->mc, src, dst); else - emit_mov_store(p->mc, p->width, src.reg, dst.base, dst.disp); + emit_mov_store_operand(p->mc, p->width, src.reg, dst, 0); return; } if (p->desc->opc[0] == 0x89u && src.kind == X64_ASM_OP_MEM && dst.kind == X64_ASM_OP_REG) { - emit_mov_load(p->mc, p->width, 0, dst.reg, src.base, src.disp); + emit_mov_load_operand(p->mc, p->width, dst.reg, src); return; } asm_driver_panic(p->d, "x64 asm: unsupported alu_rr form"); @@ -647,15 +721,9 @@ static void parse_mov_rm_load(X64ParseCtx* p) { } if (src.kind == X64_ASM_OP_MEM && dst.kind == X64_ASM_OP_REG) { if (p->width == 2u) { - u8 buf[16]; - u32 n = x64_mov_rm_load_pack( - (X64MovRMLoad){.w = 0, .opc0 = X64_OPC_MOV_R_RM, .dst = dst.reg, - .base = src.base, .disp = src.disp}, - buf + 1); - buf[0] = X64_OPSIZE_PFX; - emit_packed(p->mc, buf, n + 1u); + emit_mov_load_operand(p->mc, p->width, dst.reg, src); } else { - emit_mov_load(p->mc, p->width, 0, dst.reg, src.base, 
src.disp); + emit_mov_load_operand(p->mc, p->width, dst.reg, src); } return; } @@ -946,6 +1014,13 @@ static void parse_sse_rr(X64ParseCtx* p) { dst.disp); return; } + if (dst.kind == X64_ASM_OP_MEM && src.kind == X64_ASM_OP_XMM && + p->desc->opc[1] == 0x28u && + !strcmp(p->desc->mnemonic, "movaps")) { + emit_sse_store(p->mc, p->desc->leg_pfx, 0x29, src.reg, dst.base, + dst.disp); + return; + } if (dst.kind != X64_ASM_OP_XMM) asm_driver_panic(p->d, "x64 asm: sse dst xmm"); if (src.kind == X64_ASM_OP_XMM) emit_sse_rr(p->mc, p->desc->leg_pfx, p->desc->opc[1], dst.reg, src.reg); @@ -1172,22 +1247,11 @@ static void x64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) { if (w == 1u) emit_movb_store_operand(d, mc, src, dst); else - emit_mov_store(mc, w, src.reg, dst.base, dst.disp); + emit_mov_store_operand(mc, w, src.reg, dst, 0); return; } if (src.kind == X64_ASM_OP_MEM && dst.kind == X64_ASM_OP_REG) { - if (w == 2u) { - u8 buf[16]; - u32 nn = x64_mov_rm_load_pack( - (X64MovRMLoad){.w = 0, .opc0 = X64_OPC_MOV_R_RM, - .dst = dst.reg, .base = src.base, - .disp = src.disp}, - buf + 1); - buf[0] = X64_OPSIZE_PFX; - emit_packed(mc, buf, nn + 1u); - } else { - emit_mov_load(mc, w, 0, dst.reg, src.base, src.disp); - } + emit_mov_load_operand(mc, w, dst.reg, src); return; } asm_driver_panic(d, "x64 asm: mov form"); diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c @@ -2,7 +2,7 @@ * * Covers: REX, ModR/M, SIB, all emit_* primitives, x_func_begin, * x_func_end, and the shared constant tables (g_int_order, g_fp_order, - * g_int_arg_regs). */ + * per-ABI int_args tables exposed via X64ABIRegs). 
*/ #include <string.h> @@ -30,8 +30,44 @@ const Reg g_fp_order[10] = { X64_XMM0 + 11, X64_XMM0 + 12, X64_XMM0 + 13, X64_XMM0 + 14, X64_XMM15, }; -const u32 g_int_arg_regs[6] = {X64_RDI, X64_RSI, X64_RDX, - X64_RCX, X64_R8, X64_R9}; +static const u32 g_int_arg_regs_sysv[6] = {X64_RDI, X64_RSI, X64_RDX, + X64_RCX, X64_R8, X64_R9}; +static const u32 g_int_arg_regs_win64[4] = {X64_RCX, X64_RDX, X64_R8, X64_R9}; + +static const X64ABIRegs g_x64_abi_sysv = { + .int_args = g_int_arg_regs_sysv, + .n_int_args = 6, + .n_fp_args = 8, + .slot_shared_int_fp = 0, + .shadow_space = 0, + .emit_sysv_vararg_save = 1, + .vararg_fp_dup_to_gpr = 0, + .cs_int_mask = (1ull << X64_RBX) | (1ull << X64_RBP) | (1ull << X64_R12) | + (1ull << X64_R13) | (1ull << X64_R14) | (1ull << X64_R15), + .cs_fp_mask = 0, +}; + +static const X64ABIRegs g_x64_abi_win64 = { + .int_args = g_int_arg_regs_win64, + .n_int_args = 4, + .n_fp_args = 4, + .slot_shared_int_fp = 1, + .shadow_space = X64_WIN64_SHADOW_SPACE, + .emit_sysv_vararg_save = 0, + .vararg_fp_dup_to_gpr = 1, + .cs_int_mask = (1ull << X64_RBX) | (1ull << X64_RBP) | (1ull << X64_R12) | + (1ull << X64_R13) | (1ull << X64_R14) | (1ull << X64_R15) | + (1ull << X64_RDI) | (1ull << X64_RSI), + .cs_fp_mask = (1ull << X64_XMM6) | (1ull << X64_XMM7) | (1ull << X64_XMM8) | + (1ull << (X64_XMM0 + 9)) | (1ull << (X64_XMM0 + 10)) | + (1ull << (X64_XMM0 + 11)) | (1ull << (X64_XMM0 + 12)) | + (1ull << (X64_XMM0 + 13)) | (1ull << (X64_XMM0 + 14)) | + (1ull << X64_XMM15), +}; + +const X64ABIRegs* x64_abi_for_os(CfreeOSKind os) { + return (os == CFREE_OS_WINDOWS) ? &g_x64_abi_win64 : &g_x64_abi_sysv; +} /* ============================================================ * Byte-level emit helpers. 
@@ -480,11 +516,28 @@ void emit_sse_rr_w(MCEmitter *mc, u8 prefix, u8 opcode, int w, u32 dst, /* ============================================================ * Function lifecycle */ -static u32 count_x64_cs_int(u32 mask) { +/* Count the callee-saved GPR bits in `mask` that the ABI's cs_int_mask + * actually owns. RBP is excluded because the prologue head saves it via + * `push rbp`, not via the per-reg slot loop. */ +static u32 count_x64_cs_int(u32 mask, u64 cs_int_mask) { + u32 n = 0; + u64 eligible = (u64)mask & cs_int_mask; + eligible &= ~(1ull << X64_RBP); + while (eligible) { + eligible &= (eligible - 1); + ++n; + } + return n; +} + +/* Count callee-saved XMM bits the ABI claims (Win64 only — SysV's + * cs_fp_mask is empty). */ +static u32 count_x64_cs_fp(u32 mask, u64 cs_fp_mask) { u32 n = 0; - for (u32 i = 0; i < 5u; ++i) { - Reg r = g_int_order[i]; - if (mask & (1u << r)) ++n; + u64 eligible = (u64)mask & cs_fp_mask; + while (eligible) { + eligible &= (eligible - 1); + ++n; } return n; } @@ -492,7 +545,14 @@ static u32 count_x64_cs_int(u32 mask) { static u32 x64_planned_prologue_bytes(const XImpl *a) { u32 n = X64_PROLOGUE_BASE_BYTES; if (a->has_sret) n += X64_PROLOGUE_SRET_BYTES; - n += count_x64_cs_int(a->planned_cs_int_mask) * X64_PROLOGUE_SAVE_BYTES; + n += count_x64_cs_int(a->planned_cs_int_mask, a->abi->cs_int_mask) * + X64_PROLOGUE_SAVE_BYTES; + n += count_x64_cs_fp(a->planned_cs_fp_mask, a->abi->cs_fp_mask) * + X64_PROLOGUE_XMM_SAVE_BYTES; + /* We don't know the final frame size at planning time; reserve the + * chkstk delta whenever the ABI requires it so the placeholder is + * large enough if the body grows past 4 KiB. */ + if (a->abi->shadow_space) n += X64_PROLOGUE_CHKSTK_DELTA; return n ? 
n : 1u; } @@ -504,6 +564,7 @@ static void x_func_begin_init(CGTarget *t, const CGFuncDesc *fd) { mc->emit_align(mc, 16, 0x90); a->fd = fd; + a->abi = x64_abi_for_os(t->c->target.os); a->func_start = mc->pos(mc); mc_begin_function(mc, fd->sym, fd->text_section_id, a->func_start); a->next_param_int = 0; @@ -518,8 +579,11 @@ static void x_func_begin_init(CGTarget *t, const CGFuncDesc *fd) { a->max_outgoing = 0; a->used_cs_int_mask = a->has_planned_regs ? a->planned_cs_int_mask : 0; a->used_cs_fp_mask = a->has_planned_regs ? a->planned_cs_fp_mask : 0; - a->prologue_nbytes = a->has_planned_regs ? x64_planned_prologue_bytes(a) - : X64_PROLOGUE_BYTES; + a->prologue_nbytes = + a->has_planned_regs + ? x64_planned_prologue_bytes(a) + : (a->abi->shadow_space ? X64_PROLOGUE_BYTES_WIN64 + : X64_PROLOGUE_BYTES); a->planned_cs_int_mask = 0; a->planned_cs_fp_mask = 0; a->has_planned_regs = 0; @@ -536,8 +600,9 @@ static void x_func_begin_init(CGTarget *t, const CGFuncDesc *fd) { static void x_add_entry_frame_slots(CGTarget *t) { XImpl *a = impl_of(t); - /* sret: rdi at entry holds the destination pointer. Spill it to a - * hidden slot so the body can use rdi freely. */ + /* sret: the first int arg reg at entry holds the destination pointer + * (RDI on SysV, RCX on Win64). Spill it to a hidden slot so the body + * can use that register freely. */ if (a->has_sret) { FrameSlotDesc fsd = { .type = CFREE_CG_TYPE_NONE, @@ -549,15 +614,16 @@ static void x_add_entry_frame_slots(CGTarget *t) { .flags = 0, }; a->sret_ptr_slot = x_frame_slot(t, &fsd); - /* Subsequent int args start at rsi (next_param_int = 1). */ + /* Subsequent int args start at the next slot. */ a->next_param_int = 1; } - /* Variadic: reserve the SysV reg-save area (rdi..r9 at +0..+40, then - * xmm0..xmm7 at +48..+160 with 16-byte stride) and emit the saves - * directly after the prologue placeholder so the original register - * args are preserved before x_param() spills the named ones. 
*/ - if (a->is_variadic) { + /* Variadic SysV: reserve the 176 B reg-save area (rdi..r9 at +0..+40, + * then xmm0..xmm7 at +48..+160 with 16-byte stride) and emit the + * saves after the prologue placeholder. Win64 variadic uses the + * caller-provided 32 B home space at [rbp+16..+47] instead — no + * callee-allocated reg-save slot. */ + if (a->is_variadic && a->abi->emit_sysv_vararg_save) { FrameSlotDesc rsd = { .type = CFREE_CG_TYPE_NONE, .name = 0, @@ -576,69 +642,177 @@ static void x_emit_variadic_reg_saves(CGTarget *t) { MCEmitter *mc = t->mc; if (!a->is_variadic) return; - XSlot *rs = x64_slot_get(a, a->reg_save_slot); - static const u32 gprs[6] = {X64_RDI, X64_RSI, X64_RDX, - X64_RCX, X64_R8, X64_R9}; - for (u32 i = 0; i < 6; ++i) { - emit_mov_store(mc, 8, gprs[i], X64_RBP, -(i32)rs->off + (i32)(i * 8u)); - } - /* movsd writes the low 8 bytes of each xmm; va_arg reads 8 bytes per - * FP slot, so the upper half of the 16-byte stride stays unused. */ - for (u32 i = 0; i < 8; ++i) { - emit_sse_store(mc, 0xF2, 0x11, (u32)(X64_XMM0 + i), X64_RBP, - -(i32)rs->off + (i32)(48u + i * 16u)); + if (a->abi->emit_sysv_vararg_save) { + XSlot *rs = x64_slot_get(a, a->reg_save_slot); + static const u32 gprs[6] = {X64_RDI, X64_RSI, X64_RDX, + X64_RCX, X64_R8, X64_R9}; + for (u32 i = 0; i < 6; ++i) { + emit_mov_store(mc, 8, gprs[i], X64_RBP, -(i32)rs->off + (i32)(i * 8u)); + } + /* movsd writes the low 8 bytes of each xmm; va_arg reads 8 bytes per + * FP slot, so the upper half of the 16-byte stride stays unused. */ + for (u32 i = 0; i < 8; ++i) { + emit_sse_store(mc, 0xF2, 0x11, (u32)(X64_XMM0 + i), X64_RBP, + -(i32)rs->off + (i32)(48u + i * 16u)); + } + return; } + /* Win64 variadic: spill RCX, RDX, R8, R9 into the caller's 32 B home + * space at [rbp+16..+47]. va_start ends up pointing at + * [rbp+16 + named_int_slots*8] (a contiguous arg array). 
FP variadic + * args are duplicated into the matching GPR at the call site (see + * vararg_fp_dup_to_gpr), so by the time the callee accesses them + * they're already in the GPR home slot. */ + emit_mov_store(mc, 8, X64_RCX, X64_RBP, 16); + emit_mov_store(mc, 8, X64_RDX, X64_RBP, 24); + emit_mov_store(mc, 8, X64_R8, X64_RBP, 32); + emit_mov_store(mc, 8, X64_R9, X64_RBP, 40); } static u32 align_up_u32(u32 v, u32 a) { return (v + (a - 1u)) & ~(a - 1u); } +/* Spill order for the per-ABI callee-saved set. SysV: RBX, R12..R15 (the + * leading entries of g_int_order). Win64 adds RDI then RSI at the tail + * (mingw/MSVC pick a stable order; the saved slot is offsets-only for + * cfree's purposes). RBP is excluded — handled by the prologue head. */ +static const Reg g_cs_int_order_all[X64_MAX_CS_INT_REGS] = { + X64_RBX, X64_R12, X64_R13, X64_R14, X64_R15, X64_RDI, X64_RSI, +}; + +/* Spill order for Win64 callee-saved XMMs (XMM6..XMM15). */ +#define X64_MAX_CS_FP_REGS 10u +static const Reg g_cs_fp_order_all[X64_MAX_CS_FP_REGS] = { + X64_XMM6, X64_XMM7, X64_XMM8, X64_XMM0 + 9, + X64_XMM0 + 10, X64_XMM0 + 11, X64_XMM0 + 12, X64_XMM0 + 13, + X64_XMM0 + 14, X64_XMM15, +}; + static u32 x_collect_cs_regs(const XImpl *a, Reg *cs_regs) { u32 cs_used = 0; - for (u32 i = 0; i < 5u; ++i) { - Reg r = g_int_order[i]; - if (a->used_cs_int_mask & (1u << r)) - cs_regs[cs_used++] = r; + u64 mask = (u64)a->used_cs_int_mask & a->abi->cs_int_mask; + mask &= ~(1ull << X64_RBP); + for (u32 i = 0; i < X64_MAX_CS_INT_REGS; ++i) { + Reg r = g_cs_int_order_all[i]; + if (mask & (1ull << r)) cs_regs[cs_used++] = r; } return cs_used; } -static u32 x_compute_frame_size(const XImpl *a, u32 cs_used) { +static u32 x_collect_cs_fp_regs(const XImpl *a, Reg *cs_fp_regs) { + u32 n = 0; + u64 mask = (u64)a->used_cs_fp_mask & a->abi->cs_fp_mask; + for (u32 i = 0; i < X64_MAX_CS_FP_REGS; ++i) { + Reg r = g_cs_fp_order_all[i]; + if (mask & (1ull << r)) cs_fp_regs[n++] = r; + } + return n; +} + +/* Frame layout 
(rbp-relative, high → low): + * [rbp] : saved rbp (push rbp) + * [rbp - cum_off] : locals + spills (cum_off bytes) + * [rbp - xmm_base] : XMM saves, 16 B each (16-aligned) + * [rbp - xmm_base - cs_size] : GPR callee-saves + * [rsp] : outgoing args (max_outgoing, 16-aligned) + * xmm_base = align_up(cum_off, 16) when any XMM saved, else == cum_off. + * Frame size includes the alignment pad so rsp lands at 0 mod 16. */ +static u32 x_xmm_base(const XImpl *a, u32 cs_fp_used) { + if (cs_fp_used == 0) return a->cum_off; + return align_up_u32(a->cum_off, 16u); +} + +static u32 x_compute_frame_size(const XImpl *a, u32 cs_used, u32 cs_fp_used) { + u32 xmm_base = x_xmm_base(a, cs_fp_used); u32 cs_size = cs_used * 8u; - u32 raw = a->max_outgoing + cs_size + a->cum_off; + u32 xmm_size = cs_fp_used * 16u; + u32 raw = a->max_outgoing + cs_size + xmm_size + xmm_base; u32 frame_size = align_up_u32(raw, 16u); return frame_size ? frame_size : 16u; } +/* Cached lookup/creation of __chkstk as a SK_UNDEF symbol. The Win64 + * stack-probe helper is provided by mingw's libmingwex / MSVC's CRT; + * cfree references it on demand from the prologue and lets the linker + * resolve it. */ +static ObjSymId x_chkstk_sym(CGTarget *t) { + Sym name = pool_intern_cstr(t->c->global, "__chkstk"); + ObjSymId s = obj_symbol_find(t->obj, name); + if (s != 0) return s; + return obj_symbol(t->obj, name, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); +} + +/* Build the prologue byte sequence. Returns the number of bytes + * written. If `chkstk_disp_pos_out` is non-NULL and the chkstk path was + * taken, stores the byte offset of the `call __chkstk` disp32 within + * `buf` so the caller can emit the matching R_X64_PLT32 reloc. Sets + * it to UINT32_MAX otherwise. 
*/ static u32 x_build_prologue(CGTarget *t, u8 *buf, u32 cap, u32 frame_size, - const Reg *cs_regs, u32 cs_used) { + const Reg *cs_regs, u32 cs_used, + const Reg *cs_fp_regs, u32 cs_fp_used, + u32 *chkstk_disp_pos_out) { XImpl *a = impl_of(t); u32 wi = 0; + if (chkstk_disp_pos_out) *chkstk_disp_pos_out = (u32)-1; - if (wi + 11 > cap) goto overflow; + if (wi + 4 > cap) goto overflow; /* push rbp (1 byte). */ buf[wi++] = 0x55; /* mov rbp, rsp: REX.W 89 E5. */ buf[wi++] = X64_REX_BASE | X64_REX_W; buf[wi++] = 0x89; buf[wi++] = modrm(3u, X64_RSP, X64_RBP); - /* sub rsp, frame_size: REX.W 81 /5 imm32 = 7 bytes. */ - buf[wi++] = X64_REX_BASE | X64_REX_W; - buf[wi++] = 0x81; - buf[wi++] = modrm(3u, 5u, X64_RSP); - buf[wi++] = (u8)frame_size; - buf[wi++] = (u8)(frame_size >> 8); - buf[wi++] = (u8)(frame_size >> 16); - buf[wi++] = (u8)(frame_size >> 24); - - /* sret: mov [rbp + disp32], rdi. */ + + int need_chkstk = + a->abi->shadow_space && frame_size > X64_WIN64_CHKSTK_THRESHOLD; + if (need_chkstk) { + /* Win64 large-frame probe sequence (matches what GCC/clang emit on + * x86_64-windows): + * mov eax, frame_size ; B8 imm32 (5 bytes) + * call __chkstk ; E8 disp32 (5 bytes) + * sub rsp, rax ; REX.W 29 C4 (3 bytes) + * __chkstk probes one page at a time over the requested allocation + * but does NOT adjust rsp itself; the explicit `sub rsp, rax` + * after the call does that. */ + if (wi + 13 > cap) goto overflow; + buf[wi++] = 0xB8; + buf[wi++] = (u8)frame_size; + buf[wi++] = (u8)(frame_size >> 8); + buf[wi++] = (u8)(frame_size >> 16); + buf[wi++] = (u8)(frame_size >> 24); + buf[wi++] = 0xE8; + if (chkstk_disp_pos_out) *chkstk_disp_pos_out = wi; + buf[wi++] = 0; + buf[wi++] = 0; + buf[wi++] = 0; + buf[wi++] = 0; + buf[wi++] = X64_REX_BASE | X64_REX_W; + buf[wi++] = 0x29; + buf[wi++] = modrm(3u, X64_RAX, X64_RSP); + } else { + /* sub rsp, frame_size: REX.W 81 /5 imm32 = 7 bytes. 
*/ + if (wi + 7 > cap) goto overflow; + buf[wi++] = X64_REX_BASE | X64_REX_W; + buf[wi++] = 0x81; + buf[wi++] = modrm(3u, 5u, X64_RSP); + buf[wi++] = (u8)frame_size; + buf[wi++] = (u8)(frame_size >> 8); + buf[wi++] = (u8)(frame_size >> 16); + buf[wi++] = (u8)(frame_size >> 24); + } + + /* sret: spill the first int arg reg (which holds the destination + * pointer at entry) to the hidden slot. SysV uses RDI; Win64 uses + * RCX. */ if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) { XSlot *s = x64_slot_get(a, a->sret_ptr_slot); if (s) { i32 off = -(i32)s->off; + u32 sret_reg = a->abi->int_args[0]; if (wi + 7 > cap) goto overflow; - buf[wi++] = X64_REX_BASE | X64_REX_W; + buf[wi++] = + (u8)(X64_REX_BASE | X64_REX_W | ((sret_reg & 8) ? X64_REX_R : 0)); buf[wi++] = 0x89; - buf[wi++] = modrm(2u, X64_RDI, X64_RBP); + buf[wi++] = modrm(2u, (sret_reg & 7u), X64_RBP); buf[wi++] = (u8)off; buf[wi++] = (u8)(off >> 8); buf[wi++] = (u8)(off >> 16); @@ -646,10 +820,12 @@ static u32 x_build_prologue(CGTarget *t, u8 *buf, u32 cap, u32 frame_size, } } + u32 xmm_base = x_xmm_base(a, cs_fp_used); + /* Spill callee-saves. */ for (u32 i = 0; i < cs_used; ++i) { u32 reg = cs_regs[i]; - i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8; + i32 off = -(i32)xmm_base - (i32)(cs_fp_used) * 16 - (i32)(i + 1) * 8; if (wi + 7 > cap) goto overflow; buf[wi++] = (u8)(X64_REX_BASE | X64_REX_W | ((reg & 8) ? X64_REX_R : 0)); buf[wi++] = 0x89; @@ -659,6 +835,26 @@ static u32 x_build_prologue(CGTarget *t, u8 *buf, u32 cap, u32 frame_size, buf[wi++] = (u8)(off >> 16); buf[wi++] = (u8)(off >> 24); } + + /* Spill callee-saved XMMs (Win64 only). movaps [rbp+disp32], xmm_n. + * Layout: xmm[0] at -(xmm_base+16), xmm[1] at -(xmm_base+32), ... + * Each slot is 16-aligned because rbp is 16-aligned at entry and + * xmm_base is rounded up to 16. */ + for (u32 i = 0; i < cs_fp_used; ++i) { + u32 xmm = cs_fp_regs[i]; + i32 off = -(i32)xmm_base - (i32)(i + 1) * 16; + u8 rex = (u8)((xmm & 8) ? 
(X64_REX_BASE | X64_REX_R) : 0); + u32 n = rex ? 8u : 7u; + if (wi + n > cap) goto overflow; + if (rex) buf[wi++] = rex; + buf[wi++] = 0x0F; + buf[wi++] = 0x29; /* MOVAPS r/m128, xmm */ + buf[wi++] = modrm(2u, (xmm & 7u), X64_RBP); + buf[wi++] = (u8)off; + buf[wi++] = (u8)(off >> 8); + buf[wi++] = (u8)(off >> 16); + buf[wi++] = (u8)(off >> 24); + } return wi; overflow: @@ -686,8 +882,9 @@ void x_func_begin_known_frame(CGTarget *t, const CGFuncDesc *fd, const CGKnownFrameDesc *frame, FrameSlot *out_slots) { XImpl *a = impl_of(t); - Reg cs_regs[5]; - u8 buf[X64_PROLOGUE_BYTES]; + Reg cs_regs[X64_MAX_CS_INT_REGS]; + Reg cs_fp_regs[X64_MAX_CS_FP_REGS]; + u8 buf[X64_PROLOGUE_BYTES_WIN64]; x_func_begin_init(t, fd); a->known_frame = 1; @@ -702,17 +899,26 @@ void x_func_begin_known_frame(CGTarget *t, const CGFuncDesc *fd, } u32 cs_used = x_collect_cs_regs(a, cs_regs); + u32 cs_fp_used = x_collect_cs_fp_regs(a, cs_fp_regs); if (frame && frame->may_omit_frame && frame->nslots == 0 && frame->max_outgoing == 0 && !frame->has_alloca && !frame->has_call && - !a->has_sret && !a->is_variadic && cs_used == 0) { + !a->has_sret && !a->is_variadic && cs_used == 0 && cs_fp_used == 0) { a->omit_frame = 1; return; } - u32 frame_size = x_compute_frame_size(a, cs_used); + u32 frame_size = x_compute_frame_size(a, cs_used, cs_fp_used); a->prologue_pos = t->mc->pos(t->mc); - u32 nbytes = x_build_prologue(t, buf, X64_PROLOGUE_BYTES, frame_size, - cs_regs, cs_used); + u32 chkstk_disp_pos = (u32)-1; + u32 nbytes = x_build_prologue(t, buf, sizeof buf, frame_size, + cs_regs, cs_used, cs_fp_regs, cs_fp_used, + &chkstk_disp_pos); t->mc->emit_bytes(t->mc, buf, nbytes); + if (chkstk_disp_pos != (u32)-1) { + ObjSymId chk = x_chkstk_sym(t); + t->mc->emit_reloc_at(t->mc, t->mc->section_id, + a->prologue_pos + chkstk_disp_pos, R_X64_PLT32, + chk, -4, 1, 0); + } x_emit_variadic_reg_saves(t); } @@ -720,24 +926,36 @@ void x_func_end(CGTarget *t) { XImpl *a = impl_of(t); MCEmitter *mc = t->mc; - Reg 
cs_regs[5]; + Reg cs_regs[X64_MAX_CS_INT_REGS]; + Reg cs_fp_regs[X64_MAX_CS_FP_REGS]; u32 cs_used = x_collect_cs_regs(a, cs_regs); + u32 cs_fp_used = x_collect_cs_fp_regs(a, cs_fp_regs); /* Stack alignment: SysV requires rsp ≡ 0 mod 16 just before a call, * which means rsp ≡ 8 mod 16 inside the function (after the return * address is pushed). On entry, rsp ≡ 8 mod 16; after `push rbp` it * is 0 mod 16; after `sub rsp, frame_size` we need it back to 0 * mod 16, so frame_size must be a multiple of 16. */ - u32 frame_size = x_compute_frame_size(a, cs_used); + u32 frame_size = x_compute_frame_size(a, cs_used, cs_fp_used); if (a->omit_frame) goto finish; mc->label_place(mc, a->epilogue_label); - /* Restore callee-saves. Each at rbp - (cum_off + (i+1)*8). */ + u32 xmm_base = x_xmm_base(a, cs_fp_used); + + /* Restore callee-saved XMMs (Win64). movaps xmm_n, [rbp+disp32]. */ + for (i32 i = (i32)cs_fp_used - 1; i >= 0; --i) { + u32 xmm = cs_fp_regs[i]; + i32 off = -(i32)xmm_base - (i32)(i + 1) * 16; + /* prefix=0 selects MOVAPS (0F 28 /r) when used through emit_sse_load. */ + emit_sse_load(mc, /*prefix=*/0, /*opcode=*/0x28, xmm, X64_RBP, off); + } + + /* Restore callee-saved GPRs. */ for (i32 i = (i32)cs_used - 1; i >= 0; --i) { u32 reg = cs_regs[i]; - i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8; + i32 off = -(i32)xmm_base - (i32)(cs_fp_used) * 16 - (i32)(i + 1) * 8; emit_mov_load(mc, /*size=*/8, /*signed=*/0, reg, X64_RBP, off); } @@ -747,15 +965,23 @@ void x_func_end(CGTarget *t) { if (!a->known_frame) { /* Patch prologue placeholder. */ - u8 buf[X64_PROLOGUE_BYTES]; + u8 buf[X64_PROLOGUE_BYTES_WIN64]; u32 prologue_nbytes = a->prologue_nbytes ? 
a->prologue_nbytes : X64_PROLOGUE_BYTES; for (u32 i = 0; i < prologue_nbytes; ++i) buf[i] = 0x90; + u32 chkstk_disp_pos = (u32)-1; (void)x_build_prologue(t, buf, prologue_nbytes, frame_size, cs_regs, - cs_used); + cs_used, cs_fp_regs, cs_fp_used, + &chkstk_disp_pos); obj_patch(t->obj, a->fd->text_section_id, a->prologue_pos, buf, prologue_nbytes); + if (chkstk_disp_pos != (u32)-1) { + ObjSymId chk = x_chkstk_sym(t); + mc->emit_reloc_at(mc, a->fd->text_section_id, + a->prologue_pos + chkstk_disp_pos, R_X64_PLT32, + chk, -4, 1, 0); + } } /* Patch each alloca's `lea dst, [rsp + 0]` disp32 with the final diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h @@ -19,10 +19,61 @@ #include "core/pool.h" #include "obj/obj.h" +/* Prologue placeholder budget for the unplanned-regs path (the C + * frontend's default; the opt pipeline pre-plans registers and hits + * x64_planned_prologue_bytes for tight sizing). + * + * SysV worst case: 11 base + 7 sret + 5*7 GPR saves = 53. + * Win64 worst case adds XMM6-15 (10 * 8 = 80) plus chkstk delta (+6) + * plus the 2 extra GPR slots for RDI/RSI (2*7 = 14), so 153 — round + * up to 192. We pick the larger budget for both OSes (the SysV path + * is unaffected past byte 53) and rely on dead-strip / link-time + * coalescing if size becomes a concern. */ #define X64_PROLOGUE_BYTES 96u +#define X64_PROLOGUE_BYTES_WIN64 192u #define X64_PROLOGUE_BASE_BYTES 11u #define X64_PROLOGUE_SRET_BYTES 7u #define X64_PROLOGUE_SAVE_BYTES 7u +/* XMM save: movaps [rbp + disp32], xmm_n. + * XMM0-7 : 0F 29 modrm disp32 = 7 B + * XMM8-15 : 44 0F 29 modrm disp32 (REX.R) = 8 B + * We size with the high-reg worst case so the placeholder always fits. */ +#define X64_PROLOGUE_XMM_SAVE_BYTES 8u +/* chkstk replaces a 7B sub-rsp-imm32 with 13B (mov eax,imm32 + + * call disp32 + sub rsp,rax). Net +6 over the plain sub. */ +#define X64_PROLOGUE_CHKSTK_DELTA 6u + +/* Win64-specific constants. */ +#define X64_WIN64_SHADOW_SPACE 32u /* 4 home slots, 8 B each. 
*/ +#define X64_WIN64_CHKSTK_THRESHOLD 4096u + +/* Maximum callee-saved GPRs across all supported ABIs. SysV saves up to + * 5 (RBX, R12..R15; RBP is handled separately by the prologue head), + * Win64 adds RDI + RSI for 7. */ +#define X64_MAX_CS_INT_REGS 7u + +/* ============================================================ + * Per-OS ABI register layout. + * + * Selected once at x_func_begin_init from t->c->target.os and + * consulted by the call-site and param-consumer paths so they stop + * hard-coding SysV reg orders and slot counts. */ +typedef struct X64ABIRegs { + const u32* int_args; /* size = n_int_args; SysV: RDI..R9; + Win64: RCX..R9 */ + u32 n_int_args; /* 6 (SysV) or 4 (Win64) */ + u32 n_fp_args; /* 8 (SysV) or 4 (Win64) */ + int slot_shared_int_fp; /* 1 (Win64): arg slot index shared between + int_args[i] and XMMi; 0 (SysV) */ + u32 shadow_space; /* 0 (SysV) or 32 (Win64) */ + int emit_sysv_vararg_save; /* 1 (SysV): emit the 176 B reg-save area */ + int vararg_fp_dup_to_gpr; /* 1 (Win64): call-site duplicates each + variadic FP arg into the matching GPR */ + u64 cs_int_mask; /* callee-saved GPRs (eligible set) */ + u64 cs_fp_mask; /* callee-saved XMMs (eligible set) */ +} X64ABIRegs; + +const X64ABIRegs* x64_abi_for_os(CfreeOSKind os); /* ============================================================ * XImpl and friends. 
*/ @@ -80,13 +131,15 @@ typedef struct XImpl { FrameSlot sret_ptr_slot; FrameSlot reg_save_slot; /* variadic: 176-byte __va_list_tag reg save area */ - u32 used_cs_int_mask; /* SysV callee-saved GPRs used by this function */ - u32 used_cs_fp_mask; /* reserved for ABIs with callee-saved FP regs */ + u32 used_cs_int_mask; /* callee-saved GPRs used by this function */ + u32 used_cs_fp_mask; /* callee-saved XMMs used by this function */ u32 planned_cs_int_mask; u32 planned_cs_fp_mask; u8 has_planned_regs; u8 pad1[3]; + const X64ABIRegs* abi; /* selected from t->c->target.os at func_begin */ + XScope* scopes; u32 nscopes; u32 scopes_cap; @@ -140,7 +193,6 @@ static inline _Noreturn void x_panic(CGTarget* t, const char* what) { extern const Reg g_int_order[6]; extern const Reg g_fp_order[10]; -extern const u32 g_int_arg_regs[6]; static inline void x64_abi_direct_reg_need(const ABIArgInfo* ai, u32* need_int, u32* need_fp) { diff --git a/src/arch/x64/isa.c b/src/arch/x64/isa.c @@ -256,6 +256,11 @@ const X64InsnDesc x64_insn_table[] = { X64_FMT_SSE_RR, 0), ROW("movss", X64_PFX_F3, 2, 0x0F, 0x11, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, X64_FMT_SSE_RR, X64_ASMFL_ALIAS), + /* MOVAPS */ + ROW("movaps", X64_PFX_NONE, 2, 0x0F, 0x28, 0, 0xFF, NO_MODRM, + X64_W_REQ_ANY, X64_FMT_SSE_RR, 0), + ROW("movaps", X64_PFX_NONE, 2, 0x0F, 0x29, 0, 0xFF, NO_MODRM, + X64_W_REQ_ANY, X64_FMT_SSE_RR, X64_ASMFL_ALIAS), /* ADD/SUB/MUL/DIV — opcodes 58/5C/59/5E (same byte for ss and sd; * prefix picks). */ ROW("addsd", X64_PFX_F2, 2, 0x0F, 0x58, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, diff --git a/src/arch/x64/link.c b/src/arch/x64/link.c @@ -68,6 +68,22 @@ static int x64_is_got_load_reloc(RelocKind kind) { kind == R_X64_REX_GOTPCRELX; } +/* PE/COFF IAT stub for x86_64 (6 B): + * + * ff 25 disp32 ; jmpq *[rip + disp_to_iat_slot] + * + * disp32 is signed offset from the END of the JMP (stub_vaddr + 6) + * to the IAT slot in .idata. 
Identical layout to the ELF PLT entry + * head, minus the trailing NOP pad — Win64 calls don't need a stub + * aligned to a fixed entry stride because there's no PLT0 to share + * the address space with. */ +static void x64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr, + u64 iat_slot_vaddr) { + i64 disp = (i64)iat_slot_vaddr - (i64)(stub_vaddr + X64_JMP_RIPREL_SIZE); + i32 disp32 = (i32)(u32)((u64)disp & 0xffffffffu); + x64_write_jmp_riprel(dst, disp32); +} + const LinkArchDesc link_arch_x64 = { .e_machine = EM_X86_64, .default_musl_interp = "/lib/ld-musl-x86_64.so.1", @@ -87,4 +103,7 @@ const LinkArchDesc link_arch_x64 = { .is_branch_reloc = x64_is_branch_reloc, .is_got_load_reloc = x64_is_got_load_reloc, .needs_jit_call_stub = x64_is_branch_reloc, + + .coff_stub_size = X64_JMP_RIPREL_SIZE, + .emit_coff_iat_stub = x64_emit_coff_iat_stub, }; diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c @@ -83,6 +83,20 @@ static void x_load_const(CGTarget* t, Operand dst, ConstBytes cb) { } static void x_copy(CGTarget* t, Operand dst, Operand src) { + if (dst.cls == RC_FP && src.cls == RC_INT) { + u32 sz = type_byte_size(dst.type); + int w = sz == 8 ? 1 : 0; + emit_sse_rr_w(t->mc, 0x66, 0x6E, w, dst.v.reg & 0xFu, + src.v.reg & 0xFu); + return; + } + if (dst.cls == RC_INT && src.cls == RC_FP) { + u32 sz = type_byte_size(src.type); + int w = sz == 8 ? 1 : 0; + emit_sse_rr_w(t->mc, 0x66, 0x7E, w, src.v.reg & 0xFu, + dst.v.reg & 0xFu); + return; + } if (dst.cls == RC_FP || src.cls == RC_FP) { u8 prefix2 = type_is_fp_double(dst.type) ? 0xF2 : 0xF3; emit_sse_rr(t->mc, prefix2, 0x10, dst.v.reg & 0xFu, src.v.reg & 0xFu); @@ -369,6 +383,107 @@ static void x_addr_of(CGTarget* t, Operand dst, Operand lv) { x_panic(t, "addr_of: kind unsupported"); } +/* Win64 TLS Local-Exec materialization (PE-COFF). 
+ * + * Sequence (5 instructions, 26-29 bytes depending on register encoding): + * mov rd, gs:[0x58] ; TEB.ThreadLocalStoragePointer + * mov r11d,[rip + _tls_index] ; per-image TLS slot index + * mov rd, [rd + r11*8] ; TLS block base for this image + * lea rd, [rd + sym@SECREL] ; rd = &sym + * + * `_tls_index` is a u32 the CRT defines for each image; the linker + * resolves the RIP-relative load. The LEA's disp32 carries + * IMAGE_REL_AMD64_SECREL (via R_COFF_SECREL) against the TLS data + * symbol — the linker fills in the symbol's offset from the start of + * the merged .tls section, which matches what gs:[0x58]+index lookup + * lands on at runtime. R11 is caller-saved under Win64; we use it + * unconditionally as scratch so we don't have to special-case + * rd == rcx. */ +static void x_tls_addr_of_win64(CGTarget* t, Operand dst, ObjSymId sym, + i64 addend) { + MCEmitter* mc = t->mc; + u32 sec = mc->section_id; + u32 rd = dst.v.reg & 0xFu; + + /* (1) mov rd, gs:[0x58]: 65 [REX.W|R?] 8B mod=00/reg=rd/rm=100 sib disp32. */ + u8 gs_prefix = 0x65; + mc->emit_bytes(mc, &gs_prefix, 1); + emit_rex(mc, 1, rd, 0, 0); + u8 op_mov_load = 0x8B; + mc->emit_bytes(mc, &op_mov_load, 1); + u8 mr1 = modrm(0u, rd & 7u, 4u); + mc->emit_bytes(mc, &mr1, 1); + u8 s1 = sib(0u, 4u, 5u); + mc->emit_bytes(mc, &s1, 1); + emit_u32le(mc, 0x58u); + + /* (2) mov r11d, [rip + _tls_index]: 44 8B 1D disp32. */ + Sym idx_name = pool_intern_cstr(t->c->global, "_tls_index"); + ObjSymId idx_sym = obj_symbol_find(t->obj, idx_name); + if (idx_sym == 0) { + idx_sym = obj_symbol(t->obj, idx_name, SB_GLOBAL, SK_UNDEF, + OBJ_SEC_NONE, 0, 0); + } + u8 rex_r_only = X64_REX_BASE | X64_REX_R; /* R11 in ModRM.reg. 
*/ + mc->emit_bytes(mc, &rex_r_only, 1); + u8 op_mov_load_32 = 0x8B; + mc->emit_bytes(mc, &op_mov_load_32, 1); + u8 mr2 = modrm(0u, 3u /* r11 & 7 */, 5u /* RIP-rel */); + mc->emit_bytes(mc, &mr2, 1); + u32 idx_disp_pos = mc->pos(mc); + emit_u32le(mc, 0); + mc->emit_reloc_at(mc, sec, idx_disp_pos, R_PC32, idx_sym, -4, 1, 0); + + /* (3) mov rd, [rd + r11*8]: REX.W + (REX.X for r11) + (REX.B for rd>=8) + + * 8B modrm(mod, reg=rd&7, rm=4=SIB) sib(scale=3, index=3=r11&7, base=rd&7). + * When base&7 == 5 (rbp/r13) mod=0 means "disp32 only"; force mod=01 + * with disp8=0 to actually mean [reg+r11*8 + 0]. */ + u8 rex3 = X64_REX_BASE | X64_REX_W | X64_REX_X; + if (rd & 8) rex3 |= X64_REX_R; /* reg = rd */ + if (rd & 8) rex3 |= X64_REX_B; /* base = rd */ + mc->emit_bytes(mc, &rex3, 1); + u8 op_mov_load2 = 0x8B; + mc->emit_bytes(mc, &op_mov_load2, 1); + if ((rd & 7u) == 5u) { + u8 mr3 = modrm(1u, rd & 7u, 4u); + mc->emit_bytes(mc, &mr3, 1); + u8 s3 = sib(3u, 3u, rd & 7u); + mc->emit_bytes(mc, &s3, 1); + u8 zero = 0; + mc->emit_bytes(mc, &zero, 1); + } else { + u8 mr3 = modrm(0u, rd & 7u, 4u); + mc->emit_bytes(mc, &mr3, 1); + u8 s3 = sib(3u, 3u, rd & 7u); + mc->emit_bytes(mc, &s3, 1); + } + + /* (4) lea rd, [rd + disp32@SECREL]: REX.W + (.R/.B for rd) + 8D modrm + disp32. + * rsp/r12 (rd&7==4) needs a SIB; rbp/r13 (rd&7==5) already takes + * disp32 form natively at mod=10. 
*/ + u8 rex4 = X64_REX_BASE | X64_REX_W; + if (rd & 8) rex4 |= X64_REX_R; /* reg = rd */ + if (rd & 8) rex4 |= X64_REX_B; /* base = rd */ + mc->emit_bytes(mc, &rex4, 1); + u8 op_lea = 0x8D; + mc->emit_bytes(mc, &op_lea, 1); + u32 lea_disp_pos; + if ((rd & 7u) == 4u) { + u8 mr4 = modrm(2u, rd & 7u, 4u); + mc->emit_bytes(mc, &mr4, 1); + u8 s4 = sib(0u, 4u, rd & 7u); + mc->emit_bytes(mc, &s4, 1); + lea_disp_pos = mc->pos(mc); + emit_u32le(mc, 0); + } else { + u8 mr4 = modrm(2u, rd & 7u, rd & 7u); + mc->emit_bytes(mc, &mr4, 1); + lea_disp_pos = mc->pos(mc); + emit_u32le(mc, 0); + } + mc->emit_reloc_at(mc, sec, lea_disp_pos, R_COFF_SECREL, sym, addend, 1, 0); +} + /* x86_64 TLS Local-Exec materialization. * mov rd, fs:0 ; read thread pointer (FS base + 0) * lea rd, [rd + sym@tpoff] ; add TP-relative offset @@ -380,6 +495,11 @@ static void x_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) { u32 sec = mc->section_id; u32 rd = dst.v.reg & 0xFu; + if (t->c->target.os == CFREE_OS_WINDOWS) { + x_tls_addr_of_win64(t, dst, sym, addend); + return; + } + /* mov rd, qword ptr fs:[0] * 64 [REX.W|REX.R] 8B mod=00/reg=rd/rm=100 sib(0,4,5) disp32=0 */ u8 fs_prefix = 0x64; @@ -1008,6 +1128,9 @@ static u32 x_call_plan_stack_raw_size(const CGCallPlan* p) { return size; } +static inline void x_call_sync_slot(const X64ABIRegs* abi, u32* next_int, + u32* next_fp); + static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, u32* next_fp, u32* stack_off, int tail) { XImpl* a = impl_of(t); @@ -1031,13 +1154,11 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, if (ai->kind == ABI_ARG_IGNORE) return; if (ai->kind == ABI_ARG_INDIRECT) { /* Pass &av->storage_local in the next int arg reg. */ - u32 dst_reg = (*next_int < 6) ? g_int_arg_regs[(*next_int)++] : X64_RAX; - int to_stack = (*next_int > 6) || (dst_reg == X64_RAX && *next_int == 6); - /* Above is awkward — recompute clearly: */ - if (*next_int >= 6 + (a->has_sret ? 
0 : 0)) { - /* (next_int was already bumped past 6) — stack route */ - } - to_stack = (dst_reg == X64_RAX); + u32 nargs_reg = a->abi->n_int_args; + u32 dst_reg = (*next_int < nargs_reg) ? a->abi->int_args[(*next_int)++] + : X64_RAX; + int to_stack = (dst_reg == X64_RAX); + x_call_sync_slot(a->abi, next_int, next_fp); if (av->storage.kind == OPK_LOCAL) { XSlot* s = x64_slot_get(a, av->storage.v.frame_slot); if (!s) compiler_panic(t->c, a->loc, "x64 call: bad byval slot"); @@ -1135,8 +1256,9 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, const ABIArgPart* pt = &ai->parts[i]; u32 sz = pt->size; if (pt->cls == ABI_CLASS_INT) { - int to_stack = (*next_int >= 6); - u32 dst_reg = to_stack ? X64_RAX : g_int_arg_regs[(*next_int)++]; + int to_stack = (*next_int >= a->abi->n_int_args); + u32 dst_reg = to_stack ? X64_RAX : a->abi->int_args[(*next_int)++]; + if (!to_stack) x_call_sync_slot(a->abi, next_int, next_fp); switch (av->storage.kind) { case OPK_IMM: { int w = (sz == 8) ? 1 : 0; @@ -1176,10 +1298,16 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, *stack_off += 8; } } else if (pt->cls == ABI_CLASS_FP) { - int to_stack = (*next_fp >= 8); + int to_stack = (*next_fp >= a->abi->n_fp_args); u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3; if (!to_stack) { u32 dst_x = (*next_fp)++; + /* Win64: variadic FP args must be duplicated into the matching + * GPR so a callee that doesn't know the argument type finds the + * bits in either register. `av->abi == NULL` is cfree's marker + * that this is a variadic (un-prototyped) arg. 
*/ + int dup_to_gpr = a->abi->vararg_fp_dup_to_gpr && (av->abi == NULL) && + (dst_x < a->abi->n_int_args); if (av->storage.kind == OPK_REG) { u32 sx = av->storage.v.reg & 0xFu; if (sx != dst_x) emit_sse_rr(t->mc, prefix2, 0x10, dst_x, sx); @@ -1197,6 +1325,15 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, "x64 call: FP arg storage kind %d unsupported", (int)av->storage.kind); } + if (dup_to_gpr) { + /* movq r64, xmm: 66 REX.W 0F 7E /r (xmm as ModRM:reg, + * r64 as ModRM:r/m). emit_sse_rr_w(prefix=0x66, opcode=0x7E, + * w=1, dst=xmm, src=gpr) emits that encoding. */ + u32 gpr = a->abi->int_args[dst_x]; + emit_sse_rr_w(t->mc, 0x66, 0x7E, /*w=*/1, dst_x, gpr); + } + /* Keep int/fp slot indices in lockstep on Win64. */ + x_call_sync_slot(a->abi, next_int, next_fp); } else { if (av->storage.kind == OPK_REG) { Operand addr = x_call_stack_arg_addr(t, *stack_off, tail); @@ -1233,8 +1370,16 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, } } -static void count_arg_stack(const CGABIValue* av, u32* next_int, u32* next_fp, - u32* stack_off) { +static inline void x_call_sync_slot(const X64ABIRegs* abi, u32* next_int, + u32* next_fp) { + if (!abi->slot_shared_int_fp) return; + u32 m = *next_int > *next_fp ? 
*next_int : *next_fp; + *next_int = m; + *next_fp = m; +} + +static void count_arg_stack(const X64ABIRegs* abi, const CGABIValue* av, + u32* next_int, u32* next_fp, u32* stack_off) { ABIArgInfo va_ai; ABIArgPart va_pt; const ABIArgInfo* ai = av->abi; @@ -1253,10 +1398,11 @@ static void count_arg_stack(const CGABIValue* av, u32* next_int, u32* next_fp, } if (ai->kind == ABI_ARG_IGNORE) return; if (ai->kind == ABI_ARG_INDIRECT) { - if (*next_int < 6) + if (*next_int < abi->n_int_args) ++*next_int; else *stack_off += 8; + x_call_sync_slot(abi, next_int, next_fp); return; } if (ai->kind == ABI_ARG_DIRECT && @@ -1267,47 +1413,87 @@ static void count_arg_stack(const CGABIValue* av, u32* next_int, u32* next_fp, for (u16 i = 0; i < ai->nparts; ++i) { const ABIArgPart* pt = &ai->parts[i]; if (pt->cls == ABI_CLASS_INT) { - if (*next_int < 6) + if (*next_int < abi->n_int_args) ++*next_int; else *stack_off += 8; } else if (pt->cls == ABI_CLASS_FP) { - if (*next_fp < 8) + if (*next_fp < abi->n_fp_args) ++*next_fp; else *stack_off += 8; } + x_call_sync_slot(abi, next_int, next_fp); } } static u32 x_call_stack_size(CGTarget* t, const CGCallDesc* d) { - (void)t; + const X64ABIRegs* abi = x64_abi_for_os(t->c->target.os); u32 next_int = (d->abi && d->abi->has_sret) ? 1u : 0u; - u32 next_fp = 0, stack_off = 0; + u32 next_fp = 0; + /* Win64 reserves a 32 B shadow space at [rsp+0..31] which is part of + * the caller's outgoing area; stack args land above it. SysV has no + * shadow space. 
*/ + u32 stack_off = abi->shadow_space; + x_call_sync_slot(abi, &next_int, &next_fp); for (u32 i = 0; i < d->nargs; ++i) - count_arg_stack(&d->args[i], &next_int, &next_fp, &stack_off); + count_arg_stack(abi, &d->args[i], &next_int, &next_fp, &stack_off); return (stack_off + 15u) & ~15u; } +static const Reg g_tail_cs_int_order_all[X64_MAX_CS_INT_REGS] = { + X64_RBX, X64_R12, X64_R13, X64_R14, X64_R15, X64_RDI, X64_RSI, +}; + +#define X64_TAIL_MAX_CS_FP_REGS 10u +static const Reg g_tail_cs_fp_order_all[X64_TAIL_MAX_CS_FP_REGS] = { + X64_XMM6, X64_XMM7, X64_XMM8, X64_XMM0 + 9, + X64_XMM0 + 10, X64_XMM0 + 11, X64_XMM0 + 12, X64_XMM0 + 13, + X64_XMM0 + 14, X64_XMM15, +}; + static u32 x_tail_collect_cs_regs(const XImpl* a, Reg* cs_regs) { u32 cs_used = 0; - for (u32 i = 0; i < 5u; ++i) { - Reg r = g_int_order[i]; - if (a->used_cs_int_mask & (1u << r)) cs_regs[cs_used++] = r; + u64 mask = (u64)a->used_cs_int_mask & a->abi->cs_int_mask; + mask &= ~(1ull << X64_RBP); + for (u32 i = 0; i < X64_MAX_CS_INT_REGS; ++i) { + Reg r = g_tail_cs_int_order_all[i]; + if (mask & (1ull << r)) cs_regs[cs_used++] = r; } return cs_used; } +static u32 x_tail_collect_cs_fp_regs(const XImpl* a, Reg* cs_fp_regs) { + u32 n = 0; + u64 mask = (u64)a->used_cs_fp_mask & a->abi->cs_fp_mask; + for (u32 i = 0; i < X64_TAIL_MAX_CS_FP_REGS; ++i) { + Reg r = g_tail_cs_fp_order_all[i]; + if (mask & (1ull << r)) cs_fp_regs[n++] = r; + } + return n; +} + static void x_tail_restore_frame(CGTarget* t) { XImpl* a = impl_of(t); MCEmitter* mc = t->mc; - Reg cs_regs[5]; + Reg cs_regs[X64_MAX_CS_INT_REGS]; + Reg cs_fp_regs[X64_TAIL_MAX_CS_FP_REGS]; u32 cs_used = x_tail_collect_cs_regs(a, cs_regs); + u32 cs_fp_used = x_tail_collect_cs_fp_regs(a, cs_fp_regs); if (a->omit_frame) return; + /* Mirror the func_end frame layout: xmm_base is cum_off rounded up to + * 16 when any XMM is saved, else == cum_off. 
*/ + u32 xmm_base = a->cum_off; + if (cs_fp_used) xmm_base = (xmm_base + 15u) & ~15u; + for (i32 i = (i32)cs_fp_used - 1; i >= 0; --i) { + u32 xmm = cs_fp_regs[i]; + i32 off = -(i32)xmm_base - (i32)(i + 1) * 16; + emit_sse_load(mc, /*prefix=*/0, /*opcode=*/0x28, xmm, X64_RBP, off); + } for (i32 i = (i32)cs_used - 1; i >= 0; --i) { u32 reg = cs_regs[i]; - i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8; + i32 off = -(i32)xmm_base - (i32)(cs_fp_used) * 16 - (i32)(i + 1) * 8; emit_mov_load(mc, 8, 0, reg, X64_RBP, off); } { @@ -1344,7 +1530,11 @@ static void x_call(CGTarget* t, const CGCallDesc* d) { XImpl* a = impl_of(t); MCEmitter* mc = t->mc; - u32 next_int = 0, next_fp = 0, stack_off = 0; + u32 next_int = 0, next_fp = 0; + /* Win64 reserves a 32 B shadow space at [rsp+0..31] which is part of + * the caller's outgoing-arg area; the first stack-passed arg lands + * at [rsp+32]. SysV starts at [rsp+0]. */ + u32 stack_off = a->abi->shadow_space; int requested_tail = (d->flags & CG_CALL_TAIL) != 0; int tail_ok = 1; if (requested_tail) { @@ -1352,15 +1542,17 @@ static void x_call(CGTarget* t, const CGCallDesc* d) { tail_ok = tail_stack <= a->next_param_stack; } - /* sret: caller puts destination pointer in rdi. */ + /* sret: caller puts the destination pointer in the first int arg reg + * (RDI on SysV, RCX on Win64). 
*/ if (d->abi && d->abi->has_sret) { if (d->ret.storage.kind != OPK_LOCAL) { compiler_panic(t->c, a->loc, "x64 call: sret destination must be LOCAL"); } XSlot* s = x64_slot_get(a, d->ret.storage.v.frame_slot); if (!s) compiler_panic(t->c, a->loc, "x64 call: bad sret slot"); - emit_lea(mc, X64_RDI, X64_RBP, -(i32)s->off); + emit_lea(mc, a->abi->int_args[0], X64_RBP, -(i32)s->off); next_int = 1; + x_call_sync_slot(a->abi, &next_int, &next_fp); } for (u32 i = 0; i < d->nargs; ++i) { emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off, @@ -1811,6 +2003,22 @@ static void x_va_start_(CGTarget* t, Operand ap_op) { if (!a->is_variadic) compiler_panic(t->c, a->loc, "x64 va_start: function not variadic"); u32 ap = ap_op.v.reg & 0xFu; + if (a->abi->shadow_space) { + /* Win64 va_list is a single pointer to the next variadic stack + * slot. The 32 B caller-allocated home space at [rbp + 16] holds + * the first four named integer args (RCX/RDX/R8/R9, spilled by + * the prologue's variadic save). Variadic args start immediately + * after the named args at: + * [rbp + 16 + named_int_count * 8 + named_stack_bytes] + * x_emit_variadic_reg_saves already spilled the four arg regs to + * the home space; va_arg consumes from there onward at 8-byte + * stride (the call-site duplicates FP varargs into the matching + * GPR, so all FP varargs are reachable through the integer arm). 
*/ + u32 first_var_off = 16u + a->next_param_int * 8u + a->next_param_stack; + emit_lea(mc, X64_RAX, X64_RBP, (i32)first_var_off); + emit_mov_store(mc, 8, X64_RAX, ap, 0); + return; + } XSlot* rs = x64_slot_get(a, a->reg_save_slot); if (!rs) compiler_panic(t->c, a->loc, "x64 va_start: no reg_save_slot"); @@ -1830,14 +2038,46 @@ static void x_va_start_(CGTarget* t, Operand ap_op) { static void x_va_arg_(CGTarget* t, Operand dst, Operand ap_op, CfreeCgTypeId ty) { + XImpl* a = impl_of(t); MCEmitter* mc = t->mc; u32 ap = ap_op.v.reg & 0xFu; u32 sz = type_byte_size(ty); int is_fp = (dst.cls == RC_FP); + u32 dr = dst.v.reg & 0xFu; + if (a->abi->shadow_space) { + /* Win64: va_list is a plain pointer to the next slot. Every + * variadic arg occupies exactly 8 bytes (or 16-byte aggregates + * passed by hidden ptr — cfree's caller side already handles + * that). FP varargs are duplicated into the matching GPR slot + * at the call site (vararg_fp_dup_to_gpr), so we always load + * from the integer slot at *ap. + * r11 = *ap ; current slot address + * dst = [r11] ; load + * r11 += 8 ; advance + * *ap = r11 ; write back */ + emit_mov_load(mc, 8, 0, X64_R11, ap, 0); + if (is_fp) { + u8 prefix = (sz == 8) ? 0xF2 : 0xF3; + emit_sse_load(mc, prefix, 0x10, dr, X64_R11, 0); + } else { + int sx = type_is_signed(ty); + emit_mov_load(mc, sz, sx, dr, X64_R11, 0); + } + /* add r11, 8 : REX.WB 0x83 /0 imm8. */ + { + u32 ofs = obj_pos(mc->obj, mc->section_id); + u8 rex = (u8)(X64_REX_BASE | X64_REX_W | X64_REX_B); + mc->emit_bytes(mc, &rex, 1); + u8 buf[3] = {0x83, modrm(3u, 0u, X64_R11 & 7u), 8}; + mc->emit_bytes(mc, buf, 3); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); + } + emit_mov_store(mc, 8, X64_R11, ap, 0); + return; + } u32 offs_field = is_fp ? 4u : 0u; u32 max_offs = is_fp ? 176u : 48u; u32 stride = is_fp ? 
16u : 8u; - u32 dr = dst.v.reg & 0xFu; MCLabel L_stack = mc->label_new(mc); MCLabel L_done = mc->label_new(mc); @@ -1914,10 +2154,17 @@ static void x_va_end_(CGTarget* t, Operand a) { } static void x_va_copy_(CGTarget* t, Operand d, Operand s) { + XImpl* a = impl_of(t); MCEmitter* mc = t->mc; u32 dr = d.v.reg & 0xFu; u32 sr = s.v.reg & 0xFu; - /* va_list is 24 bytes; three 8B loads + stores via rax. */ + if (a->abi->shadow_space) { + /* Win64 va_list is a single 8-byte pointer. */ + emit_mov_load(mc, 8, 0, X64_RAX, sr, 0); + emit_mov_store(mc, 8, X64_RAX, dr, 0); + return; + } + /* SysV va_list is 24 bytes; three 8B loads + stores via rax. */ for (u32 i = 0; i < 24u; i += 8u) { emit_mov_load(mc, 8, 0, X64_RAX, sr, (i32)i); emit_mov_store(mc, 8, X64_RAX, dr, (i32)i); diff --git a/src/arch/x64/opt_coord.c b/src/arch/x64/opt_coord.c @@ -119,16 +119,19 @@ static void x_get_phys_regs(CGTarget* t, RegClass cls, } static int x_is_caller_saved(CGTarget* t, RegClass cls, Reg reg) { - (void)t; + const X64ABIRegs* abi = x64_abi_for_os(t->c->target.os); switch (cls) { case RC_INT: - /* SysV AMD64 caller-saved: RAX,RCX,RDX,RSI,RDI,R8-R11 */ - return reg == X64_RAX || reg == X64_RCX || reg == X64_RDX || - reg == X64_RSI || reg == X64_RDI || - (reg >= X64_R8 && reg <= X64_R11); + /* Everything that isn't callee-saved (and isn't RSP/RBP) is + * caller-saved. Inverting the ABI's cs_int_mask handles both + * SysV and Win64 in one line. */ + if (reg == X64_RSP || reg == X64_RBP) return 0; + return (abi->cs_int_mask & (1ull << reg)) == 0; case RC_FP: - /* SysV AMD64: all XMM regs are caller-saved */ - return reg >= X64_XMM0 && reg <= X64_XMM0 + 15; + /* SysV: all XMMs caller-saved. Win64: XMM0..XMM5 caller-saved, + * XMM6..XMM15 callee-saved. 
*/ + if (reg < X64_XMM0 || reg > X64_XMM0 + 15) return 0; + return (abi->cs_fp_mask & (1ull << reg)) == 0; default: return 0; } @@ -136,26 +139,41 @@ static int x_is_caller_saved(CGTarget* t, RegClass cls, Reg reg) { static u32 x_call_clobber_mask(CGTarget* t, const CGCallDesc* d, RegClass cls) { - (void)t; (void)d; + const X64ABIRegs* abi = x64_abi_for_os(t->c->target.os); switch (cls) { - case RC_INT: - return (1u << X64_RAX) | (1u << X64_RCX) | (1u << X64_RDX) | - (1u << X64_RSI) | (1u << X64_RDI) | (1u << X64_R8) | - (1u << X64_R9) | (1u << X64_R10) | (1u << X64_R11); - case RC_FP: - return 0xFFFFu; + case RC_INT: { + /* All GPRs except callee-saved (and RSP/RBP) are clobbered by a + * call. */ + u32 mask = 0; + for (u32 r = 0; r < 16u; ++r) { + if (r == X64_RSP || r == X64_RBP) continue; + if ((abi->cs_int_mask & (1ull << r)) == 0) mask |= (1u << r); + } + return mask; + } + case RC_FP: { + /* All XMMs except callee-saved are clobbered by a call. */ + u32 mask = 0; + for (u32 r = 0; r < 16u; ++r) { + if ((abi->cs_fp_mask & (1ull << r)) == 0) mask |= (1u << r); + } + return mask; + } default: return 0; } } static u32 x_callee_save_mask(CGTarget* t, RegClass cls) { - (void)t; - return cls == RC_INT ? ((1u << X64_RBX) | (1u << X64_R12) | - (1u << X64_R13) | (1u << X64_R14) | - (1u << X64_R15)) - : 0; + const X64ABIRegs* abi = x64_abi_for_os(t->c->target.os); + if (cls == RC_INT) { + /* RBP is saved by the prologue head, not exposed for general + * callee-save spill bookkeeping. */ + return (u32)(abi->cs_int_mask & ~(1ull << X64_RBP)); + } + if (cls == RC_FP) return (u32)abi->cs_fp_mask; + return 0; } static u32 x_return_reg_mask(CGTarget* t, const ABIFuncInfo* abi, @@ -194,19 +212,26 @@ static void x_plan_call(CGTarget* t, const CGCallDesc* d, CGCallPlan* out) { u32 cap = d->nargs * 2u + 2u; out->args = arena_zarray(t->c->tu, CGCallPlanMove, cap ? cap : 1u); out->rets = arena_zarray(t->c->tu, CGCallPlanRet, 4); - u32 next_int = d->abi && d->abi->has_sret ? 
1u : 0u, next_fp = 0, stack = 0; - static const u32 iregs[6] = {X64_RDI, X64_RSI, X64_RDX, X64_RCX, X64_R8, X64_R9}; + const X64ABIRegs* abi = x64_abi_for_os(t->c->target.os); + u32 next_int = d->abi && d->abi->has_sret ? 1u : 0u, next_fp = 0; + /* Win64 reserves a 32 B shadow space above the return address that + * the caller owns; the first stack-passed arg lands above it. SysV + * starts at offset 0. */ + u32 stack = abi->shadow_space; if (d->abi && d->abi->has_sret) { CGCallPlanMove* m = &out->args[out->nargs++]; m->src = d->ret.storage; m->src_kind = CG_CALL_PLAN_SRC_ADDR; m->dst_kind = CG_CALL_PLAN_REG; m->cls = RC_INT; - m->dst_reg = X64_RDI; + m->dst_reg = abi->int_args[0]; m->mem.type = d->ret.type; m->mem.size = 8; m->mem.align = 8; } + /* On Win64, advance the FP slot counter in lockstep with the int + * slot counter (shared slot). */ + if (abi->slot_shared_int_fp) next_fp = next_int; for (u32 a = 0; a < d->nargs; ++a) { const CGABIValue* av = &d->args[a]; const ABIArgInfo* ai = av->abi; @@ -228,14 +253,15 @@ static void x_plan_call(CGTarget* t, const CGCallDesc* d, CGCallPlan* out) { m->src = av->storage; m->src_kind = CG_CALL_PLAN_SRC_ADDR; m->cls = RC_INT; - if (next_int < 6) { + if (next_int < abi->n_int_args) { m->dst_kind = CG_CALL_PLAN_REG; - m->dst_reg = iregs[next_int++]; + m->dst_reg = abi->int_args[next_int++]; } else { m->dst_kind = CG_CALL_PLAN_STACK; m->stack_offset = stack; stack += 8; } + if (abi->slot_shared_int_fp) next_fp = next_int; m->mem.type = av->type; m->mem.size = 8; m->mem.align = 8; @@ -271,24 +297,34 @@ static void x_plan_call(CGTarget* t, const CGCallDesc* d, CGCallPlan* out) { m->mem.align = p->align ? 
p->align : p->size; if (p->cls == ABI_CLASS_FP) { m->cls = RC_FP; - if (next_fp < 8) { + if (next_fp < abi->n_fp_args) { + u32 dst_x = next_fp; m->dst_kind = CG_CALL_PLAN_REG; m->dst_reg = X64_XMM0 + next_fp++; + if (abi->vararg_fp_dup_to_gpr && av->abi == NULL && + dst_x < abi->n_int_args) { + CGCallPlanMove* dup = &out->args[out->nargs++]; + *dup = *m; + dup->cls = RC_INT; + dup->dst_reg = abi->int_args[dst_x]; + } } else { m->dst_kind = CG_CALL_PLAN_STACK; m->stack_offset = stack; stack += 8; } + if (abi->slot_shared_int_fp) next_int = next_fp; } else { m->cls = RC_INT; - if (next_int < 6) { + if (next_int < abi->n_int_args) { m->dst_kind = CG_CALL_PLAN_REG; - m->dst_reg = iregs[next_int++]; + m->dst_reg = abi->int_args[next_int++]; } else { m->dst_kind = CG_CALL_PLAN_STACK; m->stack_offset = stack; stack += 8; } + if (abi->slot_shared_int_fp) next_fp = next_int; } } } diff --git a/src/link/link.c b/src/link/link.c @@ -139,6 +139,10 @@ LinkInputId link_add_obj_bytes(Linker* l, const char* name, const u8* data, ob = read_macho(l->c, name, data, len); reader_name = "read_macho"; break; + case CFREE_BIN_COFF: + ob = read_coff(l->c, name, data, len); + reader_name = "read_coff"; + break; default: compiler_panic(l->c, no_loc(), "link_add_obj_bytes: unsupported object format " @@ -150,10 +154,23 @@ LinkInputId link_add_obj_bytes(Linker* l, const char* name, const u8* data, "link_add_obj_bytes: %s returned NULL for '%s'", reader_name, name ? name : "(unnamed)"); in = inputs_push(l, &id); - in->kind = LINK_INPUT_OBJ_BYTES; in->order = l->next_input_order++; in->obj = ob; /* re-uses the ObjBuilder slot for ownership */ in->name = name ? pool_intern_cstr(l->c->global, name) : 0; + /* PE/COFF short-import: read_coff_short_import stashes the providing + * DLL name on the builder. Reclassify the input as a DSO so the + * resolver treats its symbols as exports (matching the .lib archive + * member path in include_archive_member). 
*/ + { + Sym coff_dll = 0; + if (fmt == CFREE_BIN_COFF && obj_get_coff_import_dll(ob, &coff_dll) && + coff_dll) { + in->kind = LINK_INPUT_DSO_BYTES; + in->soname = coff_dll; + } else { + in->kind = LINK_INPUT_OBJ_BYTES; + } + } return id; } @@ -186,6 +203,16 @@ LinkInputId link_add_dso_bytes(Linker* l, const char* name, const u8* data, ob = read_macho_dso(l->c, name, data, len, &soname); reader_name = "read_macho_dso"; break; + case CFREE_BIN_COFF: + case CFREE_BIN_PE: + /* Both spellings route through read_coff_dso: CFREE_BIN_PE is + * the MZ/PE-signed form (a real .dll), CFREE_BIN_COFF can land + * here when the caller hands us a single short-import record + * directly (rare; .lib archives are the usual conveyance and + * are handled by link_add_archive_bytes). */ + ob = read_coff_dso(l->c, name, data, len, &soname); + reader_name = "read_coff_dso"; + break; default: compiler_panic(l->c, no_loc(), "link_add_dso_bytes: unsupported DSO format " @@ -219,6 +246,305 @@ LinkInputId link_add_dso_bytes(Linker* l, const char* name, const u8* data, return id; } +/* ---- COFF long-form import-archive support ---- + * + * mingw `.a` archives (e.g. libkernel32.a) don't use the Microsoft + * short-import record format (Sig1=0/Sig2=0xFFFF — handled in + * read_coff_short_import). Instead every archive member is a regular + * long-form COFF .o file containing `.idata$N` sections. Three flavors + * appear: + * + * - Head member (e.g. libkernel32h.o): defines `_head_lib64_<lib>_a`, + * has `.idata$2` with one IMAGE_IMPORT_DESCRIPTOR template plus + * sentinel `.idata$4` / `.idata$5` slots. + * - Trailer member (e.g. libkernel32t.o): defines + * `__lib64_lib<lib>_a_iname` (the DLL-name string), with tiny + * `.idata$4` / `.idata$5` / `.idata$7` terminators. + * - Per-function stub (e.g. libkernel32s00001.o for ExitProcess): + * defines `__imp_<name>` (the IAT slot in `.idata$5`) and the bare + * `<name>` in `.text` (a 6-byte `ff 25 disp32` indirect jump + * against `__imp_<name>`). 
Carries `.idata$4` (ILT), `.idata$5` + * (IAT), `.idata$6` (hint+name), `.idata$7` (DLL-name back-ptr). + * + * cfree's link-emit path synthesizes the entire .idata from + * LinkSymbol.imported entries (link_emit_coff). The long-form members' + * `.idata$N` byte content is therefore redundant — only the symbol + * naming the export matters. We absorb the per-function stubs at + * archive-ingest time by rewriting them into short-import-shaped + * DSO shims (matching what read_coff_short_import produces), and we + * drop the head/trailer members entirely. + * + * DLL-name source: mingw/llvm-mingw long import members are named after + * the providing DLL (`KERNEL32.dll`, `api-ms-win-crt-runtime-l1-1-0.dll`, + * ...). Fall back to the archive filename for older import archives. + * + * The conversion is gated on Compiler.target.obj == CFREE_OBJ_COFF so + * non-Windows targets are unaffected. */ + +typedef enum CoffArMemberClass { + COFF_AR_KEEP = 0, /* regular .obj — leave as LINK_INPUT_OBJ_BYTES */ + COFF_AR_SHIM = 1, /* per-function stub — replaced with DSO shim */ + COFF_AR_SKIP = 2, /* head/trailer — drop entirely */ +} CoffArMemberClass; + +static const char kCoffImpPrefix_[] = "__imp_"; +static const u32 kCoffImpPrefixLen_ = (u32)(sizeof kCoffImpPrefix_ - 1u); +static const char kCoffHeadPrefix_[] = "_head_"; +static const u32 kCoffHeadPrefixLen_ = (u32)(sizeof kCoffHeadPrefix_ - 1u); +static const char kCoffInameSuffix_[] = "_iname"; +static const u32 kCoffInameSuffixLen_ = (u32)(sizeof kCoffInameSuffix_ - 1u); + +/* Derive a DLL name from the archive path. Handles: + * path/to/libkernel32.a -> "kernel32.dll" + * path/to/libkernel32.dll.a -> "kernel32.dll" + * path/to/kernel32.lib -> "kernel32.dll" + * path/to/libfoo -> "foo.dll" + * If nothing recognizable, returns the interned basename verbatim + * (callers can still match by name; case-insensitive at runtime). 
*/ +static Sym derive_dll_name_from_archive_path(Compiler* c, const char* path) { + const char* base; + const char* p; + size_t n; + size_t out_len; + char* out; + Sym sym; + if (!path || !*path) return 0; + base = path; + for (p = path; *p; ++p) + if (*p == '/' || *p == '\\') base = p + 1; + n = strlen(base); + /* Strip trailing ".dll.a" / ".a" / ".lib" (case-sensitive — mingw + * uses lowercase, MSVC uses .lib). */ + if (n >= 6 && memcmp(base + n - 6, ".dll.a", 6) == 0) n -= 6; + else if (n >= 2 && memcmp(base + n - 2, ".a", 2) == 0) n -= 2; + else if (n >= 4 && memcmp(base + n - 4, ".lib", 4) == 0) n -= 4; + /* Strip leading "lib" prefix. */ + if (n >= 3 && memcmp(base, "lib", 3) == 0) { + base += 3; + n -= 3; + } + if (n == 0) return 0; + /* Append ".dll". */ + out_len = n + 4u; + out = (char*)arena_array(c->scratch, char, out_len); + memcpy(out, base, n); + memcpy(out + n, ".dll", 4); + sym = pool_intern(c->global, out, (u32)out_len); + return sym; +} + +static Sym derive_dll_name_from_archive_member(Compiler* c, + const char* member_name, + Sym fallback) { + const char* base; + const char* p; + size_t n; + if (!member_name || !*member_name) return fallback; + base = member_name; + for (p = member_name; *p; ++p) + if (*p == '/' || *p == '\\') base = p + 1; + n = strlen(base); + if (n >= 4 && memcmp(base + n - 4, ".dll", 4) == 0) + return pool_intern(c->global, base, (u32)n); + if (n >= 4 && memcmp(base + n - 4, ".DLL", 4) == 0) + return pool_intern(c->global, base, (u32)n); + return fallback; +} + +/* Resolve a COFF symbol-record's name to (ptr, len) without copying. + * Mirrors the resolve_sym_name helper in coff_read.c: 8-byte short + * name in-record, or (Zeroes==0, Offset) into the string table. 
*/ +static void coff_resolve_sym_name_(const u8* rec, const u8* strtab, + u32 strtab_size, const char** name_out, + u32* len_out) { + u32 z = (u32)rec[0] | ((u32)rec[1] << 8) | ((u32)rec[2] << 16) | + ((u32)rec[3] << 24); + if (z == 0) { + u32 off = (u32)rec[4] | ((u32)rec[5] << 8) | ((u32)rec[6] << 16) | + ((u32)rec[7] << 24); + if (off >= strtab_size) { + *name_out = ""; + *len_out = 0; + return; + } + const char* s = (const char*)(strtab + off); + u32 max = strtab_size - off; + u32 n = 0; + while (n < max && s[n] != '\0') ++n; + *name_out = s; + *len_out = n; + return; + } + u32 n = 0; + while (n < 8 && rec[n] != '\0') ++n; + *name_out = (const char*)rec; + *len_out = n; +} + +/* Byte-level classifier that walks a long-form COFF member's symbol + * table directly, without running read_coff. We use this instead of + * the post-read_coff approach because mingw's archives contain reloc + * types read_coff doesn't grok (e.g. IMAGE_REL_AMD64_SECTION/SECREL + * in `.idata$N` sections), and we want to drop those members entirely + * rather than fail at read time. + * + * Returns SHIM / SKIP / KEEP. On SHIM, *out_name is the interned bare + * `<name>` (the export's real symbol, decoded from `__imp_<name>`). + * + * The COFF byte layout we rely on: header is fixed 20 bytes; symbol + * table starts at PointerToSymbolTable; each symbol record is + * COFF_SYMBOL_SIZE (18) bytes including aux slots. String table + * follows symtab: u32 size header + bytes. 
*/ +#define COFF_SYM_REC_SIZE_ 18u +#define COFF_FILE_HDR_SIZE_ 20u +#define COFF_SYM_CLASS_EXTERNAL_ 2u + +static CoffArMemberClass classify_coff_archive_member_bytes( + Compiler* c, const u8* data, size_t len, Sym* out_name) { + u32 ptr_to_symtab; + u32 nsymbols; + u16 nsections; + const u8* sym_base; + const u8* strtab; + u32 strtab_size; + int has_imp = 0; + int has_idata = 0; + int has_head_def = 0; + int has_iname_def = 0; + Sym imp_bare_name = 0; + u32 i; + *out_name = 0; + if (len < COFF_FILE_HDR_SIZE_) return COFF_AR_KEEP; + nsections = (u16)((u32)data[2] | ((u32)data[3] << 8)); + ptr_to_symtab = (u32)data[8] | ((u32)data[9] << 8) | + ((u32)data[10] << 16) | ((u32)data[11] << 24); + nsymbols = (u32)data[12] | ((u32)data[13] << 8) | + ((u32)data[14] << 16) | ((u32)data[15] << 24); + if (nsymbols == 0 || ptr_to_symtab == 0) return COFF_AR_KEEP; + if ((u64)COFF_FILE_HDR_SIZE_ + (u64)nsections * 40u <= (u64)len) { + u32 si; + for (si = 0; si < (u32)nsections; ++si) { + const u8* sh = data + COFF_FILE_HDR_SIZE_ + (u64)si * 40u; + if (memcmp(sh, ".idata$", 7) == 0) { + has_idata = 1; + break; + } + } + } + if ((u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYM_REC_SIZE_ > (u64)len) + return COFF_AR_KEEP; + sym_base = data + ptr_to_symtab; + /* String table follows symtab. Leading u32 = total size (incl. self). + * Absent if there's no room after symtab. 
*/ + { + u64 symtab_end = + (u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYM_REC_SIZE_; + if (symtab_end + 4u <= (u64)len) { + u32 declared = (u32)data[symtab_end] | + ((u32)data[symtab_end + 1] << 8) | + ((u32)data[symtab_end + 2] << 16) | + ((u32)data[symtab_end + 3] << 24); + if (declared < 4u || symtab_end + (u64)declared > (u64)len) { + strtab = NULL; + strtab_size = 0; + } else { + strtab = data + symtab_end; + strtab_size = declared; + } + } else { + strtab = NULL; + strtab_size = 0; + } + } + i = 0; + while (i < nsymbols) { + const u8* p = sym_base + (u64)i * COFF_SYM_REC_SIZE_; + u16 sec_num = (u16)((u32)p[12] | ((u32)p[13] << 8)); + u8 sclass = p[16]; + u8 naux = p[17]; + const char* nm = NULL; + u32 nlen = 0; + /* Only consider defined external symbols. UNDEF (sec_num==0) + * gives no information about what this object *provides*. */ + if (sclass == COFF_SYM_CLASS_EXTERNAL_ && sec_num != 0) { + coff_resolve_sym_name_(p, strtab, strtab_size, &nm, &nlen); + if (nlen > kCoffImpPrefixLen_ && + memcmp(nm, kCoffImpPrefix_, kCoffImpPrefixLen_) == 0) { + has_imp = 1; + if (imp_bare_name == 0) { + const char* tail = nm + kCoffImpPrefixLen_; + u32 tail_len = nlen - kCoffImpPrefixLen_; + imp_bare_name = pool_intern(c->global, tail, tail_len); + } + } else if (nlen > kCoffHeadPrefixLen_ && + memcmp(nm, kCoffHeadPrefix_, kCoffHeadPrefixLen_) == 0) { + has_head_def = 1; + } else if (nlen > kCoffInameSuffixLen_ && + memcmp(nm + nlen - kCoffInameSuffixLen_, kCoffInameSuffix_, + kCoffInameSuffixLen_) == 0) { + has_iname_def = 1; + } + } + /* Skip primary + aux records. 
*/ + i += 1u + (u32)naux; + } + if (has_imp && has_idata) { + *out_name = imp_bare_name; + return COFF_AR_SHIM; + } + if (has_head_def || has_iname_def) return COFF_AR_SKIP; + return COFF_AR_KEEP; +} + +/* Build a fresh ObjBuilder containing just `<name>` and `__imp_<name>` + * as defined-at-OBJ_SEC_NONE globals (the shape read_coff_dso / + * read_coff_short_import produce for a DLL export), and annotate it + * with the providing DLL name. Mirrors read_coff_short_import. */ +static ObjBuilder* build_coff_long_import_shim(Compiler* c, Sym bare_name, + Sym dll_name) { + ObjBuilder* ob; + const char* bare; + size_t bare_len = 0; + u32 imp_len; + char* imp_buf; + Sym imp_sn; + ObjSymId id; + ObjSymId imp_id; + if (bare_name == 0 || dll_name == 0) return NULL; + bare = pool_str(c->global, bare_name, &bare_len); + if (!bare || bare_len == 0) return NULL; + ob = obj_new(c); + if (!ob) return NULL; + id = obj_symbol_ex(ob, bare_name, SB_GLOBAL, SV_DEFAULT, SK_FUNC, + OBJ_SEC_NONE, 0, 0, 0); + obj_sym_mark_referenced(ob, id); + imp_len = kCoffImpPrefixLen_ + (u32)bare_len; + imp_buf = (char*)arena_array(c->scratch, char, imp_len); + memcpy(imp_buf, kCoffImpPrefix_, kCoffImpPrefixLen_); + memcpy(imp_buf + kCoffImpPrefixLen_, bare, bare_len); + imp_sn = pool_intern(c->global, imp_buf, imp_len); + imp_id = obj_symbol_ex(ob, imp_sn, SB_GLOBAL, SV_DEFAULT, SK_OBJ, + OBJ_SEC_NONE, 0, 0, 0); + obj_sym_mark_referenced(ob, imp_id); + obj_set_coff_import_dll(ob, dll_name); + obj_finalize(ob); + return ob; +} + +static int coff_skip_long_import_shim_bare(Compiler* c, Sym bare_name) { + const char* s; + size_t n = 0; + if (!bare_name) return 0; + s = pool_str(c->global, bare_name, &n); + if (!s) return 0; + /* llvm-mingw's UCRT libmsvcrt.a intentionally provides these legacy + * CRT entry helpers as regular archive members later in the same + * archive. Prefer those wrappers over the older direct msvcrt.dll + * import shims. 
*/ + return (n == 13 && memcmp(s, "__getmainargs", 13) == 0) || + (n == 13 && memcmp(s, "__p___initenv", 13) == 0); +} + LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data, size_t len, u8 whole_archive, u8 link_mode, u8 group_id) { @@ -227,8 +553,12 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data, CfreeArMember mem; LinkArchive* ar; u32 n; + Sym archive_dll_name = 0; + int is_coff_target = (l && l->c->target.obj == CFREE_OBJ_COFF); if (!l || !data || !len) return LINK_INPUT_NONE; + if (is_coff_target) + archive_dll_name = derive_dll_name_from_archive_path(l->c, name); in_arc.name = name; in_arc.data = data; @@ -277,6 +607,35 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data, while (cfree_ar_iter_next(it, &mem) == CFREE_ITER_ITEM && n < ar->nmembers) { ObjBuilder* ob = NULL; CfreeBinFmt mfmt = cfree_detect_fmt(mem.data, mem.size); + /* COFF long-form import-archive absorption (mingw `.a`). Classify + * the member from raw bytes *before* read_coff so we can drop + * members carrying `.idata$N` reloc types read_coff doesn't model + * (e.g. IMAGE_REL_AMD64_SECTION) without ever invoking the reader + * on them. KEEP members fall through to the standard read path. */ + if (mfmt == CFREE_BIN_COFF && is_coff_target && archive_dll_name != 0) { + Sym bare = 0; + CoffArMemberClass cls = classify_coff_archive_member_bytes( + l->c, mem.data, mem.size, &bare); + if (cls == COFF_AR_SHIM) { + if (coff_skip_long_import_shim_bare(l->c, bare)) { + ob = NULL; + } else { + Sym member_dll = + derive_dll_name_from_archive_member(l->c, mem.name, + archive_dll_name); + ob = build_coff_long_import_shim(l->c, bare, member_dll); + } + } else if (cls == COFF_AR_SKIP) { + ob = NULL; + } + if (cls != COFF_AR_KEEP) { + ar->members[n].name = + mem.name ? 
pool_intern_cstr(l->c->global, mem.name) : 0; + ar->members[n].obj = ob; + ++n; + continue; + } + } switch (mfmt) { case CFREE_BIN_ELF: ob = read_elf(l->c, mem.name, mem.data, mem.size); @@ -284,6 +643,9 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data, case CFREE_BIN_MACHO: ob = read_macho(l->c, mem.name, mem.data, mem.size); break; + case CFREE_BIN_COFF: + ob = read_coff(l->c, mem.name, mem.data, mem.size); + break; default: compiler_panic(l->c, no_loc(), "link_add_archive_bytes: unsupported member " @@ -360,6 +722,11 @@ void link_set_pie(Linker* l, int enable) { l->emit_pie = enable ? 1 : 0; } +void link_set_pe_subsystem(Linker* l, u16 subsystem) { + if (!l) return; + l->pe_subsystem = subsystem; +} + void link_set_jit_host(Linker* l, const CfreeJitHost* host) { if (!l) return; l->jit_host = host; @@ -504,6 +871,9 @@ static void link_image_release(LinkImage* img) { if (m->section) img->heap->free(img->heap, m->section, sizeof(*m->section) * m->nsection); + if (m->comdat_discarded) + img->heap->free(img->heap, m->comdat_discarded, + m->nsection ? m->nsection : 1u); } img->heap->free(img->heap, img->input_maps, sizeof(*img->input_maps) * img->ninput_maps); @@ -591,9 +961,8 @@ void link_emit_image_writer(LinkImage* img, Writer* w) { link_emit_macho(img, w); return; case CFREE_OBJ_COFF: - compiler_panic(img->c, no_loc(), - "link_emit_image_writer: COFF/PE linker emit not yet " - "implemented"); + link_emit_coff(img, w); + return; case CFREE_OBJ_WASM: compiler_panic(img->c, no_loc(), "link_emit_image_writer: Wasm linker emit not yet " diff --git a/src/link/link.h b/src/link/link.h @@ -199,6 +199,7 @@ void link_set_jit_mode(Linker*, int enable); * emit_static_exe; both may be set in the same link (the IFUNC ctor * still wants to run on the exe path regardless of PIE). */ void link_set_pie(Linker*, int enable); +void link_set_pe_subsystem(Linker*, u16 subsystem); /* Runtime loader path written into PT_INTERP / .interp. 
NULL leaves the * default ("/lib/ld-musl-aarch64.so.1" for aarch64-linux). Only diff --git a/src/link/link_arch.h b/src/link/link_arch.h @@ -101,6 +101,20 @@ typedef struct LinkArchDesc { u32 macho_stub_size; void (*emit_macho_stub)(u8* dst, u64 stub_vaddr, u64 got_slot_vaddr); + /* PE/COFF IAT stub. Used when target.obj == CFREE_OBJ_COFF and a + * relocation targets an imported function. The stub performs an + * indirect jump through the IAT slot: + * x64: ff 25 disp32 ; jmp [rip + disp_to_iat_slot] (6 B) + * aa64: adrp/ldr/br x16 ; load IAT slot, branch to it (12 B) + * + * The 32-bit displacement on x64 and the page-relative pair on + * aa64 are baked into the stub bytes directly (no apply-time + * relocations needed), so callers do not enqueue extra + * LinkRelocApply records — see how emit_iplt_stub returns 0 for + * arches that can encode the displacement inline. */ + u32 coff_stub_size; + void (*emit_coff_iat_stub)(u8* dst, u64 stub_vaddr, u64 iat_slot_vaddr); + /* Relocation classification used by format-specific linker passes. */ int (*is_branch_reloc)(RelocKind); int (*is_got_load_reloc)(RelocKind); diff --git a/src/link/link_coff.c b/src/link/link_coff.c @@ -0,0 +1,1741 @@ +/* link_emit_coff: write a PE32+ MH_EXECUTABLE-style image to the + * caller-provided Writer. + * + * Phase 3.1 deliverable per doc/WINDOWS.md: skeleton + base-reloc + * handling for the four standard PE sections. Import-table synthesis + * (.idata / IAT) lands in Phase 3.2; per-arch IAT stub bytes in 3.3; + * TLS directory in 3.5; debug directory in 3.6 — those code paths + * panic loudly here so the strict-by-default posture surfaces them. 
+ * + * File layout (in write order): + * + * [DOS stub IMAGE_DOS_HEADER] -- 64 bytes; e_lfanew=0x40 + * [PE signature "PE\0\0"] -- 4 bytes + * [IMAGE_FILE_HEADER] -- 20 bytes + * [IMAGE_OPTIONAL_HEADER64] -- 240 bytes (PE32+) + * [IMAGE_SECTION_HEADER * nsec] -- 40 bytes each + * [pad to FileAlignment] + * [.text bytes, padded to FileAlignment] + * [.rdata bytes, padded to FileAlignment] + * [.data bytes, padded to FileAlignment] + * [.reloc bytes, padded to FileAlignment] + * + * .bss is uninitialized — it has a section header (with VirtualSize) + * but no file bytes and PointerToRawData=0. + * + * RVAs follow SectionAlignment (0x1000); FileAlignment is 0x200; the + * first section starts at RVA 0x1000 (right after the headers map). + * ImageBase is the Win64 convention 0x140000000. + * + * Reloc strategy. The link layout pass has already placed every kept + * input section into img->sections / img->segments under the ELF/Mach-O + * coordinate system (image-relative vaddrs, often packed by permission + * bucket). COFF wants a different packing — the four standard + * sections at SectionAlignment-aligned RVAs — so this writer re-derives + * per-input-section vaddrs from scratch and shifts each LinkSection / + * symbol / LinkRelocApply by its section's per-section delta before + * applying relocations. link_emit_macho takes the same tack for its + * __DATA_CONST splits; the ELF writer leaves vaddrs alone because the + * link layout already matches ELF's PT_LOAD shape. 
*/ + +#include <stdlib.h> +#include <string.h> + +#include "arch/arch.h" +#include "core/core.h" +#include "core/heap.h" +#include "core/pool.h" +#include "core/util.h" +#include "core/vec.h" +#include "link/link.h" +#include "link/link_arch.h" +#include "link/link_internal.h" +#include "obj/coff.h" + +/* ---- .idata layout constants ---- + * + * Per doc/WINDOWS.md §3.2: the .idata content is a concatenation of an + * IMAGE_IMPORT_DESCRIPTOR table (NULL-terminated), one ILT per DLL + * (each NULL-terminated u64 array), one IAT per DLL (same shape), + * a hint/name table, and a DLL-name string pool. Each block is + * pointer-sized aligned within the section. AArch64 import thunks use + * PAGEOFFSET_12L for 64-bit ILT/IAT slots, so those sub-blocks must be + * 8-byte aligned. */ +#define PE_IDATA_BLOCK_ALIGN 8u +/* Hint field on IMAGE_IMPORT_BY_NAME records. cfree never has a real + * hint (the OS loader doesn't need one to do the bsearch on the DLL's + * export name table), so 0 is the canonical "no hint" value. */ +#define PE_IMPORT_HINT_NONE 0u + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +/* ---- PE/Win64 layout constants ---- + * + * Centralised here so the wire-format numbers in this TU stay named + * (and the magic-numbers rule in CLAUDE.md is honoured). Values match + * the PE/COFF spec + Win64 conventions; mingw-w64's ld defaults agree. 
*/ +#define PE_IMAGE_BASE LINK_PE_IMAGE_BASE +#define PE_SECTION_ALIGNMENT 0x1000u +#define PE_FILE_ALIGNMENT 0x200u +#define PE_FIRST_SECTION_RVA 0x1000u +#define PE_DOS_E_LFANEW 0x40u +#define PE_NUM_DATA_DIRS COFF_NUM_DATA_DIRECTORIES +#define PE_OPT_HDR_SIZE COFF_OPT_HDR64_SIZE +#define PE_LINKER_MAJOR 0u +#define PE_LINKER_MINOR 1u +#define PE_OS_MAJOR 6u /* Windows Vista+ — mingw default */ +#define PE_OS_MINOR 0u +#define PE_SUBSYS_MAJOR 6u +#define PE_SUBSYS_MINOR 0u +#define PE_STACK_RESERVE 0x100000ULL +#define PE_STACK_COMMIT 0x1000ULL +#define PE_HEAP_RESERVE 0x100000ULL +#define PE_HEAP_COMMIT 0x1000ULL +#define PE_DLL_CHARS \ + (IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA | \ + IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE | \ + IMAGE_DLLCHARACTERISTICS_NX_COMPAT | \ + IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE) + +/* PE32+ DOS-stub-to-PE-signature offsets (manual, since we marshal + * field-by-field rather than memcpy'ing the packed struct). */ +#define PE_DOS_HDR_SIZE COFF_DOS_HEADER_SIZE +#define PE_SIG_SIZE 4u +#define PE_FILE_HDR_SIZE COFF_FILE_HEADER_SIZE +#define PE_SECTION_HDR_SIZE COFF_SECTION_HEADER_SIZE + +/* Standard PE output buckets, plus .idata (import directory) and + * .reloc — both synthesised here rather than copied from input + * sections. Order matters: it's the on-image RVA order. */ +typedef enum CoffBucket { + COFF_BUCKET_TEXT = 0, + COFF_BUCKET_RDATA = 1, + COFF_BUCKET_IDATA = 2, + COFF_BUCKET_DATA = 3, + COFF_BUCKET_TLS = 4, + COFF_BUCKET_BSS = 5, + COFF_BUCKET_RELOC = 6, + COFF_NBUCKETS = 7, +} CoffBucket; + +/* IMAGE_TLS_DIRECTORY64 wire size: u64*4 + u32*2 = 40 bytes. */ +#define COFF_TLS_DIRECTORY64_SIZE 40u +/* Byte offsets of the four u64 VA fields within IMAGE_TLS_DIRECTORY64 + * — they need base relocations so ASLR can fix them up. 
*/ +#define COFF_TLSDIR_OFF_START_ADDR 0u +#define COFF_TLSDIR_OFF_END_ADDR 8u +#define COFF_TLSDIR_OFF_INDEX_ADDR 16u +#define COFF_TLSDIR_OFF_CALLBACKS 24u + +typedef struct CoffSection { + const char* name; /* short ASCII; <= 8 bytes including NUL pad */ + u32 characteristics; + u8* bytes; /* NULL for .bss / .reloc-before-build */ + u32 size; /* VirtualSize (real bytes; for .bss, mem size) */ + u32 size_raw; /* SizeOfRawData (file size, FileAlignment-padded) */ + u32 rva; /* VirtualAddress in image */ + u32 file_offset; /* PointerToRawData; 0 for .bss */ + u8 in_image; /* 1 if this bucket is emitted as a section */ + u8 has_file_bytes; /* 0 for .bss */ + u8 pad[2]; +} CoffSection; + +/* ---- byte writer helpers ---- */ + +static void coff_write_zeroes(Writer* w, u64 n) { + static const u8 zeroes[256] = {0}; + while (n) { + u64 step = n > sizeof(zeroes) ? sizeof(zeroes) : n; + cfree_writer_write(w, zeroes, (size_t)step); + n -= step; + } +} + +/* Return the COFF bucket for a kept LinkSection. SF_TLS sections route + * into the dedicated .tls bucket so SECREL relocations from TLS access + * code resolve against the merged TLS image, not against .data. + * Everything else partitions on SF_EXEC / SF_WRITE plus the SSEM_NOBITS + * bit for .bss. */ +static CoffBucket coff_bucket_for(const LinkSection* ls) { + if (ls->flags & SF_EXEC) return COFF_BUCKET_TEXT; + if (ls->flags & SF_TLS) return COFF_BUCKET_TLS; + if (ls->sem == SSEM_NOBITS) return COFF_BUCKET_BSS; + if (ls->flags & SF_WRITE) return COFF_BUCKET_DATA; + return COFF_BUCKET_RDATA; +} + +/* True for relocation kinds that need an entry in .reloc so the OS + * loader can patch the site after ASLR picks a runtime ImageBase. + * PC-relative fixups don't need base-relocs — the displacement is + * load-invariant. 
*/ +static int coff_reloc_needs_base_reloc(RelocKind k) { + return k == R_ABS64 || k == R_ABS32; +} + +/* Look up the LinkSection whose [vaddr, vaddr+size] range covers the + * given image-relative address `v`, or return NULL. Used to attribute + * symbol vaddrs to a containing section so we can apply per-section + * vaddr deltas after re-laying out for PE. */ +static const LinkSection* coff_section_at(const LinkImage* img, u64 v) { + u32 i; + for (i = 0; i < img->nsections; ++i) { + const LinkSection* ls = &img->sections[i]; + if (v >= ls->vaddr && v <= ls->vaddr + ls->size) return ls; + } + return NULL; +} + +/* Per-input-section delta map. Indexed by `LinkSection.id - 1`. + * Populated by coff_build_buckets. Consumed by every subsequent pass + * that needs to translate input-coordinate offsets (the world that + * img->sections / img->relocs live in) into PE-coordinate ones (where + * the writer plants bytes). delta is stored explicitly so callers + * avoid recomputing (new_rva + bucket.rva - old_vaddr) for every + * LinkRelocApply whose link_section_id points at the section. */ +typedef struct CoffSecMap { + u32 new_rva; /* image-relative RVA after PE relayout */ + u32 new_file_off; /* file offset of the patched byte */ + i64 delta; /* new_rva - old_vaddr */ + u8 bucket; + u8 pad[3]; +} CoffSecMap; + +/* TLS directory placement state. Populated when at least one SF_TLS + * section survives dead-strip; consumed by the optional-header writer, + * the .reloc builder (base-relocs for the four absolute VA fields), + * and the .rdata emit pass that writes the final 40-byte record. 
*/ +typedef struct CoffTlsLayout { + int present; /* 1 iff at least one TLS section was kept */ + u32 dir_rdata_off; /* byte offset of the IMAGE_TLS_DIRECTORY64 within .rdata */ + u32 tls_size; /* size of the merged .tls bucket */ + LinkSymId tls_index_sym; /* resolved _tls_index LinkSymbol */ + LinkSymId callbacks_sym; /* __xl_a when mingw's TLS callbacks are linked */ + u64 callbacks_addend; /* mingw points past the leading NULL sentinel */ +} CoffTlsLayout; + +static LinkSymId coff_find_sym(LinkImage* img, const char* name) { + Sym sym = pool_intern_cstr(img->c->global, name); + u32 n = LinkSyms_count(&img->syms); + u32 i; + for (i = 0; i < n; ++i) { + const LinkSymbol* s = LinkSyms_at(&img->syms, i); + if (s->name == sym) return (LinkSymId)(i + 1); + } + return LINK_SYM_NONE; +} + +/* Locate _tls_index by name in the resolved symbol table. mingw's + * libmingwex defines it (as part of tlsmcrt); without a CRT the link + * fails here with a clear message rather than producing a TLS + * directory pointing at a stale address. 
*/ +static LinkSymId coff_find_tls_index_sym(LinkImage* img) { + return coff_find_sym(img, "_tls_index"); +} + +static const LinkSection* coff_symbol_section(const LinkImage* img, + const LinkSymbol* s) { + if (s->name) { + size_t n = 0; + const char* nm = pool_str(img->c->global, s->name, &n); + const char* sec_name = NULL; + if (nm && n == 6 && memcmp(nm, "__xd_a", 6) == 0) + sec_name = ".CRT$XDA"; + else if (nm && n == 6 && memcmp(nm, "__xd_z", 6) == 0) + sec_name = ".CRT$XDZ"; + else if (nm && n == 6 && memcmp(nm, "__xl_a", 6) == 0) + sec_name = ".CRT$XLA"; + else if (nm && n == 6 && memcmp(nm, "__xl_c", 6) == 0) + sec_name = ".CRT$XLC"; + else if (nm && n == 6 && memcmp(nm, "__xl_d", 6) == 0) + sec_name = ".CRT$XLD"; + else if (nm && n == 6 && memcmp(nm, "__xl_z", 6) == 0) + sec_name = ".CRT$XLZ"; + if (sec_name) { + u32 i; + size_t sn = strlen(sec_name); + for (i = 0; i < img->nsections; ++i) { + const LinkSection* ls = &img->sections[i]; + size_t ln = 0; + const char* lname = + ls->name ? 
pool_str(img->c->global, ls->name, &ln) : NULL; + if (lname && ln == sn && memcmp(lname, sec_name, sn) == 0) + return ls; + } + } + } + if (s->section_id != LINK_SEC_NONE && s->section_id <= img->nsections) + return &img->sections[s->section_id - 1]; + return coff_section_at(img, s->vaddr); +} + +static u64 coff_symbol_final_va(const LinkImage* img, + const CoffSection out[COFF_NBUCKETS], + const CoffSecMap* map, + LinkSymId id, + const char* what) { + const LinkSymbol* s = LinkSyms_at(&img->syms, id - 1); + if (!s->defined || s->kind == SK_ABS) { + compiler_panic(img->c, no_loc(), + "link_emit_coff: `%s` is not a defined section-bound " + "symbol", + what); + } + const LinkSection* sec = coff_symbol_section(img, s); + if (!sec) { + compiler_panic(img->c, no_loc(), + "link_emit_coff: `%s` has no containing section", what); + } + u8 b = map[sec->id - 1].bucket; + return PE_IMAGE_BASE + (u64)out[b].rva + + (u64)map[sec->id - 1].new_rva + (s->vaddr - sec->vaddr); +} + +/* Reserve 40 bytes at the tail of the .rdata bucket for the + * IMAGE_TLS_DIRECTORY64 record. Records the offset for later emit and + * grows the bucket if needed. The bytes start zeroed; coff_emit_tls_dir + * fills them in once final RVAs are known. */ +static void coff_plan_tls_layout(LinkImage* img, + CoffSection out[COFF_NBUCKETS], + u32* rdata_cap, CoffTlsLayout* tls) { + memset(tls, 0, sizeof(*tls)); + if (out[COFF_BUCKET_TLS].size == 0) return; + tls->present = 1; + tls->tls_size = out[COFF_BUCKET_TLS].size; + tls->tls_index_sym = coff_find_tls_index_sym(img); + if (tls->tls_index_sym == LINK_SYM_NONE) { + compiler_panic(img->c, no_loc(), + "link_emit_coff: .tls section requires `_tls_index` " + "(provided by mingw libmingwex / tlsmcrt.o) — none of " + "the linked inputs define it"); + } + /* IMAGE_TLS_DIRECTORY64 needs 8-byte alignment for its u64 fields; + * round the .rdata size up before reserving the 40-byte record. 
*/ + tls->callbacks_sym = coff_find_sym(img, "__xl_a"); + if (tls->callbacks_sym != LINK_SYM_NONE) { + tls->callbacks_addend = 8; + } else { + tls->callbacks_sym = coff_find_sym(img, "__xl_c"); + tls->callbacks_addend = 0; + } + u32 rdata_size = (u32)ALIGN_UP((u64)out[COFF_BUCKET_RDATA].size, 8ull); + u32 need = rdata_size + COFF_TLS_DIRECTORY64_SIZE; + if (need > *rdata_cap) { + (void)VEC_GROW(img->heap, out[COFF_BUCKET_RDATA].bytes, *rdata_cap, need); + } + /* Zero any padding bytes introduced by the alignment bump and the + * directory slot itself. */ + if (rdata_size > out[COFF_BUCKET_RDATA].size) { + memset(out[COFF_BUCKET_RDATA].bytes + out[COFF_BUCKET_RDATA].size, 0, + rdata_size - out[COFF_BUCKET_RDATA].size); + } + memset(out[COFF_BUCKET_RDATA].bytes + rdata_size, 0, + COFF_TLS_DIRECTORY64_SIZE); + tls->dir_rdata_off = rdata_size; + out[COFF_BUCKET_RDATA].size = need; +} + +/* Write the IMAGE_TLS_DIRECTORY64 bytes once all bucket RVAs are + * final. Each u64 VA field gets ImageBase + RVA; the base-reloc pass + * will emit IMAGE_REL_BASED_DIR64 entries so ASLR keeps them valid. */ +static void coff_emit_tls_dir(const LinkImage* img, + const CoffSection out[COFF_NBUCKETS], + const CoffSecMap* map, + const CoffTlsLayout* tls) { + if (!tls->present) return; + u64 tls_start = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_TLS].rva; + u64 tls_end = tls_start + (u64)tls->tls_size; + u64 idx_vaddr = + coff_symbol_final_va(img, out, map, tls->tls_index_sym, "_tls_index"); + const char* callbacks_name = + tls->callbacks_addend ? "__xl_a" : "__xl_c"; + u64 callbacks_vaddr = + tls->callbacks_sym + ? 
coff_symbol_final_va(img, out, map, tls->callbacks_sym, + callbacks_name) + + tls->callbacks_addend + : 0; + + u8* p = out[COFF_BUCKET_RDATA].bytes + tls->dir_rdata_off; + wr_u64_le(p + COFF_TLSDIR_OFF_START_ADDR, tls_start); + wr_u64_le(p + COFF_TLSDIR_OFF_END_ADDR, tls_end); + wr_u64_le(p + COFF_TLSDIR_OFF_INDEX_ADDR, idx_vaddr); + wr_u64_le(p + COFF_TLSDIR_OFF_CALLBACKS, callbacks_vaddr); + wr_u32_le(p + 32, 0); /* SizeOfZeroFill */ + wr_u32_le(p + 36, 0); /* Characteristics */ +} + +static void coff_define_tls_used(LinkImage* img, + const CoffSection out[COFF_NBUCKETS], + const CoffTlsLayout* tls) { + if (!tls->present) return; + if (!img->linker) return; + link_emit_boundary_sym(img->linker, img, "_tls_used", + PE_IMAGE_BASE + (u64)out[COFF_BUCKET_RDATA].rva + + (u64)tls->dir_rdata_off); +} + +/* ---- import-table synthesis (Phase 3.2) --------------------------- + * + * Per doc/WINDOWS.md §3.2: every LinkSymbol with `imported = 1` gets + * routed through an IAT slot synthesized in `.idata`. Function + * imports additionally receive a small per-arch stub in `.text` + * (`ff 25 disp32` on x64 / `adrp;ldr;br` on aa64) so a direct CALL26 + * or PC32 against the symbol lands on a stub that indirects through + * the IAT. Data imports skip the stub — the symbol's final vaddr is + * just the IAT slot vaddr, and code-gen emits a `mov rax, [slot]` + * sequence the same way it would for any other GOT-style load. + * + * cfree's COFF code-gen uses direct symbol references; there is no + * separate `__imp_<name>` LinkSymbol consulted at link time. The + * IAT-slot rewrite happens entirely by overriding the imported + * symbol's vaddr in apply_all_relocs. 
*/ + +typedef struct CoffImport { + LinkSymId sym; /* canonical LinkSymId from img->syms */ + u32 dll_idx; /* index into CoffImportTable.dlls */ + u32 stub_off; /* offset in .text bucket (functions only) */ + u32 iat_off; /* offset in .idata IAT block */ + u32 ilt_off; /* offset in .idata ILT block */ + u32 hint_off; /* offset in .idata hint/name table */ + u8 is_func; + u8 pad[3]; +} CoffImport; + +typedef struct CoffImportDll { + Sym soname; + u32 first; /* index of first import in CoffImportTable.imports */ + u32 count; + u32 ilt_off; /* offset of this DLL's ILT block in .idata */ + u32 iat_off; /* offset of this DLL's IAT block in .idata */ + u32 name_off; /* offset of DLL name string in .idata */ +} CoffImportDll; + +typedef struct CoffImportTable { + CoffImport* imports; + u32 nimports; + u32 imports_cap; /* heap-allocation size for cleanup */ + u32 nfunc_imports; /* subset of nimports that needs a .text stub */ + CoffImportDll* dlls; + u32 ndlls; + u32 dlls_cap; /* heap-allocation size for cleanup */ + /* Offsets within .idata of the five sub-blocks. Filled in by + * coff_plan_idata_layout once nimports / ndlls is known. */ + u32 desc_off; /* always 0 — descriptors come first */ + u32 desc_size; + u32 ilt_base; + u32 ilt_total; + u32 iat_base; + u32 iat_total; + u32 hint_base; + u32 hint_total; + u32 name_base; + u32 name_total; + u32 idata_size; + /* Stub region in .text bucket. Stubs are appended after every + * input .text section has been bucketed. stub_text_off is the + * bucket-local offset of the first stub; per-import stub offsets + * are stored in CoffImport.stub_off. */ + u32 stub_text_off; + u32 stub_total; +} CoffImportTable; + +/* Sort comparator: imports grouped by DLL slot, stable on input + * order within a DLL (sort is stable enough via secondary key). 
*/ +static int coff_import_cmp(const void* a, const void* b) { + const CoffImport* ia = (const CoffImport*)a; + const CoffImport* ib = (const CoffImport*)b; + if (ia->dll_idx < ib->dll_idx) return -1; + if (ia->dll_idx > ib->dll_idx) return 1; + /* Secondary: LinkSymId so the order is reproducible. */ + if (ia->sym < ib->sym) return -1; + if (ia->sym > ib->sym) return 1; + return 0; +} + +static const char* coff_import_lookup_name(Compiler* c, const LinkSymbol* s, + size_t* nlen_out) { + size_t nlen = 0; + const char* nm = s->name ? pool_str(c->global, s->name, &nlen) : NULL; + static const char kImpPrefix[] = "__imp_"; + const size_t kImpPrefixLen = sizeof(kImpPrefix) - 1u; + if (nm && nlen > kImpPrefixLen && + memcmp(nm, kImpPrefix, kImpPrefixLen) == 0) { + nm += kImpPrefixLen; + nlen -= kImpPrefixLen; + } + if (nlen_out) *nlen_out = nlen; + return nm; +} + +/* True iff the import classifies as function-like. Mirrors the ELF + * `sym_is_func_import` heuristic: if the canonical kind is known + * we trust it, otherwise we default to function (which matches the + * COFF code-gen contract — direct calls are by far the common case + * and a data import wrongly stubbed would still fail loudly via the + * IAT-routed call). */ +static int coff_import_is_func(Compiler* c, const LinkSymbol* s) { + if (s->name) { + size_t nlen = 0; + const char* nm = pool_str(c->global, s->name, &nlen); + if (nm && nlen > 6u && memcmp(nm, "__imp_", 6u) == 0) return 0; + } + if (s->kind == SK_FUNC || s->kind == SK_IFUNC) return 1; + if (s->kind == SK_OBJ) return 0; + /* SK_UNDEF / SK_NOTYPE: assume function (the common case). */ + return 1; +} + +/* Walk LinkSyms, collect imports, group by DLL soname. Returns 1 if + * any imports were collected, 0 otherwise (caller skips the entire + * .idata path). 
*/ +static int coff_collect_imports(LinkImage* img, CoffImportTable* it) { + Heap* heap = img->heap; + Compiler* c = img->c; + Linker* l = img->linker; + u32 nsyms = LinkSyms_count(&img->syms); + u32 imp_cap = 0; + u32 dll_cap = 0; + u32 i; + + memset(it, 0, sizeof(*it)); + if (!l) return 0; + for (i = 0; i < nsyms; ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, i); + LinkInput* in; + u32 dll_idx = (u32)-1; + u32 d; + if (!s->imported) continue; + if (s->name == 0) continue; + if (s->dso_input_id == LINK_INPUT_NONE) { + compiler_panic(c, no_loc(), + "link_emit_coff: imported symbol has no providing DSO"); + } + /* img->globals only carries defined globals/weaks; imported undefs + * never land there. Dedup by name: skip if any earlier slot + * already collected this name. */ + { + int dup = 0; + for (u32 k = 0; k < it->nimports; ++k) { + LinkSymbol* prev = LinkSyms_at(&img->syms, it->imports[k].sym - 1); + if (prev->name == s->name) { dup = 1; break; } + } + if (dup) continue; + } + if (s->dso_input_id - 1u >= LinkInputs_count(&l->inputs)) { + compiler_panic(c, no_loc(), + "link_emit_coff: import dso_input_id out of range"); + } + in = LinkInputs_at(&l->inputs, s->dso_input_id - 1u); + if (in->soname == 0) { + compiler_panic(c, no_loc(), + "link_emit_coff: providing DSO has no soname; cannot " + "emit IMAGE_IMPORT_DESCRIPTOR.Name"); + } + /* Find-or-add the DLL slot. 
*/ + for (d = 0; d < it->ndlls; ++d) { + if (it->dlls[d].soname == in->soname) { dll_idx = d; break; } + } + if (dll_idx == (u32)-1) { + if (VEC_GROW(heap, it->dlls, dll_cap, it->ndlls + 1u)) + compiler_panic(c, no_loc(), "link_emit_coff: oom on import dlls"); + dll_idx = it->ndlls++; + memset(&it->dlls[dll_idx], 0, sizeof(it->dlls[dll_idx])); + it->dlls[dll_idx].soname = in->soname; + } + if (VEC_GROW(heap, it->imports, imp_cap, it->nimports + 1u)) + compiler_panic(c, no_loc(), "link_emit_coff: oom on imports"); + memset(&it->imports[it->nimports], 0, + sizeof(it->imports[it->nimports])); + it->imports[it->nimports].sym = s->id; + it->imports[it->nimports].dll_idx = dll_idx; + it->imports[it->nimports].is_func = (u8)coff_import_is_func(c, s); + if (it->imports[it->nimports].is_func) ++it->nfunc_imports; + ++it->nimports; + it->dlls[dll_idx].count++; + } + if (it->nimports == 0) return 0; + /* Re-bucket the imports array so each DLL's run is contiguous. */ + qsort(it->imports, it->nimports, sizeof(*it->imports), coff_import_cmp); + /* Fix up CoffImportDll.first now that imports[] is sorted. */ + { + u32 cur = 0; + for (u32 d = 0; d < it->ndlls; ++d) { + it->dlls[d].first = cur; + cur += it->dlls[d].count; + } + } + it->imports_cap = imp_cap; + it->dlls_cap = dll_cap; + return 1; +} + +static void coff_imports_free(LinkImage* img, CoffImportTable* it) { + Heap* heap = img->heap; + if (it->imports) { + heap->free(heap, it->imports, + (size_t)it->imports_cap * sizeof(*it->imports)); + } + if (it->dlls) { + heap->free(heap, it->dlls, + (size_t)it->dlls_cap * sizeof(*it->dlls)); + } +} + +/* Compute every per-block / per-import offset inside .idata and the + * total .idata size in bytes. Also assigns per-import hint/name and + * dll-name offsets so the descriptor table can reference them by RVA + * later (RVAs need the bucket's final RVA, added in coff_emit_idata). 
*/ +static void coff_plan_idata_layout(LinkImage* img, CoffImportTable* it) { + Compiler* c = img->c; + u32 off; + + /* Block 1: import descriptors (one per DLL + zero terminator). */ + it->desc_off = 0; + it->desc_size = (it->ndlls + 1u) * COFF_IMPORT_DESCRIPTOR_SIZE; + off = (u32)ALIGN_UP((u64)it->desc_size, (u64)PE_IDATA_BLOCK_ALIGN); + + /* Block 2: ILTs. Per DLL: count entries + 1 (terminator), 8 B each. */ + it->ilt_base = off; + for (u32 d = 0; d < it->ndlls; ++d) { + it->dlls[d].ilt_off = off; + /* Per-import: assign ilt_off within this DLL's block. */ + for (u32 k = 0; k < it->dlls[d].count; ++k) { + it->imports[it->dlls[d].first + k].ilt_off = + off + k * (u32)COFF_THUNK_DATA64_SIZE; + } + off += (it->dlls[d].count + 1u) * (u32)COFF_THUNK_DATA64_SIZE; + } + it->ilt_total = off - it->ilt_base; + off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN); + + /* Block 3: IATs (same shape as ILTs). */ + it->iat_base = off; + for (u32 d = 0; d < it->ndlls; ++d) { + it->dlls[d].iat_off = off; + for (u32 k = 0; k < it->dlls[d].count; ++k) { + it->imports[it->dlls[d].first + k].iat_off = + off + k * (u32)COFF_THUNK_DATA64_SIZE; + } + off += (it->dlls[d].count + 1u) * (u32)COFF_THUNK_DATA64_SIZE; + } + it->iat_total = off - it->iat_base; + off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN); + + /* Block 4: hint/name records. Each: u16 hint + NUL-term name + + * 1-byte pad if the resulting size is odd (PE/COFF spec). */ + it->hint_base = off; + for (u32 i = 0; i < it->nimports; ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, it->imports[i].sym - 1); + size_t nlen = 0; + const char* nm = coff_import_lookup_name(c, s, &nlen); + if (!nm || nlen == 0) + compiler_panic(c, no_loc(), + "link_emit_coff: imported symbol has empty name"); + it->imports[i].hint_off = off; + /* hint (2 B) + name (nlen + 1) + optional pad to even. 
*/ + u32 rec = 2u + (u32)nlen + 1u; + if (rec & 1u) ++rec; + off += rec; + } + it->hint_total = off - it->hint_base; + off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN); + + /* Block 5: DLL name strings (NUL-terminated). */ + it->name_base = off; + for (u32 d = 0; d < it->ndlls; ++d) { + size_t nlen = 0; + const char* nm = pool_str(c->global, it->dlls[d].soname, &nlen); + if (!nm || nlen == 0) + compiler_panic(c, no_loc(), + "link_emit_coff: providing DSO has empty soname"); + it->dlls[d].name_off = off; + off += (u32)nlen + 1u; + } + it->name_total = off - it->name_base; + it->idata_size = off; +} + +/* Append the function-import stubs to the .text bucket. Each stub is + * `coff_stub_size` bytes (arch-specific). Records each stub's bucket- + * local offset on the matching CoffImport so the per-symbol stub vaddr + * can be computed once the .text bucket's RVA is final. */ +static void coff_append_stubs(LinkImage* img, CoffImportTable* it, + CoffSection* text_bucket, + u32* text_bucket_cap) { + Heap* heap = img->heap; + Compiler* c = img->c; + const LinkArchDesc* arch = link_arch_desc_for(c); + u32 stub_size; + u32 stub_align; + u64 cur; + if (!arch || arch->coff_stub_size == 0 || !arch->emit_coff_iat_stub) { + compiler_panic(c, no_loc(), + "link_emit_coff: arch has no COFF IAT stub emitter"); + } + stub_size = arch->coff_stub_size; + /* Stubs are pure code; aligning to instruction alignment is enough. + * x64 wants byte-granular, aa64 wants 4 B; align to stub size as a + * convenient upper bound. */ + stub_align = stub_size; + cur = (u64)text_bucket->size; + cur = ALIGN_UP(cur, (u64)stub_align); + it->stub_text_off = (u32)cur; + for (u32 i = 0; i < it->nimports; ++i) { + if (!it->imports[i].is_func) continue; + it->imports[i].stub_off = (u32)cur; + cur += stub_size; + } + it->stub_total = (u32)cur - it->stub_text_off; + if (it->stub_total == 0) return; + /* Grow the .text bucket buffer to hold the new region. 
*/ + u32 need = (u32)cur; + if (need > *text_bucket_cap) { + (void)VEC_GROW(heap, text_bucket->bytes, *text_bucket_cap, need); + } + /* Zero the alignment pad; stub bytes are written later by + * coff_emit_stubs once vaddrs are known. */ + if ((u32)cur > text_bucket->size) { + memset(text_bucket->bytes + text_bucket->size, 0, + (size_t)((u32)cur - text_bucket->size)); + } + text_bucket->size = (u32)cur; +} + +/* Emit each function import's IAT stub into the .text bucket. Must + * run after coff_assign_layout has fixed both .text's RVA and + * .idata's RVA, since the stub bakes in the post-shift IAT slot + * displacement. */ +static void coff_emit_stubs(LinkImage* img, const CoffImportTable* it, + const CoffSection out[COFF_NBUCKETS]) { + Compiler* c = img->c; + const LinkArchDesc* arch = link_arch_desc_for(c); + u64 img_base = PE_IMAGE_BASE; + u32 text_rva = out[COFF_BUCKET_TEXT].rva; + u32 idata_rva = out[COFF_BUCKET_IDATA].rva; + if (!arch || !arch->emit_coff_iat_stub) { + compiler_panic(c, no_loc(), + "link_emit_coff: arch has no COFF IAT stub emitter"); + } + for (u32 i = 0; i < it->nimports; ++i) { + u64 stub_va, slot_va; + if (!it->imports[i].is_func) continue; + stub_va = img_base + (u64)text_rva + (u64)it->imports[i].stub_off; + slot_va = img_base + (u64)idata_rva + (u64)it->imports[i].iat_off; + arch->emit_coff_iat_stub(out[COFF_BUCKET_TEXT].bytes + + it->imports[i].stub_off, + stub_va, slot_va); + } +} + +/* Emit .idata content into the bucket buffer. Allocates the buffer + * here (size is already known from coff_plan_idata_layout). */ +static void coff_emit_idata(LinkImage* img, const CoffImportTable* it, + CoffSection out[COFF_NBUCKETS], + u32* idata_bucket_cap) { + Heap* heap = img->heap; + Compiler* c = img->c; + CoffSection* idata = &out[COFF_BUCKET_IDATA]; + u32 idata_rva = idata->rva; + u8* buf; + /* Allocate the bucket buffer (idata_size is already block-aligned). 
*/ + buf = (u8*)heap->alloc(heap, it->idata_size, _Alignof(u64)); + if (!buf) + compiler_panic(c, no_loc(), "link_emit_coff: oom on .idata buffer"); + memset(buf, 0, it->idata_size); + idata->bytes = buf; + idata->size = it->idata_size; + *idata_bucket_cap = it->idata_size; + + /* Block 1: IMAGE_IMPORT_DESCRIPTOR table. */ + for (u32 d = 0; d < it->ndlls; ++d) { + u8* p = buf + d * (u32)COFF_IMPORT_DESCRIPTOR_SIZE; + u32 ilt_rva = idata_rva + it->dlls[d].ilt_off; + u32 iat_rva = idata_rva + it->dlls[d].iat_off; + u32 name_rva = idata_rva + it->dlls[d].name_off; + wr_u32_le(p + 0, ilt_rva); /* OriginalFirstThunk */ + wr_u32_le(p + 4, 0u); /* TimeDateStamp */ + wr_u32_le(p + 8, 0u); /* ForwarderChain */ + wr_u32_le(p + 12, name_rva); /* Name */ + wr_u32_le(p + 16, iat_rva); /* FirstThunk */ + } + /* Trailing zero descriptor already zero-filled by memset. */ + + /* Blocks 2+3: ILT + IAT. Both initially point at the same hint/name + * record for each import; the OS loader rewrites IAT entries at + * load time. */ + for (u32 i = 0; i < it->nimports; ++i) { + u64 hint_rva = (u64)idata_rva + (u64)it->imports[i].hint_off; + wr_u64_le(buf + it->imports[i].ilt_off, hint_rva); + wr_u64_le(buf + it->imports[i].iat_off, hint_rva); + } + /* Per-DLL ILT/IAT terminators are u64 0, already zero-filled. */ + + /* Block 4: hint/name records. */ + for (u32 i = 0; i < it->nimports; ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, it->imports[i].sym - 1); + size_t nlen = 0; + const char* nm = coff_import_lookup_name(c, s, &nlen); + u8* p = buf + it->imports[i].hint_off; + wr_u16_le(p, PE_IMPORT_HINT_NONE); + memcpy(p + 2, nm, nlen); + /* NUL terminator + optional pad already zero. */ + } + + /* Block 5: DLL name strings. */ + for (u32 d = 0; d < it->ndlls; ++d) { + size_t nlen = 0; + const char* nm = pool_str(c->global, it->dlls[d].soname, &nlen); + memcpy(buf + it->dlls[d].name_off, nm, nlen); + /* NUL already zero. */ + } +} + +/* Per-LinkSymId vaddr override table for imports. 
Indexed by + * LinkSymId-1; 0 means "not an import". Built once after the .idata + * bucket RVA is final. Consumed by coff_apply_all_relocs in lieu of + * the symbol's own vaddr field (which is 0 for imports). */ +typedef struct CoffImportVaddr { + u64* by_sym; /* size = nsyms; 0 entries mean "not imported" */ + u32 nsyms; +} CoffImportVaddr; + +static void coff_import_vaddr_build(LinkImage* img, const CoffImportTable* it, + const CoffSection out[COFF_NBUCKETS], + CoffImportVaddr* iv) { + Heap* heap = img->heap; + u64 img_base = PE_IMAGE_BASE; + u32 text_rva = out[COFF_BUCKET_TEXT].rva; + u32 idata_rva = out[COFF_BUCKET_IDATA].rva; + iv->nsyms = LinkSyms_count(&img->syms); + iv->by_sym = (u64*)heap->alloc(heap, sizeof(u64) * (size_t)(iv->nsyms + 1u), + _Alignof(u64)); + if (!iv->by_sym) + compiler_panic(img->c, no_loc(), + "link_emit_coff: oom on import vaddr table"); + memset(iv->by_sym, 0, sizeof(u64) * (size_t)(iv->nsyms + 1u)); + for (u32 i = 0; i < it->nimports; ++i) { + LinkSymId sid = it->imports[i].sym; + u64 va; + if (it->imports[i].is_func) { + va = img_base + (u64)text_rva + (u64)it->imports[i].stub_off; + } else { + va = img_base + (u64)idata_rva + (u64)it->imports[i].iat_off; + } + iv->by_sym[sid - 1u] = va; + /* Fan out across every shadow LinkSymId with the same name so a + * per-input undef reference resolves to the same import slot. */ + { + LinkSymbol* canonical = LinkSyms_at(&img->syms, sid - 1u); + for (u32 j = 0; j < iv->nsyms; ++j) { + LinkSymbol* s = LinkSyms_at(&img->syms, j); + if (s->name == canonical->name && s->imported) { + iv->by_sym[s->id - 1u] = va; + } + } + } + } +} + +static void coff_import_vaddr_free(LinkImage* img, CoffImportVaddr* iv) { + Heap* heap = img->heap; + if (iv->by_sym) { + heap->free(heap, iv->by_sym, + sizeof(u64) * (size_t)(iv->nsyms + 1u)); + } +} + +/* Resolve Compiler.target.arch -> IMAGE_FILE_MACHINE_* via the per-arch + * coff ops table. 
Panic if the arch has no COFF descriptor or the + * machine value is one cfree doesn't ship (Phase 1 supports AMD64 and + * ARM64 only). */ +static u16 coff_machine_or_panic(Compiler* c) { + const ArchImpl* arch = arch_for_compiler(c); + u16 m; + if (!arch || !arch->coff) + compiler_panic(c, no_loc(), "link_emit_coff: no COFF arch descriptor"); + m = arch->coff->machine; + if (m != IMAGE_FILE_MACHINE_AMD64 && m != IMAGE_FILE_MACHINE_ARM64) + compiler_panic(c, no_loc(), + "link_emit_coff: unsupported machine 0x%x", (unsigned)m); + return m; +} + +static int coff_section_name_starts(Compiler* c, const LinkSection* ls, + const char* prefix) { + size_t n = 0; + size_t pn = strlen(prefix); + const char* s = ls->name ? pool_str(c->global, ls->name, &n) : NULL; + return s && n >= pn && memcmp(s, prefix, pn) == 0; +} + +static int coff_section_name_cmp(Compiler* c, const LinkSection* a, + const LinkSection* b) { + size_t an = 0, bn = 0; + const char* as = a->name ? pool_str(c->global, a->name, &an) : ""; + const char* bs = b->name ? pool_str(c->global, b->name, &bn) : ""; + size_t n = an < bn ? an : bn; + int cmp = n ? memcmp(as, bs, n) : 0; + if (cmp) return cmp; + if (an < bn) return -1; + if (an > bn) return 1; + if (a->id < b->id) return -1; + if (a->id > b->id) return 1; + return 0; +} + +static void coff_place_section(LinkImage* img, CoffSection out[COFF_NBUCKETS], + CoffSecMap* map, u64 bucket_cur[COFF_NBUCKETS], + u32 bucket_cap[COFF_NBUCKETS], + const LinkSection* ls) { + Heap* heap = img->heap; + CoffBucket b2 = coff_bucket_for(ls); + u32 align = ls->align ? ls->align : 1u; + u64 cur = bucket_cur[b2]; + cur = ALIGN_UP(cur, (u64)align); + map[ls->id - 1].bucket = (u8)b2; + /* Record the bucket-local offset; the absolute RVA / file offset + * are filled in after bucket placement (RVAs need + * SectionAlignment, file offsets need FileAlignment). 
*/ + map[ls->id - 1].new_rva = (u32)cur; + if (b2 != COFF_BUCKET_BSS) { + /* Copy bytes from the source segment buffer into the bucket. */ + if (ls->size) { + u32 need = (u32)(cur + ls->size); + if (need > bucket_cap[b2]) { + (void)VEC_GROW(heap, out[b2].bytes, bucket_cap[b2], need); + } + memset(out[b2].bytes + bucket_cur[b2], 0, + (size_t)(cur - bucket_cur[b2])); + if (ls->sem != SSEM_NOBITS) { + const LinkSegment* seg = &img->segments[ls->segment_id - 1]; + const u8* src = img->segment_bytes[seg->id - 1] + + (size_t)(ls->file_offset - seg->file_offset); + memcpy(out[b2].bytes + cur, src, (size_t)ls->size); + } else { + memset(out[b2].bytes + cur, 0, (size_t)ls->size); + } + } + } + cur += ls->size; + bucket_cur[b2] = cur; + out[b2].size = (u32)cur; +} + +static void coff_insert_sorted_section(Compiler* c, const LinkSection** a, + u32* n, const LinkSection* ls) { + u32 i = *n; + while (i > 0 && coff_section_name_cmp(c, ls, a[i - 1u]) < 0) { + a[i] = a[i - 1u]; + --i; + } + a[i] = ls; + *n += 1u; +} + +/* ---- pass 1: bucket input sections, assemble bytes, assign deltas ---- + * CoffSecMap is defined above (alongside CoffTlsLayout) because the + * TLS planning helpers need to consume one. */ + +/* Build the four payload buckets (.text/.rdata/.data/.bss). + * + * `map[secid-1]` is populated for every kept LinkSection with the + * section's new RVA, new file offset, the bucket it landed in, and the + * delta to add to in-section vaddrs. Bucket buffers are + * heap-allocated; the caller frees them after emit. 
*/
static void coff_build_buckets(LinkImage* img, CoffSection out[COFF_NBUCKETS],
                               CoffSecMap* map) {
  /* Fixed identity of each output section: PE name, characteristics,
   * and whether the section carries bytes in the file.
   *
   * The Windows loader uses .tls as a *template*: the bytes on disk
   * seed each thread's per-TLS copy at thread creation, and threads
   * write to their copies, not the template. The PE section is still
   * marked writable because that's what mingw and link.exe emit; the
   * loader special-cases it via the TLS directory. */
  const struct {
    CoffBucket bucket;
    const char* name;
    u32 characteristics;
    int has_file_bytes;
  } layout[] = {
      {COFF_BUCKET_TEXT, ".text",
       IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ, 1},
      {COFF_BUCKET_RDATA, ".rdata",
       IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ, 1},
      {COFF_BUCKET_IDATA, ".idata",
       IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ, 1},
      {COFF_BUCKET_DATA, ".data",
       IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
           IMAGE_SCN_MEM_WRITE,
       1},
      {COFF_BUCKET_TLS, ".tls",
       IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
           IMAGE_SCN_MEM_WRITE,
       1},
      {COFF_BUCKET_BSS, ".bss",
       IMAGE_SCN_CNT_UNINITIALIZED_DATA | IMAGE_SCN_MEM_READ |
           IMAGE_SCN_MEM_WRITE,
       0},
      {COFF_BUCKET_RELOC, ".reloc",
       IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
           IMAGE_SCN_MEM_DISCARDABLE,
       1},
  };
  Heap* heap = img->heap;
  Compiler* c = img->c;
  const LinkSection** tls_sorted = NULL;
  const LinkSection** crt_sorted = NULL;
  u32 ntls_sorted = 0;
  u32 ncrt_sorted = 0;
  /* Per-bucket write cursors and buffer capacities; buffers grow
   * lazily inside coff_place_section. */
  u64 bucket_cur[COFF_NBUCKETS];
  u32 bucket_cap[COFF_NBUCKETS];
  u32 idx;

  for (idx = 0; idx < COFF_NBUCKETS; ++idx) {
    memset(&out[idx], 0, sizeof(out[idx]));
    bucket_cur[idx] = 0;
    bucket_cap[idx] = 0;
  }
  for (idx = 0; idx < (u32)(sizeof(layout) / sizeof(layout[0])); ++idx) {
    CoffSection* dst = &out[layout[idx].bucket];
    dst->name = layout[idx].name;
    dst->characteristics = layout[idx].characteristics;
    dst->has_file_bytes = layout[idx].has_file_bytes;
  }

  /* Scratch arrays for the two groups that are placed in name order.
   * Each is sized for the worst case (every section in one group). */
  if (img->nsections) {
    tls_sorted = (const LinkSection**)heap->alloc(
        heap, sizeof(*tls_sorted) * img->nsections,
        _Alignof(const LinkSection*));
    crt_sorted = (const LinkSection**)heap->alloc(
        heap, sizeof(*crt_sorted) * img->nsections,
        _Alignof(const LinkSection*));
    if (!tls_sorted || !crt_sorted)
      compiler_panic(c, no_loc(), "link_emit_coff: oom sorting sections");
  }

  /* Ordinary allocated sections are placed in input order; TLS and
   * .CRT$ sections are set aside and sorted by name first. */
  for (idx = 0; idx < img->nsections; ++idx) {
    const LinkSection* ls = &img->sections[idx];
    if (!(ls->flags & SF_ALLOC)) continue;
    if (ls->flags & SF_TLS) {
      coff_insert_sorted_section(c, tls_sorted, &ntls_sorted, ls);
    } else if (coff_section_name_starts(c, ls, ".CRT$")) {
      coff_insert_sorted_section(c, crt_sorted, &ncrt_sorted, ls);
    } else {
      coff_place_section(img, out, map, bucket_cur, bucket_cap, ls);
    }
  }
  for (idx = 0; idx < ntls_sorted; ++idx)
    coff_place_section(img, out, map, bucket_cur, bucket_cap,
                       tls_sorted[idx]);
  for (idx = 0; idx < ncrt_sorted; ++idx)
    coff_place_section(img, out, map, bucket_cur, bucket_cap,
                       crt_sorted[idx]);

  /* heap->free needs the original allocation size, so hand the buffer
   * capacities back to the caller through size_raw. The field is
   * overwritten with the proper PE value during layout. */
  for (idx = 0; idx < COFF_NBUCKETS; ++idx) out[idx].size_raw = bucket_cap[idx];

  if (tls_sorted)
    heap->free(heap, tls_sorted, sizeof(*tls_sorted) * img->nsections);
  if (crt_sorted)
    heap->free(heap, crt_sorted, sizeof(*crt_sorted) * img->nsections);
}

/* Assign RVAs and file offsets to the buckets that participate in the
 * image. Returns the file offset at which trailing pad-to-EOF should
 * land (== file size).
*/
static u64 coff_assign_layout(CoffSection out[COFF_NBUCKETS],
                              u32 headers_file_size,
                              u32 first_section_rva) {
  u32 next_rva = first_section_rva;
  u64 file_cur = ALIGN_UP((u64)headers_file_size, (u64)PE_FILE_ALIGNMENT);
  u32 idx;

  for (idx = 0; idx < COFF_NBUCKETS; ++idx) {
    CoffSection* sec = &out[idx];

    /* Start from "not in the image"; empty buckets stay that way. */
    sec->in_image = 0;
    sec->rva = 0;
    sec->file_offset = 0;
    sec->size_raw = 0;
    if (sec->size == 0) continue;

    sec->in_image = 1;
    /* Each section starts on a SectionAlignment boundary in memory. */
    sec->rva = (u32)ALIGN_UP((u64)next_rva, (u64)PE_SECTION_ALIGNMENT);
    if (sec->has_file_bytes) {
      /* On disk the section is padded out to FileAlignment. */
      sec->file_offset = (u32)file_cur;
      sec->size_raw = (u32)ALIGN_UP((u64)sec->size, (u64)PE_FILE_ALIGNMENT);
      file_cur += sec->size_raw;
    }
    next_rva = sec->rva + sec->size;
  }
  return file_cur;
}

/* Build the .reloc bytes by grouping absolute relocs by 4-KiB page.
 * The map[] array maps LinkSectionId-1 to the per-section post-PE-relayout
 * RVA, so we can compute each reloc's site_rva = section_rva + (orig
 * write_vaddr - orig section_vaddr).
+ * + * Layout per page: + * u32 page_rva + * u32 size_of_block (8 + n_entries*2, padded to a multiple of 4) + * u16 entries[]: (type << 12) | (offset & 0xfff) + * optional trailing u16 = 0 (IMAGE_REL_BASED_ABSOLUTE) for u32 alignment */ +typedef struct CoffRelocEntry { + u32 site_rva; + u16 type; + u16 pad; +} CoffRelocEntry; + +static int coff_reloc_entry_cmp(const void* a, const void* b) { + const CoffRelocEntry* ea = (const CoffRelocEntry*)a; + const CoffRelocEntry* eb = (const CoffRelocEntry*)b; + if (ea->site_rva < eb->site_rva) return -1; + if (ea->site_rva > eb->site_rva) return 1; + return 0; +} + +static void coff_build_reloc_section(LinkImage* img, + const CoffSection out[COFF_NBUCKETS], + const CoffSecMap* map, + CoffSection* reloc, + const CoffRelocEntry* extras, + u32 n_extras) { + Heap* heap = img->heap; + Compiler* c = img->c; + u32 nrel = LinkRelocs_count(&img->relocs); + CoffRelocEntry* entries = NULL; + u32 nentries = 0; + u32 cap = 0; + u32 i; + + if (!img->pie) { + reloc->bytes = NULL; + reloc->size = 0; + return; + } + for (i = 0; i < nrel; ++i) { + const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); + const LinkSection* ls; + u64 site_old_vaddr; + u32 site_rva; + u16 type; + if (!coff_reloc_needs_base_reloc(r->kind)) continue; + if (r->link_section_id == LINK_SEC_NONE) continue; + ls = &img->sections[r->link_section_id - 1]; + /* r->write_vaddr is in the pre-relayout coordinate system (same as + * ls->vaddr), so the offset into the section is stable. Add the + * containing bucket's final RVA to land at the image RVA. 
*/ + site_old_vaddr = r->write_vaddr; + u8 sb = map[ls->id - 1].bucket; + site_rva = out[sb].rva + map[ls->id - 1].new_rva + + (u32)(site_old_vaddr - ls->vaddr); + if (r->kind == R_ABS64) { + type = (u16)IMAGE_REL_BASED_DIR64; + } else { + type = (u16)IMAGE_REL_BASED_HIGHLOW; + } + if (nentries == cap) { + (void)VEC_GROW(heap, entries, cap, nentries + 1u); + } + entries[nentries].site_rva = site_rva; + entries[nentries].type = type; + entries[nentries].pad = 0; + ++nentries; + } + /* Append caller-supplied extras (TLS directory absolute-VA fields, + * etc.). These are already site-RVAs in the final image. */ + for (i = 0; i < n_extras; ++i) { + if (nentries == cap) { + (void)VEC_GROW(heap, entries, cap, nentries + 1u); + } + entries[nentries] = extras[i]; + ++nentries; + } + if (nentries == 0) { + reloc->bytes = NULL; + reloc->size = 0; + if (entries) heap->free(heap, entries, cap * sizeof(*entries)); + (void)c; + return; + } + /* Sort entries by RVA so we can group runs sharing a 4-KiB page. */ + qsort(entries, nentries, sizeof(*entries), coff_reloc_entry_cmp); + + /* Two-pass: first compute the total size (so we can allocate the + * blob exactly), then emit. */ + u32 blob_size = 0; + u32 run_start = 0; + while (run_start < nentries) { + u32 page = entries[run_start].site_rva & ~0xfffu; + u32 run_end = run_start; + while (run_end < nentries && + (entries[run_end].site_rva & ~0xfffu) == page) { + ++run_end; + } + u32 n = run_end - run_start; + u32 block = COFF_BASE_RELOCATION_SIZE + n * 2u; + block = (u32)ALIGN_UP((u64)block, 4ull); + blob_size += block; + run_start = run_end; + } + reloc->bytes = (u8*)heap->alloc(heap, blob_size, 4); + if (!reloc->bytes && blob_size) + compiler_panic(c, no_loc(), "link_emit_coff: oom on .reloc blob"); + memset(reloc->bytes, 0, blob_size); + reloc->size = blob_size; + /* Stash allocation size for free path. 
*/ + reloc->size_raw = blob_size; + + u32 cursor = 0; + run_start = 0; + while (run_start < nentries) { + u32 page = entries[run_start].site_rva & ~0xfffu; + u32 run_end = run_start; + while (run_end < nentries && + (entries[run_end].site_rva & ~0xfffu) == page) { + ++run_end; + } + u32 n = run_end - run_start; + u32 raw_size = COFF_BASE_RELOCATION_SIZE + n * 2u; + u32 block = (u32)ALIGN_UP((u64)raw_size, 4ull); + u8* p = reloc->bytes + cursor; + wr_u32_le(p, page); + wr_u32_le(p + 4, block); + u32 k; + for (k = 0; k < n; ++k) { + u16 entry = (u16)(((u16)entries[run_start + k].type << 12) | + (entries[run_start + k].site_rva & 0xfffu)); + wr_u16_le(p + 8 + k * 2u, entry); + } + /* Optional trailing pad: a single IMAGE_REL_BASED_ABSOLUTE (0). */ + if (block > raw_size) { + wr_u16_le(p + 8 + n * 2u, 0); + } + cursor += block; + run_start = run_end; + } + heap->free(heap, entries, cap * sizeof(*entries)); +} + +/* Patch each LinkRelocApply against the PE-relayout coordinates and + * apply. `bucket_bytes[bucket]` gives the writable buffer for that + * bucket; the per-section delta in map[] turns the old in-section + * offsets into bucket-local offsets. + * + * Imported targets (LinkSymbol.imported == 1) have no vaddr of their + * own — instead the .idata pass populated `iv->by_sym[id-1]` with the + * function stub's vaddr (for callable imports) or the IAT slot's + * vaddr (for data imports). This is the spot where that table is + * consulted in lieu of the symbol's own zero vaddr. 
*/ +static void coff_apply_all_relocs(LinkImage* img, + const CoffSection out[COFF_NBUCKETS], + const CoffSecMap* map, + const CoffImportVaddr* iv) { + Compiler* c = img->c; + u32 i; + u64 img_base = PE_IMAGE_BASE; + u32 nrel = LinkRelocs_count(&img->relocs); + for (i = 0; i < nrel; ++i) { + LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); + const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1); + const LinkSection* sec; + const LinkSection* tgt_sec; + u64 S, P; + u8* P_bytes; + u8 bucket; + u32 site_off_in_sec; + u32 site_bucket_off; + if (r->link_section_id == LINK_SEC_NONE) continue; + sec = &img->sections[r->link_section_id - 1]; + bucket = map[sec->id - 1].bucket; + if (!out[bucket].has_file_bytes || !out[bucket].bytes) { + /* Shouldn't happen — .bss has no relocations. */ + continue; + } + site_off_in_sec = (u32)(r->write_vaddr - sec->vaddr); + site_bucket_off = map[sec->id - 1].new_rva + site_off_in_sec; + P_bytes = out[bucket].bytes + site_bucket_off; + /* P = ImageBase + bucket_rva + map[].new_rva + site_off_in_sec + * — i.e. the final runtime address of the patch site. */ + P = img_base + (u64)out[bucket].rva + + (u64)map[sec->id - 1].new_rva + site_off_in_sec; + + /* Resolve S: target symbol's new image-relative address. Look up + * the LinkSection that contains the symbol's original vaddr, then + * apply that section's delta. */ + if (tgt->imported) { + /* IAT-routed: stub vaddr (functions) / slot vaddr (data). 
*/ + if (!iv || iv->by_sym[r->target - 1u] == 0) + compiler_panic(c, no_loc(), + "link_emit_coff: imported target lacks IAT slot"); + S = iv->by_sym[r->target - 1u]; + } else if (tgt->kind == SK_ABS) { + S = tgt->vaddr; + } else if (tgt->defined) { + tgt_sec = coff_symbol_section(img, tgt); + if (!tgt_sec) { + compiler_panic(c, no_loc(), + "link_emit_coff: symbol vaddr 0x%llx has no " + "containing section", + (unsigned long long)tgt->vaddr); + } + u8 tb = map[tgt_sec->id - 1].bucket; + u64 sym_off = tgt->vaddr - tgt_sec->vaddr; + S = img_base + (u64)out[tb].rva + + (u64)map[tgt_sec->id - 1].new_rva + sym_off; + } else { + /* Undef and not imported — shouldn't survive resolve_undefs. */ + compiler_panic(c, no_loc(), + "link_emit_coff: unresolved non-imported symbol"); + } + /* COFF-only section-relative kinds: the SECREL value is the + * symbol's offset from the start of its containing output section + * (PE bucket), and SECTION is the 1-based PE section index. + * link_reloc_apply only sees S and P, so we patch these inline + * before delegating common kinds. */ + if (r->kind == R_COFF_SECREL || r->kind == R_COFF_SECTION || + r->kind == R_COFF_AARCH64_SECREL_LOW12A || + r->kind == R_COFF_AARCH64_SECREL_HIGH12A) { + if (!tgt->defined || tgt->kind == SK_ABS) { + compiler_panic(c, no_loc(), + "link_emit_coff: COFF SECREL/SECTION requires a " + "defined section-bound target symbol"); + } + u8 tb = map[tgt_sec->id - 1].bucket; + u64 sym_off_in_bucket = + (u64)map[tgt_sec->id - 1].new_rva + (tgt->vaddr - tgt_sec->vaddr); + if (r->kind == R_COFF_SECREL) { + u64 v = sym_off_in_bucket + (u64)r->addend; + wr_u32_le(P_bytes, (u32)(v & 0xffffffffu)); + } else if (r->kind == R_COFF_SECTION) { + /* PE section indices are 1-based; buckets are 0-based, so add 1. */ + wr_u16_le(P_bytes, (u16)((tb + 1u) & 0xffffu)); + } else { + /* AArch64 SECREL_{LOW,HIGH}12A: patch the imm12 field of an + * existing ADD-imm12 instruction. 
LOW12A = bits [11:0] of the + * SECREL; HIGH12A = bits [23:12]. The instruction's sh bit was + * already set by the codegen (0 for LOW, 1 for HIGH). */ + u64 v = sym_off_in_bucket + (u64)r->addend; + u32 imm12 = (r->kind == R_COFF_AARCH64_SECREL_HIGH12A) + ? (u32)((v >> 12) & 0xfffu) + : (u32)(v & 0xfffu); + u32 instr = rd_u32_le(P_bytes); + instr = (instr & ~(0xfffu << 10)) | (imm12 << 10); + wr_u32_le(P_bytes, instr); + } + continue; + } + link_reloc_apply(c, r->kind, P_bytes, S, r->addend, P); + } +} + +/* ---- header marshalling ---- + * + * Each helper streams its on-disk shape to the writer field-by-field; + * we avoid sizeof(struct) on the packed PE wire types since they carry + * implicit-padding hazards on hosts that disagree with #pragma pack(1) + * defaults. */ + +static void coff_write_dos_stub(Writer* w) { + u8 buf[PE_DOS_HDR_SIZE]; + memset(buf, 0, sizeof(buf)); + /* e_magic ("MZ") + e_lfanew (offset of PE signature). All other + * legacy fields zero. */ + buf[0] = (u8)(IMAGE_DOS_SIGNATURE & 0xffu); + buf[1] = (u8)((IMAGE_DOS_SIGNATURE >> 8) & 0xffu); + wr_u32_le(buf + 0x3c, PE_DOS_E_LFANEW); + cfree_writer_write(w, buf, sizeof(buf)); +} + +static void coff_write_file_header(Writer* w, u16 machine, u16 nsec, + u16 characteristics) { + coff_wr_u16(w, machine); + coff_wr_u16(w, nsec); + coff_wr_u32(w, 0u); /* TimeDateStamp */ + coff_wr_u32(w, 0u); /* PointerToSymbolTable */ + coff_wr_u32(w, 0u); /* NumberOfSymbols */ + coff_wr_u16(w, (u16)PE_OPT_HDR_SIZE); /* SizeOfOptionalHeader */ + coff_wr_u16(w, characteristics); +} + +/* Per-section meta used by both the data-directory fill and the + * IMAGE_SECTION_HEADER emit. Compactly captures everything the writer + * needs to know about the four-or-five output sections. 
*/ +typedef struct CoffOutHdr { + const char* name; + u32 vsize; + u32 rva; + u32 size_raw; + u32 file_offset; + u32 characteristics; +} CoffOutHdr; + +static void coff_write_optional_header(Writer* w, u32 entry_rva, + const CoffSection out[COFF_NBUCKETS], + u32 headers_size_padded, + u32 image_size, + int pie, + u16 subsystem, + const CoffImportTable* it, + const CoffTlsLayout* tls) { + /* Standard fields. */ + coff_wr_u16(w, IMAGE_NT_OPTIONAL_HDR64_MAGIC); + coff_wr_u8(w, PE_LINKER_MAJOR); + coff_wr_u8(w, PE_LINKER_MINOR); + /* SizeOfCode / SizeOfInitializedData / SizeOfUninitializedData. */ + u32 size_code = out[COFF_BUCKET_TEXT].in_image + ? out[COFF_BUCKET_TEXT].size_raw : 0; + u32 size_init = (out[COFF_BUCKET_RDATA].in_image + ? out[COFF_BUCKET_RDATA].size_raw : 0) + + (out[COFF_BUCKET_DATA].in_image + ? out[COFF_BUCKET_DATA].size_raw : 0); + u32 size_uninit = out[COFF_BUCKET_BSS].in_image + ? out[COFF_BUCKET_BSS].size : 0; + coff_wr_u32(w, size_code); + coff_wr_u32(w, size_init); + coff_wr_u32(w, size_uninit); + coff_wr_u32(w, entry_rva); + coff_wr_u32(w, out[COFF_BUCKET_TEXT].in_image + ? out[COFF_BUCKET_TEXT].rva : 0); + /* Windows-specific fields. */ + coff_wr_u64(w, PE_IMAGE_BASE); + coff_wr_u32(w, PE_SECTION_ALIGNMENT); + coff_wr_u32(w, PE_FILE_ALIGNMENT); + coff_wr_u16(w, PE_OS_MAJOR); + coff_wr_u16(w, PE_OS_MINOR); + coff_wr_u16(w, 0u); /* MajorImageVersion */ + coff_wr_u16(w, 0u); /* MinorImageVersion */ + coff_wr_u16(w, PE_SUBSYS_MAJOR); + coff_wr_u16(w, PE_SUBSYS_MINOR); + coff_wr_u32(w, 0u); /* Win32VersionValue */ + coff_wr_u32(w, image_size); + coff_wr_u32(w, headers_size_padded); + coff_wr_u32(w, 0u); /* CheckSum */ + coff_wr_u16(w, subsystem ? 
subsystem : IMAGE_SUBSYSTEM_WINDOWS_CUI); + coff_wr_u16(w, PE_DLL_CHARS); + coff_wr_u64(w, PE_STACK_RESERVE); + coff_wr_u64(w, PE_STACK_COMMIT); + coff_wr_u64(w, PE_HEAP_RESERVE); + coff_wr_u64(w, PE_HEAP_COMMIT); + coff_wr_u32(w, 0u); /* LoaderFlags */ + coff_wr_u32(w, (u32)PE_NUM_DATA_DIRS); + /* DataDirectory[16]. Populated entries: + * [1] IMPORT — descriptor table RVA + total descriptor bytes + * [5] BASERELOC — when PIE and .reloc is in the image + * [12] IAT — first IAT block RVA + sum of per-DLL IAT sizes + * Everything else stays zero. */ + u32 i; + int has_idata = it && it->nimports > 0 && + out[COFF_BUCKET_IDATA].in_image; + for (i = 0; i < PE_NUM_DATA_DIRS; ++i) { + if (i == IMAGE_DIRECTORY_ENTRY_IMPORT && has_idata) { + coff_wr_u32(w, out[COFF_BUCKET_IDATA].rva + it->desc_off); + coff_wr_u32(w, it->desc_size); + } else if (i == IMAGE_DIRECTORY_ENTRY_IAT && has_idata) { + coff_wr_u32(w, out[COFF_BUCKET_IDATA].rva + it->iat_base); + coff_wr_u32(w, it->iat_total); + } else if (i == IMAGE_DIRECTORY_ENTRY_BASERELOC && pie && + out[COFF_BUCKET_RELOC].in_image) { + coff_wr_u32(w, out[COFF_BUCKET_RELOC].rva); + coff_wr_u32(w, out[COFF_BUCKET_RELOC].size); + } else if (i == IMAGE_DIRECTORY_ENTRY_TLS && tls && tls->present) { + coff_wr_u32(w, out[COFF_BUCKET_RDATA].rva + tls->dir_rdata_off); + coff_wr_u32(w, COFF_TLS_DIRECTORY64_SIZE); + } else { + coff_wr_u32(w, 0u); + coff_wr_u32(w, 0u); + } + } +} + +static void coff_write_section_header(Writer* w, const char* name, + u32 vsize, u32 rva, u32 size_raw, + u32 file_offset, + u32 characteristics) { + u8 nm[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + size_t n = strlen(name); + if (n > 8) n = 8; + memcpy(nm, name, n); + cfree_writer_write(w, nm, 8); + coff_wr_u32(w, vsize); + coff_wr_u32(w, rva); + coff_wr_u32(w, size_raw); + coff_wr_u32(w, file_offset); + coff_wr_u32(w, 0u); /* PointerToRelocations */ + coff_wr_u32(w, 0u); /* PointerToLinenumbers */ + coff_wr_u16(w, 0u); /* NumberOfRelocations */ + coff_wr_u16(w, 0u); /* 
NumberOfLinenumbers */ + coff_wr_u32(w, characteristics); +} + +/* ---- main entry ---- */ + +void link_emit_coff(LinkImage* img, Writer* w) { + Heap* heap = img->heap; + Compiler* c = img->c; + u16 machine = coff_machine_or_panic(c); + if (img->entry_sym == LINK_SYM_NONE) + compiler_panic(c, no_loc(), + "link_emit_coff: no resolved entry symbol"); + + /* ---- pass 1: build buckets + per-section delta map ---- */ + CoffSection out[COFF_NBUCKETS]; + CoffSecMap* map = (CoffSecMap*)heap->alloc( + heap, sizeof(CoffSecMap) * (img->nsections + 1u), + _Alignof(CoffSecMap)); + if (!map && img->nsections) + compiler_panic(c, no_loc(), "link_emit_coff: oom on section map"); + memset(map, 0, sizeof(CoffSecMap) * (img->nsections + 1u)); + + /* coff_build_buckets stashes per-bucket allocation caps in size_raw; + * we read them back into a local before size_raw is recomputed by + * coff_assign_layout so the cleanup path can free with the right + * size. */ + coff_build_buckets(img, out, map); + /* coff_build_buckets stashes per-bucket allocation caps in size_raw + * (the only bucket field we own for the duration of layout); read + * them out before coff_assign_layout overwrites the field. .reloc + * and .idata aren't touched by coff_build_buckets — their caps are + * filled in below once coff_build_reloc_section / coff_emit_idata + * run. */ + u32 bucket_caps[COFF_NBUCKETS]; + u32 b; + for (b = 0; b < COFF_NBUCKETS; ++b) bucket_caps[b] = out[b].size_raw; + + /* ---- pass 1b: collect imports and reserve .idata + .text stubs ---- + * + * Builds the per-DLL / per-import layout and appends one IAT-routing + * stub per imported function to the .text bucket. The .idata bucket + * size is set here (so it counts in nsec); the stub vaddrs and + * IAT-slot vaddrs are finalised after coff_assign_layout. 
*/ + CoffImportTable imports; + int have_imports = coff_collect_imports(img, &imports); + if (have_imports) { + coff_plan_idata_layout(img, &imports); + coff_append_stubs(img, &imports, &out[COFF_BUCKET_TEXT], + &bucket_caps[COFF_BUCKET_TEXT]); + /* Reserve the .idata bucket size so coff_assign_layout / nsec + * accounting sees it. Actual bytes are written by coff_emit_idata + * once the bucket RVA is known. */ + out[COFF_BUCKET_IDATA].size = imports.idata_size; + } + + /* ---- pass 1c: plan the TLS directory record ---- + * + * If any SF_TLS sections survived, reserve 40 bytes at the tail of + * .rdata for the IMAGE_TLS_DIRECTORY64. Bytes are zeroed now and + * filled in by coff_emit_tls_dir once the bucket RVAs are final. */ + CoffTlsLayout tls; + coff_plan_tls_layout(img, out, &bucket_caps[COFF_BUCKET_RDATA], &tls); + + /* ---- pass 2: decide whether .reloc will be in the image ---- + * + * The headers' file size (and therefore every section's file + * offset) depends on the section-table entry count, so we need to + * commit to "is .reloc emitted?" before laying out file offsets. + * .reloc lights up iff PIE and at least one absolute reloc points + * into a kept section, OR a TLS directory is emitted (its four u64 + * VA fields all need base-relocs). 
*/ + int emit_reloc = 0; + if (img->pie) { + u32 i; + u32 nrel = LinkRelocs_count(&img->relocs); + for (i = 0; i < nrel; ++i) { + const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); + if (!coff_reloc_needs_base_reloc(r->kind)) continue; + if (r->link_section_id == LINK_SEC_NONE) continue; + emit_reloc = 1; + break; + } + if (!emit_reloc && tls.present) emit_reloc = 1; + } + + u32 nsec = 0; + for (b = 0; b < COFF_NBUCKETS; ++b) { + if (b == COFF_BUCKET_RELOC) { + if (emit_reloc) ++nsec; /* tentative; size set below */ + continue; + } + if (out[b].size) ++nsec; + } + u32 headers_size_unpadded = + PE_DOS_HDR_SIZE + PE_SIG_SIZE + PE_FILE_HDR_SIZE + PE_OPT_HDR_SIZE + + nsec * PE_SECTION_HDR_SIZE; + u32 headers_size_padded = + (u32)ALIGN_UP((u64)headers_size_unpadded, (u64)PE_FILE_ALIGNMENT); + + /* First layout pass: fixes RVAs / file offsets for buckets that + * already have a finalised size (.text, .rdata, .idata, .data, .bss). + * .reloc's RVA is provisional — it depends on .reloc's own size, + * which is still 0 at this point. */ + (void)coff_assign_layout(out, headers_size_padded, PE_FIRST_SECTION_RVA); + + /* ---- pass 2b: emit .idata bytes + per-arch IAT stubs ---- + * + * The .idata bucket's RVA is final after the first assign_layout; + * stubs need it (the indirect-jump displacement targets an IAT slot) + * and .idata's own descriptor / ILT / IAT records all carry RVAs. + * coff_import_vaddr_build builds the per-LinkSymId override table + * that apply_all_relocs consults in place of the (zero) symbol + * vaddr for imported targets. */ + CoffImportVaddr import_vaddr; + memset(&import_vaddr, 0, sizeof(import_vaddr)); + if (have_imports) { + coff_emit_idata(img, &imports, out, &bucket_caps[COFF_BUCKET_IDATA]); + coff_emit_stubs(img, &imports, out); + coff_import_vaddr_build(img, &imports, out, &import_vaddr); + } + + /* Write the TLS directory bytes now that bucket RVAs are final. 
*/ + coff_emit_tls_dir(img, out, map, &tls); + + /* ---- pass 3: build .reloc using the now-final bucket RVAs ---- + * + * coff_build_reloc_section reads out[bucket].rva indirectly via + * map[].new_rva + (write_vaddr - sec->vaddr) → site offset within + * the bucket; the absolute site_rva is bucket.rva + that offset. + * Patch site RVAs are page-quantised in the emitted blob, so this + * is the spot where the bucket RVAs need to be already final. + * + * TLS directory's four absolute-VA fields ride into the entries via + * the `extras` array — they aren't ordinary symbol relocations, so + * they don't show up in img->relocs. */ + if (emit_reloc) { + CoffRelocEntry tls_extras[4]; + u32 n_tls_extras = 0; + if (tls.present) { + u32 dir_rva = out[COFF_BUCKET_RDATA].rva + tls.dir_rdata_off; + static const u32 field_offs[4] = { + COFF_TLSDIR_OFF_START_ADDR, COFF_TLSDIR_OFF_END_ADDR, + COFF_TLSDIR_OFF_INDEX_ADDR, COFF_TLSDIR_OFF_CALLBACKS, + }; + u32 k; + for (k = 0; k < 4; ++k) { + if (field_offs[k] == COFF_TLSDIR_OFF_CALLBACKS && + !tls.callbacks_sym) + continue; + tls_extras[n_tls_extras].site_rva = dir_rva + field_offs[k]; + tls_extras[n_tls_extras].type = (u16)IMAGE_REL_BASED_DIR64; + tls_extras[n_tls_extras].pad = 0; + ++n_tls_extras; + } + } + coff_build_reloc_section(img, out, map, &out[COFF_BUCKET_RELOC], + tls_extras, n_tls_extras); + bucket_caps[COFF_BUCKET_RELOC] = out[COFF_BUCKET_RELOC].size_raw; + /* size_raw was stashed by build; assign_layout below recomputes it + * as the FileAlignment-padded length. */ + (void)coff_assign_layout(out, headers_size_padded, + PE_FIRST_SECTION_RVA); + } + + /* `_tls_used` is the public mingw/PE name for the TLS directory + * record. Keep it in lockstep with the optional-header TLS data + * directory, rather than leaving references bound to mingw's tlssup.o + * placeholder record. 
*/ + coff_define_tls_used(img, out, &tls); + + /* ---- pass 4: resolve entry symbol's PE RVA ---- + * + * Done before apply so the optional-header field has its final + * value. */ + const LinkSymbol* entry_sym = + LinkSyms_at(&img->syms, img->entry_sym - 1); + if (!entry_sym->defined || entry_sym->kind == SK_ABS) + compiler_panic(c, no_loc(), + "link_emit_coff: entry symbol is not a defined " + "image-relative function"); + const LinkSection* entry_sec = coff_section_at(img, entry_sym->vaddr); + if (!entry_sec) + compiler_panic(c, no_loc(), + "link_emit_coff: entry symbol has no containing " + "section"); + u8 entry_bucket = map[entry_sec->id - 1].bucket; + u32 entry_rva = out[entry_bucket].rva + + map[entry_sec->id - 1].new_rva + + (u32)(entry_sym->vaddr - entry_sec->vaddr); + + /* ---- pass 5: apply all relocations into bucket bytes ---- */ + coff_apply_all_relocs(img, out, map, + have_imports ? &import_vaddr : NULL); + + /* ---- pass 6: compute SizeOfImage (in-memory size) ---- */ + u32 image_size = 0; + for (b = 0; b < COFF_NBUCKETS; ++b) { + if (!out[b].in_image) continue; + u32 end = out[b].rva + out[b].size; + if (end > image_size) image_size = end; + } + image_size = (u32)ALIGN_UP((u64)image_size, (u64)PE_SECTION_ALIGNMENT); + + /* ---- pass 7: write everything ---- */ + u16 file_chars = IMAGE_FILE_EXECUTABLE_IMAGE | + IMAGE_FILE_LARGE_ADDRESS_AWARE; + if (!img->pie || !out[COFF_BUCKET_RELOC].in_image) { + file_chars |= IMAGE_FILE_RELOCS_STRIPPED; + } + + coff_write_dos_stub(w); + /* PE signature. */ + coff_wr_u32(w, IMAGE_NT_SIGNATURE); + coff_write_file_header(w, machine, (u16)nsec, file_chars); + u16 subsystem = img->linker ? img->linker->pe_subsystem : 0; + coff_write_optional_header(w, entry_rva, out, headers_size_padded, + image_size, img->pie, subsystem, + have_imports ? &imports : NULL, &tls); + + /* Section table. 
*/ + for (b = 0; b < COFF_NBUCKETS; ++b) { + if (!out[b].in_image) continue; + coff_write_section_header(w, out[b].name, out[b].size, out[b].rva, + out[b].size_raw, out[b].file_offset, + out[b].characteristics); + } + + /* Pad to first section's file offset. */ + u64 cur = (u64)headers_size_unpadded; + u64 first_file_off = headers_size_padded; + if (cur < first_file_off) { + coff_write_zeroes(w, first_file_off - cur); + cur = first_file_off; + } + + /* Section bodies. */ + for (b = 0; b < COFF_NBUCKETS; ++b) { + if (!out[b].in_image) continue; + if (!out[b].has_file_bytes) continue; + if (cur < out[b].file_offset) { + coff_write_zeroes(w, out[b].file_offset - cur); + cur = out[b].file_offset; + } + cfree_writer_write(w, out[b].bytes, out[b].size); + cur += out[b].size; + if (out[b].size_raw > out[b].size) { + coff_write_zeroes(w, out[b].size_raw - out[b].size); + cur += out[b].size_raw - out[b].size; + } + } + + /* ---- cleanup ---- */ + for (b = 0; b < COFF_NBUCKETS; ++b) { + if (out[b].bytes) heap->free(heap, out[b].bytes, bucket_caps[b]); + } + heap->free(heap, map, sizeof(CoffSecMap) * (img->nsections + 1u)); + if (have_imports) { + coff_import_vaddr_free(img, &import_vaddr); + coff_imports_free(img, &imports); + } +} diff --git a/src/link/link_internal.h b/src/link/link_internal.h @@ -23,6 +23,11 @@ typedef struct InputMap { u32 nsym; LinkSectionId* section; /* size = ObjBuilder.nsections */ u32 nsection; + /* COMDAT discard mask, size = nsection. Set by link_resolve_symbols + * for COFF/PE SELECTANY: when an input's COMDAT section conflicts + * with an earlier definition, the duplicate section is marked here + * so link_gc_compute and link_layout_sections skip it. */ + u8* comdat_discarded; } InputMap; /* Open-addressed name -> LinkSymId hash for global / weak definitions @@ -111,6 +116,7 @@ struct Linker { * DSO input is present. Triggers layout_dyn (Phase 4) and the * dynamic ELF emit path (Phase 6). 
*/ int emit_pie; + u16 pe_subsystem; /* Caller-supplied PT_INTERP. layout_dyn falls back to a target- * derived default when this is 0. */ Sym interp_path; @@ -182,6 +188,10 @@ u32 link_iplt_alloc_sections(LinkImage* img, u32 nsec); /* ---- Public entries (link_resolve.c) -------------------------------------- */ void link_ingest_archives(struct Linker*); +/* PE/COFF only: synthesize a tiny ObjBuilder providing the mingw CRT + * `__CTOR_LIST__` / `__CTOR_END__` / `__DTOR_LIST__` / `__DTOR_END__` + * boundary symbols. See link_resolve.c for the contract. */ +void link_synth_coff_ctor_dtor_list(struct Linker*); void link_resolve_symbols(struct Linker*, LinkImage*); void link_resolve_undefs(struct Linker*, LinkImage*); void link_gc_compute(struct Linker*, LinkImage*, GcLive*); @@ -218,6 +228,12 @@ void link_resolve_entry(struct Linker*, LinkImage*); * (the caller still owns). DSO / TBD inputs are skipped. */ void link_capture_debug_inputs(struct Linker*, LinkImage*); +/* Default PE/COFF ImageBase for executables. Mirrored in link_coff.c + * (the emitter writes this into the optional header). Exposed here so + * link_layout can synthesize the `__ImageBase` symbol at the same + * vaddr, before resolve_undefs runs. */ +#define LINK_PE_IMAGE_BASE 0x140000000ULL + /* Defined in link_dyn.c. Phase 4: synthesize .interp/.dynsym/.dynstr/ * .gnu.hash/.rela.dyn/.rela.plt/.plt/.got.plt/.dynamic when the link * is producing a PIE / ET_DYN exe. No-op when there are zero imports @@ -445,6 +461,7 @@ void link_reloc_apply(Compiler*, RelocKind, u8* P_bytes, u64 S, i64 A, u64 P); * reloc application remains keyed by RelocKind. COFF arrives later. */ void link_emit_elf(LinkImage*, Writer*); void link_emit_macho(LinkImage*, Writer*); +void link_emit_coff(LinkImage*, Writer*); /* Format-agnostic 16-byte image identity, derived from per-segment * post-shift bytes + vaddrs/sizes. 
ELF wraps it in a diff --git a/src/link/link_layout.c b/src/link/link_layout.c @@ -510,10 +510,16 @@ void link_emit_boundary_sym(Linker* l, LinkImage* img, const char* name, Sym sym = boundary_name(l, name); LinkSymId id = symhash_get(&img->globals, sym); LinkSymbol rec; + u8 kind = SK_OBJ; u32 i, n; + if (l->c->target.obj == CFREE_OBJ_COFF && + (strcmp(name, "__ImageBase") == 0 || + strcmp(name, "_tls_used") == 0)) { + kind = SK_ABS; + } memset(&rec, 0, sizeof(rec)); rec.name = sym; - rec.kind = SK_OBJ; + rec.kind = kind; rec.defined = 1; rec.vaddr = vaddr; rec.bind = SB_GLOBAL; @@ -533,7 +539,7 @@ void link_emit_boundary_sym(Linker* l, LinkImage* img, const char* name, s->section_id = LINK_SEC_NONE; s->value = 0; s->vaddr = vaddr; - s->kind = SK_OBJ; + s->kind = kind; s->defined = 1; s->imported = 0; } @@ -947,6 +953,7 @@ LinkImage* link_resolve(Linker* l) { Heap* h; metrics_scope_begin(l->c, "link.resolve.total"); + link_synth_coff_ctor_dtor_list(l); metrics_scope_begin(l->c, "link.ingest_archives"); link_ingest_archives(l); metrics_scope_end(l->c, "link.ingest_archives"); @@ -997,6 +1004,14 @@ LinkImage* link_resolve(Linker* l) { link_emit_boundary_sym(l, img, "__dso_handle", 0); link_emit_boundary_sym(l, img, "_DYNAMIC", 0); link_emit_boundary_sym(l, img, "_GLOBAL_OFFSET_TABLE_", 0); + /* PE/COFF: mingw CRT references `__ImageBase` for ASLR-relative + * addressing and base-relocation bookkeeping. The PE emitter + * writes LINK_PE_IMAGE_BASE into the optional header; expose the + * same value as a linker-defined symbol so input objects resolve. 
*/ + if (l->c->target.obj == CFREE_OBJ_COFF) { + link_emit_boundary_sym(l, img, "__ImageBase", LINK_PE_IMAGE_BASE); + if (img->tls_memsz) link_emit_boundary_sym(l, img, "_tls_used", 0); + } { const LinkArchDesc* arch = link_arch_desc_for(l->c); u32 si; @@ -1041,8 +1056,12 @@ LinkImage* link_resolve(Linker* l) { if (got_map) h->free(h, got_map, sizeof(*got_map) * map_size); if (stub_map) h->free(h, stub_map, sizeof(*stub_map) * map_size); } + /* layout_dyn synthesizes ELF-specific .interp / .dynsym / .dynstr / + * .rela.dyn / .plt / .got.plt / .dynamic sections. Mach-O has its + * own equivalent path; COFF binds imports through .idata + IAT + * (Phase 3.2). Skip for non-ELF formats. */ metrics_scope_begin(l->c, "link.layout_dyn"); - layout_dyn(l, img); + if (l->c->target.obj == CFREE_OBJ_ELF) layout_dyn(l, img); metrics_scope_end(l->c, "link.layout_dyn"); metrics_scope_begin(l->c, "link.resolve_entry"); link_resolve_entry(l, img); diff --git a/src/link/link_reloc_layout.c b/src/link/link_reloc_layout.c @@ -330,6 +330,13 @@ static u8 reloc_width(RelocKind k) { case R_RV_ADD64: case R_RV_SUB64: return 8; + case R_COFF_SECREL: + return 4; + case R_COFF_SECTION: + return 2; + case R_COFF_AARCH64_SECREL_LOW12A: + case R_COFF_AARCH64_SECREL_HIGH12A: + return 4; default: return 0; } diff --git a/src/link/link_resolve.c b/src/link/link_resolve.c @@ -41,6 +41,10 @@ static void map_alloc(LinkImage* img, InputMap* m, u32 nsym, u32 nsection) { if (!m->section) compiler_panic(img->c, no_loc(), "link: oom on input section map"); memset(m->section, 0, sizeof(*m->section) * nsection); + m->comdat_discarded = (u8*)h->alloc(h, nsection ? nsection : 1u, 1); + if (!m->comdat_discarded) + compiler_panic(img->c, no_loc(), "link: oom on input comdat map"); + memset(m->comdat_discarded, 0, nsection ? 
nsection : 1u); } /* ---- pass 1: collect symbols ---- */ @@ -64,6 +68,17 @@ static int obj_sym_is_logical_undef(const ObjSym* s) { s->kind != SK_COMMON; } +/* COFF/PE SELECTANY: a duplicate strong global is acceptable iff both + * definitions live in COMDAT (SF_GROUP-tagged) sections. When that + * holds, the earlier-processed definition wins and the new section is + * marked for discard so its bytes never reach layout. */ +static int obj_sym_defined_in_comdat(ObjBuilder* ob, const ObjSym* s) { + const Section* sec; + if (!s || s->section_id == OBJ_SEC_NONE) return 0; + sec = obj_section_get(ob, s->section_id); + return sec && (sec->flags & SF_GROUP); +} + void link_resolve_symbols(Linker* l, LinkImage* img) { u32 ii; for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { @@ -146,12 +161,29 @@ void link_resolve_symbols(Linker* l, LinkImage* img) { m->sym[e.id] = existing; } else if (new_strength == old_strength && new_strength == bind_strength(SB_GLOBAL)) { - size_t namelen; - const char* nm = pool_str(l->c->global, s->name, &namelen); - compiler_panic(l->c, no_loc(), - "link: duplicate definition of " - "global symbol '%.*s'", - (int)namelen, nm); + /* COFF SELECTANY: if both defs are in COMDAT sections, + * keep the earlier one and discard the new section. */ + ObjBuilder* prev_ob = (prev->input_id != LINK_INPUT_NONE) + ? LinkInputs_at(&l->inputs, + prev->input_id - 1)->obj + : NULL; + const ObjSym* prev_os = prev_ob + ? 
obj_symbol_get(prev_ob, prev->obj_sym) + : NULL; + if (prev_ob && prev_os && + obj_sym_defined_in_comdat(prev_ob, prev_os) && + obj_sym_defined_in_comdat(ob, s)) { + m->sym[e.id] = existing; + if (s->section_id < m->nsection) + m->comdat_discarded[s->section_id] = 1; + } else { + size_t namelen; + const char* nm = pool_str(l->c->global, s->name, &namelen); + compiler_panic(l->c, no_loc(), + "link: duplicate definition of " + "global symbol '%.*s'", + (int)namelen, nm); + } } else { m->sym[e.id] = existing; } @@ -227,6 +259,71 @@ void link_resolve_undefs(Linker* l, LinkImage* img) { continue; } } + /* COFF WEAK_EXTERNAL alias fallback: cfree drops the aux TagIndex + * at read time (see coff_read.c step "WEAK_EXTERNAL primary"), so + * the alias relationship is recovered here via the mingw single- + * underscore naming convention. e.g. `__set_app_type` aliases to + * `_set_app_type`; `__imp___set_app_type` aliases to + * `__imp__set_app_type`. Try the de-underscored variant first, + * then the re-underscored one. Look in both image globals (for + * defined-in-input aliases) and DSO exports (for DLL imports). + * + * Applied to both WEAK (alias-declarator members) and GLOBAL + * undefs (user references like crt2.o's call to `__set_app_type`) + * because the alias relationship is purely a naming convention in + * the mingw CRT — losing the aux TagIndex means we can't tell + * which side is the alias declarator. 
*/ + if (l->c->target.obj == CFREE_OBJ_COFF && s->name != 0) { + size_t nlen; + const char* nm = pool_str(l->c->global, s->name, &nlen); + Sym candidates[2] = {0, 0}; + u32 ncand = 0; + if (nm && nlen >= 2 && nm[0] == '_') { + candidates[ncand++] = + pool_intern(l->c->global, nm + 1, (u32)(nlen - 1u)); + } + if (nm && nlen > 0) { + char* buf = (char*)arena_array(l->c->scratch, char, nlen + 1u); + buf[0] = '_'; + memcpy(buf + 1, nm, nlen); + candidates[ncand++] = pool_intern(l->c->global, buf, (u32)(nlen + 1u)); + } + int resolved = 0; + for (u32 ci = 0; !resolved && ci < ncand; ++ci) { + Sym alias = candidates[ci]; + if (alias == 0) continue; + LinkSymId hit = symhash_get(&img->globals, alias); + if (hit != LINK_SYM_NONE) { + LinkSymbol* def = LinkSyms_at(&img->syms, hit - 1); + if (def->defined || def->imported) { + s->name = def->name; + s->section_id = def->section_id; + s->value = def->value; + s->vaddr = def->vaddr; + s->kind = def->kind; + s->defined = def->defined; + s->imported = def->imported; + s->dso_input_id = def->dso_input_id; + if (!s->defined && !s->imported) { + s->kind = SK_ABS; + s->vaddr = 0; + s->defined = 1; + } + resolved = 1; + break; + } + } + LinkInputId dso = find_dso_export(l, alias); + if (dso != LINK_INPUT_NONE) { + s->name = alias; + s->imported = 1; + s->dso_input_id = dso; + resolved = 1; + break; + } + } + if (resolved) continue; + } if (s->bind == SB_WEAK) { s->kind = SK_ABS; s->vaddr = 0; @@ -404,10 +501,12 @@ void link_gc_compute(Linker* l, LinkImage* img, GcLive* g) { if (!l->gc_sections) { for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; + InputMap* m = &img->input_maps[ii]; u32 nsec = obj_section_count(ob); for (j = 1; j < nsec; ++j) { const Section* s = obj_section_get(ob, j); - if (s && link_section_kept(s)) g->marks[ii][j] = 1; + if (s && link_section_kept(s) && !m->comdat_discarded[j]) + g->marks[ii][j] = 1; } } return; @@ -417,11 +516,13 @@ void 
link_gc_compute(Linker* l, LinkImage* img, GcLive* g) { for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; + InputMap* m = &img->input_maps[ii]; u32 nsec = obj_section_count(ob); for (j = 1; j < nsec; ++j) { const Section* s = obj_section_get(ob, j); int root; if (!s || !link_section_kept(s)) continue; + if (m->comdat_discarded[j]) continue; root = (s->flags & SF_RETAIN) || s->sem == SSEM_INIT_ARRAY || s->sem == SSEM_FINI_ARRAY || s->sem == SSEM_PREINIT_ARRAY; if (root) gc_mark(g, &q, h, ii, j); @@ -503,13 +604,23 @@ static void include_archive_member(Linker* l, const LinkArchive* ar, LinkInput* in; LinkInputId id; u32 idx; + Sym coff_dll = 0; if (mem->included) return; in = LinkInputs_push(&l->inputs, &idx); if (!in) compiler_panic(l->c, no_loc(), "link: oom growing inputs (archive member)"); id = (LinkInputId)(idx + 1u); in->id = id; - in->kind = LINK_INPUT_OBJ_BYTES; + /* PE/COFF short-import shim: read_coff_short_import stashes the + * providing DLL name on the ObjBuilder. Such members behave like + * DSO inputs — symbols are exports, not local definitions — so route + * through LINK_INPUT_DSO_BYTES with the DLL name as the soname. */ + if (mem->obj && obj_get_coff_import_dll(mem->obj, &coff_dll) && coff_dll) { + in->kind = LINK_INPUT_DSO_BYTES; + in->soname = coff_dll; + } else { + in->kind = LINK_INPUT_OBJ_BYTES; + } in->order = ar->order; in->obj = mem->obj; in->name = mem->name; @@ -531,10 +642,21 @@ static void scan_presence_before(Linker* l, u32 max_order, SymHash* defined, const ObjSym* s = e.sym; if (s->name == 0) continue; if (s->bind == SB_LOCAL) continue; - if (obj_sym_is_logical_undef(s)) + if (obj_sym_is_logical_undef(s)) { + /* Match the spurious-UNDEF prune in link_resolve (line 109) and + * obj_sweep_dead at .o emit (obj.c:513): an unreferenced + * global/weak extern declaration is a header artifact, not a + * real demand to pull from an archive. 
Without this check the + * C frontend's per-extern undef synthesis (e.g. every prototype + * in <math.h>) drags in matching archive members even when the + * user's source never references them. */ + if (!s->referenced && + (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) + continue; symhash_set(undefs, s->name, 1u); - else + } else { symhash_set(defined, s->name, 1u); + } } obj_symiter_free(it); } @@ -562,7 +684,7 @@ static int inputs_have_defined_ifunc_before(Linker* l, u32 max_order) { } static int member_satisfies(LinkArchiveMember* mem, const SymHash* defined, - const SymHash* wanted) { + const SymHash* wanted, int coff_target) { ObjSymIter* it; ObjSymEntry e; int hit = 0; @@ -570,7 +692,16 @@ static int member_satisfies(LinkArchiveMember* mem, const SymHash* defined, while (obj_symiter_next(it, &e)) { const ObjSym* s = e.sym; if (s->name == 0) continue; - if (s->kind == SK_UNDEF) continue; + /* In COFF archives, WEAK_EXTERNAL alias declarations are read as + * SB_WEAK + SK_UNDEF (cfree has no native alias model — see + * coff_read.c step "WEAK_EXTERNAL primary"). The archive's symbol + * map still lists the member as the canonical provider of that + * name, so treat such weak undefs as defining for the archive-pull + * decision. The actual alias-to-target resolution happens later in + * link_resolve_undefs. */ + if (s->kind == SK_UNDEF) { + if (!(coff_target && s->bind == SB_WEAK)) continue; + } if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue; if (symhash_get(wanted, s->name) == LINK_SYM_NONE) continue; if (symhash_get(defined, s->name) != LINK_SYM_NONE) continue; @@ -581,6 +712,78 @@ static int member_satisfies(LinkArchiveMember* mem, const SymHash* defined, return hit; } +/* Synthesize an ObjBuilder providing the mingw CRT ctor/dtor list + * boundary symbols (`__CTOR_LIST__`, `__CTOR_END__`, `__DTOR_LIST__`, + * `__DTOR_END__`) backed by a 16-byte zero blob. 
mingw's gccmain.o + * references these and walks them at program startup; lld/binutils + * generate them via the linker script's `.ctors` / `.dtors` rules. + * cfree has no script for PE, so we inject an equivalent here. + * + * Zero contents are intentional for the empty-list case: + * - __do_global_ctors loads `*(u32*)__CTOR_LIST__`; sees 0; cbz + * short-circuit returns without iterating. + * - __do_global_dtors loads `*(u64*)__DTOR_LIST__`; sees 0; cbz + * short-circuit returns. + * + * For programs that emit real ctor/dtor sections this synth would + * need to coordinate with .ctors/.dtors layout; v1 covers the empty + * case (hello-world through mingw CRT). */ +void link_synth_coff_ctor_dtor_list(Linker* l) { + ObjBuilder* ob; + ObjSecId sid; + static const u8 kZeros[16] = {0}; + /* AArch64 __chkstk: probes `x15 * 16` bytes of stack one page at a + * time, then returns. Mirrors the LLVM compiler-rt implementation + * (chkstk.S in builtins/aarch64). 28 bytes. */ + static const u8 kAa64Chkstk[28] = { + 0xf0, 0xed, 0x7c, 0xd3, /* lsl x16, x15, #4 */ + 0xf1, 0x03, 0x00, 0x91, /* mov x17, sp */ + 0x31, 0x06, 0x40, 0xd1, /* sub x17, x17, #0x1, lsl #12 */ + 0x10, 0x06, 0x40, 0xf1, /* subs x16, x16, #0x1, lsl #12 */ + 0x3f, 0x02, 0x40, 0xf9, /* ldr xzr, [x17] */ + 0xac, 0xff, 0xff, 0x54, /* b.gt #-0xc (back to the sub) */ + 0xc0, 0x03, 0x5f, 0xd6, /* ret */ + }; + LinkInput* in; + u32 idx; + if (!l || l->c->target.obj != CFREE_OBJ_COFF) return; + ob = obj_new(l->c); + if (!ob) return; + sid = obj_section_ex(ob, pool_intern_cstr(l->c->global, ".rdata$ctors"), + SEC_RODATA, SSEM_PROGBITS, SF_ALLOC | SF_RETAIN, 16, + 0u, 0u, 0u); + obj_section_replace_bytes(ob, sid, kZeros, sizeof(kZeros)); + obj_symbol_ex(ob, pool_intern_cstr(l->c->global, "__CTOR_LIST__"), + SB_GLOBAL, SV_DEFAULT, SK_OBJ, sid, 0, 0, 0); + obj_symbol_ex(ob, pool_intern_cstr(l->c->global, "__CTOR_END__"), + SB_GLOBAL, SV_DEFAULT, SK_OBJ, sid, 0, 0, 0); + obj_symbol_ex(ob, pool_intern_cstr(l->c->global, "__DTOR_LIST__"), 
+ SB_GLOBAL, SV_DEFAULT, SK_OBJ, sid, 0, 0, 0); + obj_symbol_ex(ob, pool_intern_cstr(l->c->global, "__DTOR_END__"), + SB_GLOBAL, SV_DEFAULT, SK_OBJ, sid, 0, 0, 0); + /* __chkstk: only the aa64 variant is synthesized here; x64 codegen + * already emits inline probes (or links libmingwex's __chkstk + * which is a plain object, not an ARM64EC alias). */ + if (l->c->target.arch == CFREE_ARCH_ARM_64) { + ObjSecId tsid = + obj_section_ex(ob, pool_intern_cstr(l->c->global, ".text$chkstk"), + SEC_TEXT, SSEM_PROGBITS, + SF_ALLOC | SF_EXEC | SF_RETAIN, 4, 0u, 0u, 0u); + obj_section_replace_bytes(ob, tsid, kAa64Chkstk, sizeof(kAa64Chkstk)); + obj_symbol_ex(ob, pool_intern_cstr(l->c->global, "__chkstk"), SB_GLOBAL, + SV_DEFAULT, SK_FUNC, tsid, 0, sizeof(kAa64Chkstk), 0); + } + obj_finalize(ob); + in = LinkInputs_push(&l->inputs, &idx); + if (!in) compiler_panic(l->c, no_loc(), "link: oom growing inputs (synth)"); + in->id = (LinkInputId)(idx + 1u); + in->kind = LINK_INPUT_OBJ_BYTES; + in->order = l->next_input_order++; + in->obj = ob; + in->name = pool_intern_cstr(l->c->global, "<cfree-synth-coff-runtime>"); + in->soname = 0; +} + void link_ingest_archives(Linker* l) { u32 a, m; if (LinkArchives_count(&l->archives) == 0) return; @@ -588,8 +791,12 @@ void link_ingest_archives(Linker* l) { for (a = 0; a < LinkArchives_count(&l->archives); ++a) { LinkArchive* ar = LinkArchives_at(&l->archives, a); if (!ar->whole_archive) continue; - for (m = 0; m < ar->nmembers; ++m) + for (m = 0; m < ar->nmembers; ++m) { + /* obj==NULL is the long-form COFF head/trailer skip path + * (set by link_add_archive_bytes). Drop them silently. 
*/ + if (!ar->members[m].obj) continue; include_archive_member(l, ar, &ar->members[m]); + } } for (a = 0; a < LinkArchives_count(&l->archives); ++a) { @@ -608,10 +815,12 @@ void link_ingest_archives(Linker* l) { symhash_get(&defined, want_ifunc_init) == LINK_SYM_NONE) symhash_set(&undefs, want_ifunc_init, 1u); + int coff_target = (l->c->target.obj == CFREE_OBJ_COFF); for (m = 0; m < ar->nmembers; ++m) { LinkArchiveMember* mem = &ar->members[m]; if (mem->included) continue; - if (!member_satisfies(mem, &defined, &undefs)) continue; + if (!mem->obj) continue; /* long-form skip (head/trailer) */ + if (!member_satisfies(mem, &defined, &undefs, coff_target)) continue; include_archive_member(l, ar, mem); changed = 1; } diff --git a/src/obj/coff.h b/src/obj/coff.h @@ -0,0 +1,598 @@ +/* PE/COFF wire-format constants, structs, and per-arch reloc translators + * shared between obj/coff_emit.c, obj/coff_read.c, and link/link_coff.c + * (none of which exist yet). + * + * Private to src/. The public ObjBuilder/Linker surface is format-neutral + * (obj/obj.h, link/link.h); the PE/COFF spelling of those abstractions + * only exists inside libcfree. + * + * Scope: 64-bit little-endian only — IMAGE_FILE_MACHINE_AMD64 (x86_64) + * and IMAGE_FILE_MACHINE_ARM64 (aarch64). 32-bit (i386 win32) and + * big-endian variants are out of scope. The per-arch reloc mapping is + * split across coff_reloc_<arch>.c, mirroring the ELF arrangement; + * emit_coff and the linker dispatch to the right translator by + * Compiler.target.arch. */ + +#ifndef CFREE_OBJ_COFF_H +#define CFREE_OBJ_COFF_H + +#include <cfree/core.h> + +#include "core/bytes.h" +#include "core/core.h" +#include "obj/obj.h" + +/* ---- file header (IMAGE_FILE_HEADER) ---- + * On-disk: 20 bytes, little-endian, no padding. 
*/ +#define COFF_FILE_HEADER_SIZE 20u + +typedef struct ImageFileHeader { + u16 Machine; /* IMAGE_FILE_MACHINE_* */ + u16 NumberOfSections; + u32 TimeDateStamp; /* zero for reproducible builds */ + u32 PointerToSymbolTable; /* file offset, or 0 if no symtab */ + u32 NumberOfSymbols; /* counts aux records too */ + u16 SizeOfOptionalHeader; /* 0 for .obj, 240 for PE32+ image */ + u16 Characteristics; /* IMAGE_FILE_* */ +} ImageFileHeader; + +/* Machine types. Only AMD64 and ARM64 are emitted/read by cfree; the + * rest are listed for completeness so readers can give a useful + * "unsupported machine" diagnostic instead of "unknown machine". */ +#define IMAGE_FILE_MACHINE_UNKNOWN 0x0000u +#define IMAGE_FILE_MACHINE_AMD64 0x8664u /* x86_64, cfree supports */ +#define IMAGE_FILE_MACHINE_ARM64 0xAA64u /* aarch64, cfree supports */ +#define IMAGE_FILE_MACHINE_ARM64EC 0xA641u /* ARM64EC — readers alias + * to AArch64 (encoding is + * identical, only ABI + * differs). llvm-mingw's + * compiler-rt builtins ship + * as ARM64EC objects. */ +/* Not supported by cfree (here for diagnostic recognition only): */ +#define IMAGE_FILE_MACHINE_I386 0x014Cu +#define IMAGE_FILE_MACHINE_ARM 0x01C0u +#define IMAGE_FILE_MACHINE_ARMNT 0x01C4u +#define IMAGE_FILE_MACHINE_IA64 0x0200u +#define IMAGE_FILE_MACHINE_RISCV64 0x5064u + +/* Characteristics flags (subset cfree handles). */ +#define IMAGE_FILE_RELOCS_STRIPPED 0x0001u +#define IMAGE_FILE_EXECUTABLE_IMAGE 0x0002u +#define IMAGE_FILE_LARGE_ADDRESS_AWARE 0x0020u +#define IMAGE_FILE_DEBUG_STRIPPED 0x0200u +#define IMAGE_FILE_DLL 0x2000u + +/* ---- DOS header + PE signature ---- + * cfree only emits a minimal DOS stub for executable images: 'MZ' + * magic and the e_lfanew offset pointing at "PE\0\0". The remaining + * legacy fields are zeroed but kept named for clarity. The DOS stub + * program (typically prints "This program cannot be run in DOS mode") + * is emitted as a separate byte blob after this header. 
*/ +#define COFF_DOS_HEADER_SIZE 64u +#define IMAGE_DOS_SIGNATURE 0x5A4Du /* 'MZ' little-endian */ +#define IMAGE_NT_SIGNATURE 0x00004550u /* "PE\0\0" little-endian */ + +typedef struct ImageDosHeader { + u16 e_magic; /* IMAGE_DOS_SIGNATURE */ + u16 e_cblp; + u16 e_cp; + u16 e_crlc; + u16 e_cparhdr; + u16 e_minalloc; + u16 e_maxalloc; + u16 e_ss; + u16 e_sp; + u16 e_csum; + u16 e_ip; + u16 e_cs; + u16 e_lfarlc; + u16 e_ovno; + u16 e_res[4]; + u16 e_oemid; + u16 e_oeminfo; + u16 e_res2[10]; + u32 e_lfanew; /* file offset of "PE\0\0" */ +} ImageDosHeader; + +/* ---- optional header (PE32+, IMAGE_OPTIONAL_HEADER64) ---- + * On-disk size for PE32+ with 16 DataDirectory entries = 240 bytes: + * 24 (standard) + 88 (windows-specific) + 16*8 (data directories). */ +#define COFF_OPT_HDR64_SIZE 240u +#define COFF_DATA_DIRECTORY_SIZE 8u +#define COFF_NUM_DATA_DIRECTORIES 16u + +#define IMAGE_NT_OPTIONAL_HDR64_MAGIC 0x020Bu + +/* Subsystem (Subsystem field). */ +#define IMAGE_SUBSYSTEM_UNKNOWN 0u +#define IMAGE_SUBSYSTEM_NATIVE 1u +#define IMAGE_SUBSYSTEM_WINDOWS_GUI 2u +#define IMAGE_SUBSYSTEM_WINDOWS_CUI 3u /* console */ + +/* DllCharacteristics. */ +#define IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA 0x0020u +#define IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE 0x0040u +#define IMAGE_DLLCHARACTERISTICS_NX_COMPAT 0x0100u +#define IMAGE_DLLCHARACTERISTICS_NO_SEH 0x0400u +#define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE 0x8000u + +/* DataDirectory indices into ImageOptionalHeader64.DataDirectory[]. 
*/ +#define IMAGE_DIRECTORY_ENTRY_EXPORT 0 +#define IMAGE_DIRECTORY_ENTRY_IMPORT 1 +#define IMAGE_DIRECTORY_ENTRY_RESOURCE 2 +#define IMAGE_DIRECTORY_ENTRY_EXCEPTION 3 +#define IMAGE_DIRECTORY_ENTRY_SECURITY 4 +#define IMAGE_DIRECTORY_ENTRY_BASERELOC 5 +#define IMAGE_DIRECTORY_ENTRY_DEBUG 6 +#define IMAGE_DIRECTORY_ENTRY_ARCHITECTURE 7 +#define IMAGE_DIRECTORY_ENTRY_GLOBALPTR 8 +#define IMAGE_DIRECTORY_ENTRY_TLS 9 +#define IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG 10 +#define IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT 11 +#define IMAGE_DIRECTORY_ENTRY_IAT 12 +#define IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT 13 +#define IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR 14 +/* index 15 is reserved (must be zero) */ + +typedef struct ImageDataDirectory { + u32 VirtualAddress; /* RVA */ + u32 Size; +} ImageDataDirectory; + +typedef struct ImageOptionalHeader64 { + /* Standard fields (24 bytes for PE32+). */ + u16 Magic; /* IMAGE_NT_OPTIONAL_HDR64_MAGIC */ + u8 MajorLinkerVersion; + u8 MinorLinkerVersion; + u32 SizeOfCode; + u32 SizeOfInitializedData; + u32 SizeOfUninitializedData; + u32 AddressOfEntryPoint; /* RVA of _start */ + u32 BaseOfCode; + /* Windows-specific (88 bytes for PE32+). 
*/ + u64 ImageBase; /* preferred load address */ + u32 SectionAlignment; /* in-memory alignment, >= page */ + u32 FileAlignment; /* on-disk alignment */ + u16 MajorOperatingSystemVersion; + u16 MinorOperatingSystemVersion; + u16 MajorImageVersion; + u16 MinorImageVersion; + u16 MajorSubsystemVersion; + u16 MinorSubsystemVersion; + u32 Win32VersionValue; /* reserved, zero */ + u32 SizeOfImage; /* in-memory size, SectionAlignment-padded */ + u32 SizeOfHeaders; /* file offset of first section's raw data */ + u32 CheckSum; + u16 Subsystem; /* IMAGE_SUBSYSTEM_* */ + u16 DllCharacteristics; /* IMAGE_DLLCHARACTERISTICS_* */ + u64 SizeOfStackReserve; + u64 SizeOfStackCommit; + u64 SizeOfHeapReserve; + u64 SizeOfHeapCommit; + u32 LoaderFlags; /* reserved, zero */ + u32 NumberOfRvaAndSizes; /* COFF_NUM_DATA_DIRECTORIES */ + /* Data directories (128 bytes = 16 * 8). */ + ImageDataDirectory DataDirectory[COFF_NUM_DATA_DIRECTORIES]; +} ImageOptionalHeader64; + +/* ---- section header (IMAGE_SECTION_HEADER) ---- + * On-disk: 40 bytes, no padding. */ +#define COFF_SECTION_HEADER_SIZE 40u + +/* Name field convention: 8 raw bytes. If the section name is <= 8 chars + * the bytes are the name, zero-padded (not necessarily NUL-terminated + * if exactly 8). For longer names (only legal in object files, not + * images) the form is "/<decimal-offset>" where <offset> is the + * ASCII decimal offset into the string table. Emit/read paths + * must marshal this convention explicitly. 
*/ +typedef struct ImageSectionHeader { + char Name[8]; + u32 VirtualSize; /* size in image; for .obj usually 0 */ + u32 VirtualAddress; /* RVA in image; for .obj usually 0 */ + u32 SizeOfRawData; + u32 PointerToRawData; /* file offset */ + u32 PointerToRelocations; /* file offset of reloc array */ + u32 PointerToLinenumbers; /* file offset of COFF linenumbers (legacy) */ + u16 NumberOfRelocations; + u16 NumberOfLinenumbers; + u32 Characteristics; /* IMAGE_SCN_* */ +} ImageSectionHeader; + +/* Section characteristics flags. */ +#define IMAGE_SCN_CNT_CODE 0x00000020u +#define IMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040u +#define IMAGE_SCN_CNT_UNINITIALIZED_DATA 0x00000080u +#define IMAGE_SCN_LNK_INFO 0x00000200u +#define IMAGE_SCN_LNK_REMOVE 0x00000800u +#define IMAGE_SCN_LNK_COMDAT 0x00001000u +#define IMAGE_SCN_GPREL 0x00008000u +#define IMAGE_SCN_MEM_DISCARDABLE 0x02000000u +#define IMAGE_SCN_MEM_SHARED 0x10000000u +#define IMAGE_SCN_MEM_EXECUTE 0x20000000u +#define IMAGE_SCN_MEM_READ 0x40000000u +#define IMAGE_SCN_MEM_WRITE 0x80000000u + +/* Alignment lives in bits 20..23 of Characteristics. Encoding is + * (log2(align) + 1) << 20: ALIGN_1BYTES = 1<<20, ALIGN_2BYTES = 2<<20, + * ..., ALIGN_8192BYTES = 14<<20. Zero in the field means "default" + * (16-byte for code). 
*/ +#define IMAGE_SCN_ALIGN_1BYTES 0x00100000u +#define IMAGE_SCN_ALIGN_2BYTES 0x00200000u +#define IMAGE_SCN_ALIGN_4BYTES 0x00300000u +#define IMAGE_SCN_ALIGN_8BYTES 0x00400000u +#define IMAGE_SCN_ALIGN_16BYTES 0x00500000u +#define IMAGE_SCN_ALIGN_32BYTES 0x00600000u +#define IMAGE_SCN_ALIGN_64BYTES 0x00700000u +#define IMAGE_SCN_ALIGN_128BYTES 0x00800000u +#define IMAGE_SCN_ALIGN_256BYTES 0x00900000u +#define IMAGE_SCN_ALIGN_512BYTES 0x00A00000u +#define IMAGE_SCN_ALIGN_1024BYTES 0x00B00000u +#define IMAGE_SCN_ALIGN_2048BYTES 0x00C00000u +#define IMAGE_SCN_ALIGN_4096BYTES 0x00D00000u +#define IMAGE_SCN_ALIGN_8192BYTES 0x00E00000u +#define IMAGE_SCN_ALIGN_MASK 0x00F00000u + +/* Encode an alignment given as n = log2(bytes): n=0 (2^0=1B) -> 1<<20, + * n=13 (2^13=8192B) -> 14<<20. */ +#define IMAGE_SCN_ALIGN_FROM_LOG2(n) (((u32)((n) + 1u)) << 20) + +/* ---- symbol record (IMAGE_SYMBOL) ---- + * On-disk: 18 bytes per record, packed to 2-byte alignment (pragma + * pack(2) in the official headers). The host C struct below would + * have sizeof >= 20 due to padding; emit/read MUST marshal field by + * field — never write sizeof(ImageSymbol). The COFF_SYMBOL_SIZE + * constant is the source of truth. */ +#define COFF_SYMBOL_SIZE 18u + +typedef struct ImageSymbol { + union { + char ShortName[8]; /* in-place if name <= 8 bytes */ + struct { + u32 Zeroes; /* 0 signals strtab lookup */ + u32 Offset; /* string-table offset (>= 4) */ + } LongName; + } Name; + u32 Value; + i16 SectionNumber; /* 1-based; specials below */ + u16 Type; /* low4=base, high12=derived */ + u8 StorageClass; /* IMAGE_SYM_CLASS_* */ + u8 NumberOfAuxSymbols; +} ImageSymbol; + +/* Section number specials (i16-valued sentinel values). */ +#define IMAGE_SYM_UNDEFINED 0 +#define IMAGE_SYM_ABSOLUTE (-1) +#define IMAGE_SYM_DEBUG (-2) + +/* Type encoding. Low 4 bits = base type, high 12 bits = derived. 
The + * only derived-type bit cfree distinguishes is FUNCTION (so a global + * is marked as a function when (Type >> 4) == DTYPE_FUNCTION, i.e. Type == 0x20). */ +#define IMAGE_SYM_TYPE_NULL 0u +#define IMAGE_SYM_DTYPE_NULL 0u +#define IMAGE_SYM_DTYPE_FUNCTION 2u +#define COFF_SYM_TYPE_FUNCTION (IMAGE_SYM_DTYPE_FUNCTION << 4) + +/* Storage classes. The subset cfree emits is EXTERNAL, STATIC, FILE, + * SECTION, WEAK_EXTERNAL; readers must additionally skip LABEL and + * FUNCTION (.bf/.ef debug pairs). END_OF_FUNCTION is signed -1 (the + * field is u8 so the wire value is 0xFF). */ +#define IMAGE_SYM_CLASS_END_OF_FUNCTION 0xFFu +#define IMAGE_SYM_CLASS_NULL 0u +#define IMAGE_SYM_CLASS_AUTOMATIC 1u +#define IMAGE_SYM_CLASS_EXTERNAL 2u +#define IMAGE_SYM_CLASS_STATIC 3u +#define IMAGE_SYM_CLASS_REGISTER 4u +#define IMAGE_SYM_CLASS_EXTERNAL_DEF 5u +#define IMAGE_SYM_CLASS_LABEL 6u +#define IMAGE_SYM_CLASS_UNDEFINED_LABEL 7u +#define IMAGE_SYM_CLASS_FUNCTION 101u /* .bf / .ef markers */ +#define IMAGE_SYM_CLASS_FILE 103u /* aux records hold filename */ +#define IMAGE_SYM_CLASS_SECTION 104u +#define IMAGE_SYM_CLASS_WEAK_EXTERNAL 105u +#define IMAGE_SYM_CLASS_CLR_TOKEN 107u + +/* ---- aux records ---- + * Each aux record is exactly COFF_SYMBOL_SIZE (18 bytes) on disk; the + * structs below are wire-shaped but again must be marshalled field by + * field rather than via sizeof. */ +#define COFF_AUX_SECTION_SIZE 18u +#define COFF_AUX_WEAKEXTERN_SIZE 18u +#define COFF_AUX_FILE_SIZE 18u +#define COFF_AUX_FUNCTION_SIZE 18u + +/* Follows a STATIC symbol whose Value is 0 and SectionNumber is the + * section's 1-based index. Encodes per-section metadata + COMDAT + * grouping. 
*/ +typedef struct ImageAuxSymbolSection { + u32 Length; /* section's SizeOfRawData */ + u16 NumberOfRelocations; + u16 NumberOfLinenumbers; + u32 CheckSum; /* COMDAT checksum, 0 otherwise */ + u16 Number; /* associated section idx for COMDAT */ + u8 Selection; /* IMAGE_COMDAT_SELECT_* */ + u8 Unused[3]; +} ImageAuxSymbolSection; + +/* Follows a WEAK_EXTERNAL symbol. TagIndex is the symbol-table index + * of the fall-back symbol used when the weak ref is unresolved. */ +typedef struct ImageAuxSymbolWeakExternal { + u32 TagIndex; + u32 Characteristics; /* IMAGE_WEAK_EXTERN_SEARCH_* */ + u8 Unused[10]; +} ImageAuxSymbolWeakExternal; + +/* Follows a FILE symbol. For source paths longer than 18 bytes the + * NumberOfAuxSymbols on the parent FILE record is >1 and the name + * spans multiple aux records concatenated. */ +typedef struct ImageAuxSymbolFile { + char FileName[18]; +} ImageAuxSymbolFile; + +/* Follows a FUNCTION (.bf/.ef) symbol. cfree does not emit these but + * the reader must skip them when walking the symbol table. */ +typedef struct ImageAuxSymbolFunction { + u32 TagIndex; + u32 TotalSize; + u32 PointerToLinenumber; + u32 PointerToNextFunction; + u8 Unused[2]; +} ImageAuxSymbolFunction; + +/* COMDAT selection (ImageAuxSymbolSection.Selection). */ +#define IMAGE_COMDAT_SELECT_NODUPLICATES 1u +#define IMAGE_COMDAT_SELECT_ANY 2u +#define IMAGE_COMDAT_SELECT_SAME_SIZE 3u +#define IMAGE_COMDAT_SELECT_EXACT_MATCH 4u +#define IMAGE_COMDAT_SELECT_ASSOCIATIVE 5u +#define IMAGE_COMDAT_SELECT_LARGEST 6u +#define IMAGE_COMDAT_SELECT_NEWEST 7u + +/* Weak-external resolution policy. */ +#define IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY 1u +#define IMAGE_WEAK_EXTERN_SEARCH_LIBRARY 2u +#define IMAGE_WEAK_EXTERN_SEARCH_ALIAS 3u + +/* ---- relocation entry (IMAGE_RELOCATION) ---- + * On-disk: 10 bytes per record, packed to 2-byte alignment. Same + * sizeof caveat as ImageSymbol — never write sizeof(ImageRelocation), + * use COFF_RELOC_SIZE. 
*/ +#define COFF_RELOC_SIZE 10u + +typedef struct ImageRelocation { + u32 VirtualAddress; /* offset within the section being patched */ + u32 SymbolTableIndex; + u16 Type; /* IMAGE_REL_<machine>_* */ +} ImageRelocation; + +/* ---- string table layout ---- + * Immediately follows the symbol table on disk: + * [0..3] u32 total size in bytes, INCLUSIVE of these 4 bytes. + * [4..] concatenated NUL-terminated UTF-8 strings. + * Therefore the smallest legal offset for a name reference is 4, and + * Offset == 0 in the LongName form is reserved (means "no string"). + * Empty string tables write the size field as 4 (i.e. no payload). */ +#define COFF_STRTAB_SIZE_FIELD_BYTES 4u + +/* ---- base relocation block (IMAGE_BASE_RELOCATION) ---- + * Used in the .reloc directory of PE images. Each block describes + * fixups for one 4 KiB page: VirtualAddress is the page base RVA, the + * payload is (SizeOfBlock - 8) bytes of u16 entries packed as + * (type:4, offset:12) where offset is relative to VirtualAddress. */ +typedef struct ImageBaseRelocation { + u32 VirtualAddress; + u32 SizeOfBlock; /* header (8) + entries; padded to u32 */ +} ImageBaseRelocation; +#define COFF_BASE_RELOCATION_SIZE 8u + +#define IMAGE_REL_BASED_ABSOLUTE 0u /* skip entry, used for padding */ +#define IMAGE_REL_BASED_HIGH 1u +#define IMAGE_REL_BASED_LOW 2u +#define IMAGE_REL_BASED_HIGHLOW 3u +#define IMAGE_REL_BASED_HIGHADJ 4u +#define IMAGE_REL_BASED_ARM_MOV32 5u +#define IMAGE_REL_BASED_DIR64 10u /* the one used on x64 / arm64 */ + +/* ---- export directory (IMAGE_EXPORT_DIRECTORY) ---- + * One record, pointed at by IMAGE_DIRECTORY_ENTRY_EXPORT in the + * optional header. AddressOfFunctions is the EAT (u32 RVAs); the ENT + * (u32 RVAs at AddressOfNames) is parallel to the ordinal table + * (u16 ordinals at AddressOfNameOrdinals) and indexes into the EAT. 
+ * An EAT entry whose RVA falls inside the export directory's own + * [VA, VA+Size) range is a *forwarder*: the bytes at that RVA are a + * "OTHERMODULE.OtherName" NUL-terminated string and the OS loader + * follows the chain at load time. */ +#define COFF_EXPORT_DIR_SIZE 40u + +typedef struct ImageExportDirectory { + u32 Characteristics; + u32 TimeDateStamp; + u16 MajorVersion; + u16 MinorVersion; + u32 Name; /* RVA of DLL name */ + u32 Base; /* first ordinal */ + u32 NumberOfFunctions; + u32 NumberOfNames; + u32 AddressOfFunctions; /* EAT: RVA[NumberOfFunctions] */ + u32 AddressOfNames; /* ENT: RVA[NumberOfNames] */ + u32 AddressOfNameOrdinals; /* parallel ordinals: u16[NumberOfNames] */ +} ImageExportDirectory; + +/* ---- import directory (IMAGE_IMPORT_DESCRIPTOR) ---- + * Array of these, terminated by an all-zero entry, lives at the RVA + * named by IMAGE_DIRECTORY_ENTRY_IMPORT in the optional header. + * OriginalFirstThunk -> the import lookup table (read-only); FirstThunk + * -> the IAT (overwritten by the loader with resolved addresses). */ +#define COFF_IMPORT_DESCRIPTOR_SIZE 20u + +typedef struct ImageImportDescriptor { + u32 OriginalFirstThunk; /* RVA -> IMAGE_THUNK_DATA64[] (ILT) */ + u32 TimeDateStamp; + u32 ForwarderChain; + u32 Name; /* RVA -> NUL-terminated DLL name */ + u32 FirstThunk; /* RVA -> IMAGE_THUNK_DATA64[] (IAT) */ +} ImageImportDescriptor; + +/* Thunk entries are u64 on PE32+. If the high bit (IMAGE_ORDINAL_FLAG64) + * is set, the low 16 bits hold an ordinal. Otherwise the value is an + * RVA to an IMAGE_IMPORT_BY_NAME record. */ +#define IMAGE_ORDINAL_FLAG64 0x8000000000000000ull +#define COFF_THUNK_DATA64_SIZE 8u + +typedef struct ImageImportByName { + u16 Hint; /* index hint into the DLL's export table */ + /* char Name[]; NUL-terminated, followed by optional pad to even. */ +} ImageImportByName; + +/* ---- TLS directory (IMAGE_TLS_DIRECTORY64) ---- + * Pointed at by IMAGE_DIRECTORY_ENTRY_TLS in the optional header. 
The + * loader walks the callbacks array (NUL-terminated) before main runs. */ +#define COFF_TLS_DIRECTORY64_SIZE 40u + +typedef struct ImageTlsDirectory64 { + u64 StartAddressOfRawData; + u64 EndAddressOfRawData; + u64 AddressOfIndex; /* VA of u32 _tls_index */ + u64 AddressOfCallBacks; /* VA of NULL-terminated callback array */ + u32 SizeOfZeroFill; + u32 Characteristics; /* alignment encoded as IMAGE_SCN_ALIGN_* */ +} ImageTlsDirectory64; + +/* ---- short import record (Microsoft .lib member) ---- + * Inside an archive whose member-data starts with Sig1==0, Sig2==0xFFFF + * the rest of the member is this "short import" descriptor: a fixed + * 20-byte header followed by SizeOfData bytes containing two + * NUL-terminated strings — symbol name then DLL name. */ +#define COFF_IMPORT_OBJECT_HEADER_SIZE 20u +#define IMPORT_OBJECT_HDR_SIG1 0x0000u +#define IMPORT_OBJECT_HDR_SIG2 0xFFFFu + +typedef struct ImportObjectHeader { + u16 Sig1; /* IMPORT_OBJECT_HDR_SIG1 (0) */ + u16 Sig2; /* IMPORT_OBJECT_HDR_SIG2 (0xFFFF) */ + u16 Version; + u16 Machine; /* IMAGE_FILE_MACHINE_* */ + u32 TimeDateStamp; + u32 SizeOfData; /* bytes after this header */ + u16 OrdinalOrHint; + /* Bitfield encoded as a single u16 on the wire: + * Type:2, NameType:3, Reserved:11 (low-to-high). */ + u16 TypeFlags; +} ImportObjectHeader; + +#define IMPORT_OBJECT_CODE 0u +#define IMPORT_OBJECT_DATA 1u +#define IMPORT_OBJECT_CONST 2u + +#define IMPORT_OBJECT_ORDINAL 0u +#define IMPORT_OBJECT_NAME 1u +#define IMPORT_OBJECT_NAME_NOPREFIX 2u +#define IMPORT_OBJECT_NAME_UNDECORATE 3u + +/* ---- debug directory (IMAGE_DEBUG_DIRECTORY) ---- + * Pointed at by IMAGE_DIRECTORY_ENTRY_DEBUG. cfree emits a single + * IMAGE_DEBUG_TYPE_REPRO entry to mark the image as deterministic. 
*/ +#define COFF_DEBUG_DIRECTORY_SIZE 28u + +typedef struct ImageDebugDirectory { + u32 Characteristics; /* reserved, zero */ + u32 TimeDateStamp; + u16 MajorVersion; + u16 MinorVersion; + u32 Type; /* IMAGE_DEBUG_TYPE_* */ + u32 SizeOfData; + u32 AddressOfRawData; /* RVA in image */ + u32 PointerToRawData; /* file offset */ +} ImageDebugDirectory; + +#define IMAGE_DEBUG_TYPE_UNKNOWN 0u +#define IMAGE_DEBUG_TYPE_COFF 1u +#define IMAGE_DEBUG_TYPE_CODEVIEW 2u +#define IMAGE_DEBUG_TYPE_MISC 4u +#define IMAGE_DEBUG_TYPE_REPRO 16u /* deterministic-build marker */ + +/* ---- AMD64 (x86_64) PE/COFF wire reloc types ---- + * The REL32_N variants encode the PC base N bytes after the relocation + * field (so REL32_1 maps to a -1 implicit addend in cfree's S + A - P + * model). Plain REL32 is relative to the byte after the 4-byte field. */ +#define IMAGE_REL_AMD64_ABSOLUTE 0u +#define IMAGE_REL_AMD64_ADDR64 1u /* 64-bit VA */ +#define IMAGE_REL_AMD64_ADDR32 2u /* 32-bit VA */ +#define IMAGE_REL_AMD64_ADDR32NB 3u /* 32-bit RVA (image-relative) */ +#define IMAGE_REL_AMD64_REL32 4u /* 32-bit relative to next inst */ +#define IMAGE_REL_AMD64_REL32_1 5u +#define IMAGE_REL_AMD64_REL32_2 6u +#define IMAGE_REL_AMD64_REL32_3 7u +#define IMAGE_REL_AMD64_REL32_4 8u +#define IMAGE_REL_AMD64_REL32_5 9u +#define IMAGE_REL_AMD64_SECTION 10u /* 16-bit section index */ +#define IMAGE_REL_AMD64_SECREL 11u /* 32-bit section-relative */ +#define IMAGE_REL_AMD64_SECREL7 12u +#define IMAGE_REL_AMD64_TOKEN 13u +#define IMAGE_REL_AMD64_SREL32 14u +#define IMAGE_REL_AMD64_PAIR 15u +#define IMAGE_REL_AMD64_SSPAN32 16u + +/* ---- ARM64 PE/COFF wire reloc types ---- */ +#define IMAGE_REL_ARM64_ABSOLUTE 0u +#define IMAGE_REL_ARM64_ADDR32 1u +#define IMAGE_REL_ARM64_ADDR32NB 2u +#define IMAGE_REL_ARM64_BRANCH26 3u +#define IMAGE_REL_ARM64_PAGEBASE_REL21 4u +#define IMAGE_REL_ARM64_REL21 5u +#define IMAGE_REL_ARM64_PAGEOFFSET_12A 6u +#define IMAGE_REL_ARM64_PAGEOFFSET_12L 7u +#define IMAGE_REL_ARM64_SECREL 
8u +#define IMAGE_REL_ARM64_SECREL_LOW12A 9u +#define IMAGE_REL_ARM64_SECREL_HIGH12A 10u +#define IMAGE_REL_ARM64_SECREL_LOW12L 11u +#define IMAGE_REL_ARM64_TOKEN 12u +#define IMAGE_REL_ARM64_SECTION 13u +#define IMAGE_REL_ARM64_ADDR64 14u +#define IMAGE_REL_ARM64_BRANCH19 15u +#define IMAGE_REL_ARM64_BRANCH14 16u +#define IMAGE_REL_ARM64_REL32 17u + +/* ---- per-arch reloc translators ---- + * Map cfree-canonical RelocKind <-> PE/COFF wire type. Contract + * matches elf_<arch>_reloc_{to,from}: + * _to: returns the wire type, or IMAGE_REL_*_ABSOLUTE (== 0) for + * unsupported input. Callers treat that as a panic trigger. + * _from: returns the canonical RelocKind, or (u32)-1 on unknown + * input. Callers diagnose the unknown wire value. */ +u32 coff_x86_64_reloc_to(u32 kind /* RelocKind */); +u32 coff_x86_64_reloc_from(u32 wire_type); +u32 coff_aarch64_reloc_to(u32 kind /* RelocKind */); +u32 coff_aarch64_reloc_from(u32 wire_type); + +/* ---- little-endian byte writers/readers (Writer-based) ---- + * Reads use rd_u*_le from core/bytes.h directly; only writes need the + * Writer-bridging wrappers. */ + +static inline void coff_wr_u8(Writer* w, u32 v) { + u8 b = (u8)v; + cfree_writer_write(w, &b, 1); +} + +static inline void coff_wr_u16(Writer* w, u32 v) { + u8 b[2]; + wr_u16_le(b, (u16)v); + cfree_writer_write(w, b, 2); +} + +static inline void coff_wr_u32(Writer* w, u32 v) { + u8 b[4]; + wr_u32_le(b, v); + cfree_writer_write(w, b, 4); +} + +static inline void coff_wr_u64(Writer* w, u64 v) { + u8 b[8]; + wr_u64_le(b, v); + cfree_writer_write(w, b, 8); +} + +static inline u16 coff_rd_u16(const u8* p) { return rd_u16_le(p); } +static inline u32 coff_rd_u32(const u8* p) { return rd_u32_le(p); } +static inline u64 coff_rd_u64(const u8* p) { return rd_u64_le(p); } + +#endif /* CFREE_OBJ_COFF_H */ diff --git a/src/obj/coff_emit.c b/src/obj/coff_emit.c @@ -0,0 +1,731 @@ +/* PE/COFF relocatable .obj writer. 
Walks a finalized ObjBuilder and + * emits a 64-bit little-endian relocatable object via the supplied + * Writer. Counterpart to emit_elf / emit_macho. + * + * Layout strategy: + * 1. plan COFF sections (one per kept obj section), assigning + * Characteristics, alignment, raw size, and per-section reloc + * counts; + * 2. build the symbol table (synthesized per-section static symbols + * with section-definition aux records, plus file symbols and + * every ObjSym kept after sweep); + * 3. build per-section relocation records via the per-arch + * translator (arch_for_compiler(c)->coff->reloc_to); + * 4. assign file offsets: + * file header | section headers | (bytes + relocs)* | symtab | strtab + * 5. write the file in that order. + * + * 64-bit little-endian only — IMAGE_FILE_MACHINE_AMD64 (x86_64) and + * IMAGE_FILE_MACHINE_ARM64 (aarch64). Big-endian / ptr_size != 8 panic + * at entry. + * + * Section name mapping policy: we pass the cfree Section.name through + * verbatim to the COFF Name field. Callers / readers are expected to + * have stored COFF-shaped names (".text", ".rdata", ".tls$", etc.) at + * the obj layer; emit_coff does not rewrite ELF-style spellings like + * ".rodata" -> ".rdata". Names longer than 8 bytes spill into the + * string table with the "/<decimal-offset>" encoding. + * + * Addend handling: COFF stores the addend inline in the patched bytes + * (there is no addend field in IMAGE_RELOCATION). The ObjBuilder + * caller is responsible for having written the addend into the section + * bytes already — matching how MSVC / mingw emit. A nonzero + * Reloc::addend with has_explicit_addend set is rejected here as a + * known v1 limitation. 
*/ + +#include <string.h> + +#include "arch/arch.h" +#include "core/arena.h" +#include "core/buf.h" +#include "core/heap.h" +#include "core/pool.h" +#include "core/util.h" +#include "obj/coff.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +static int coff_rel32_absorbs_minus4(CfreeArchKind arch, RelocKind kind, + i64 addend) { + if (arch != CFREE_ARCH_X86_64 || addend != -4) return 0; + switch (kind) { + case R_PC32: + case R_REL32: + case R_PLT32: + case R_X64_PLT32: + case R_X64_GOTPCREL: + case R_X64_GOTPCRELX: + case R_X64_REX_GOTPCRELX: + return 1; + default: + return 0; + } +} + +/* ---- per-COFF-section plan record ---- */ + +typedef struct CSec { + /* IMAGE_SECTION_HEADER fields (little-endian-encoded at write time). */ + char name8[8]; /* Name field bytes; "/N" form if long name */ + u32 virtual_size; /* nonzero for NOBITS (bss size) */ + u32 size_of_raw_data; /* zero for NOBITS */ + u32 pointer_to_raw_data; + u32 pointer_to_relocations; + u16 number_of_relocations; + u32 characteristics; /* IMAGE_SCN_* | ALIGN nibble */ + + /* Planning state. */ + u32 align; /* in bytes, power of two */ + u32 obj_sec; /* originating ObjSecId */ + int is_nobits; + const Buf* obj_bytes; /* NULL when nobits */ + u8* reloc_bytes; /* arena-allocated, nreloc * 10 bytes */ + ObjGroupId group_id; /* OBJ_GROUP_NONE if not in a group */ +} CSec; + +/* ---- emit ---- */ + +static u32 log2_align(u32 a) { + u32 r = 0; + while ((1u << r) < a) ++r; + return r; +} + +/* Map cfree section flags/sem to IMAGE_SCN_* Characteristics, leaving + * the alignment nibble for the caller to OR in. 
*/ +static u32 sec_characteristics(const Section* s, int in_group) { + u32 r = 0; + int is_bss = (s->kind == SEC_BSS) || (s->sem == SSEM_NOBITS); + if (s->flags & SF_EXEC) { + r |= IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE; + } else if (is_bss) { + r |= IMAGE_SCN_CNT_UNINITIALIZED_DATA; + } else if (s->flags & SF_WRITE) { + r |= IMAGE_SCN_CNT_INITIALIZED_DATA; + } else if (s->flags & SF_ALLOC) { + /* Read-only allocated data (.rdata). */ + r |= IMAGE_SCN_CNT_INITIALIZED_DATA; + } + if (s->flags & SF_ALLOC) r |= IMAGE_SCN_MEM_READ; + if (s->flags & SF_WRITE) r |= IMAGE_SCN_MEM_WRITE; + if (in_group) r |= IMAGE_SCN_LNK_COMDAT; + /* When a reader stashed format-specific flag bits on a COFF-origin + * section, OR them back in here. ext_type carries the raw + * Characteristics value (or zero if no override); ext_flags is a + * sibling bag for any bits the canonical mapping above would lose. */ + if (s->ext_kind == OBJ_EXT_COFF) { + if (s->ext_type) { + /* Preserve the raw characteristics verbatim — overrides the + * canonical mapping. Keeps round-trip byte-stable for sections + * carrying CNT_INFO / LNK_REMOVE / MEM_DISCARDABLE / etc. */ + r = s->ext_type & ~IMAGE_SCN_ALIGN_MASK; + } + r |= s->ext_flags; + } + return r; +} + +/* Append `len` bytes of `s` followed by a single NUL to `b`, returning + * the offset at which `s` was placed. Dedupe linearly — strtabs are + * small enough that this is fine without a hash table, and the + * dedupe matches what binutils / llvm-objcopy emit. Mirror of the + * helper in elf_emit. */ +static u32 strtab_add(Buf* b, const char* s, u32 len) { + if (len == 0) return 0; + u32 total = buf_pos(b); + if (total > len) { + u8 stack[256]; + u8* tmp = + total <= sizeof stack ? stack : (u8*)b->heap->alloc(b->heap, total, 1); + if (tmp) { + buf_flatten(b, tmp); + /* Skip the first 4 bytes (the size-prefix placeholder) when + * searching for matches. 
*/ + u32 start = COFF_STRTAB_SIZE_FIELD_BYTES; + if (total > start + len) { + for (u32 i = start; i + len < total; ++i) { + if (tmp[i + len] == 0 && memcmp(tmp + i, s, len) == 0) { + if (tmp != stack) b->heap->free(b->heap, tmp, total); + return i; + } + } + } + if (tmp != stack) b->heap->free(b->heap, tmp, total); + } + } + u32 off = total; + buf_write(b, s, len); + { + u8 z = 0; + buf_write(b, &z, 1); + } + return off; +} + +/* Encode an 8-byte Name field. If the name fits in 8 bytes, copy + * verbatim and zero-pad. Otherwise allocate the name in `strtab` and + * write "/<decimal-offset>" (NUL-padded to 8 bytes). */ +static void encode_name8(char out[8], const char* name, u32 nlen, Buf* strtab) { + memset(out, 0, 8); + if (nlen <= 8) { + if (nlen) memcpy(out, name, nlen); + return; + } + u32 off = strtab_add(strtab, name, nlen); + /* "/<decimal-offset>" — up to 7 decimal digits leaves room for the + * leading slash within 8 bytes. COFF .obj strtabs are < 1 MiB in + * practice, so 7 digits is plenty. */ + char tmp[16]; + int n = 0; + tmp[n++] = '/'; + /* Decimal-format off into tmp+1. */ + char dig[12]; + int d = 0; + u32 v = off; + if (v == 0) { + dig[d++] = '0'; + } else { + while (v) { + dig[d++] = (char)('0' + (v % 10u)); + v /= 10u; + } + } + while (d > 0 && n < (int)sizeof tmp) tmp[n++] = dig[--d]; + if (n > 8) n = 8; + memcpy(out, tmp, (size_t)n); +} + +/* Write one 18-byte IMAGE_SYMBOL record into `dst`. */ +static void wr_sym(u8* dst, const char ShortName[8], u32 Zeroes, u32 Offset, + u32 Value, i16 SectionNumber, u16 Type, u8 StorageClass, + u8 NumberOfAuxSymbols) { + if (Zeroes == 0 && Offset != 0) { + /* LongName form: 4 zero bytes then 4-byte LE strtab offset. 
*/ + memset(dst, 0, 4); + wr_u32_le(dst + 4, Offset); + } else { + memcpy(dst, ShortName, 8); + } + wr_u32_le(dst + 8, Value); + wr_u16_le(dst + 12, (u16)SectionNumber); + wr_u16_le(dst + 14, Type); + dst[16] = StorageClass; + dst[17] = NumberOfAuxSymbols; +} + +/* Write a section-definition aux record (18 bytes). */ +static void wr_aux_secdef(u8* dst, u32 Length, u16 NumberOfRelocations, + u16 NumberOfLinenumbers, u32 CheckSum, u16 Number, + u8 Selection) { + wr_u32_le(dst + 0, Length); + wr_u16_le(dst + 4, NumberOfRelocations); + wr_u16_le(dst + 6, NumberOfLinenumbers); + wr_u32_le(dst + 8, CheckSum); + wr_u16_le(dst + 12, Number); + dst[14] = Selection; + dst[15] = 0; + dst[16] = 0; + dst[17] = 0; +} + +/* Write a weak-externals aux record (18 bytes). */ +static void wr_aux_weak(u8* dst, u32 TagIndex, u32 Characteristics) { + wr_u32_le(dst + 0, TagIndex); + wr_u32_le(dst + 4, Characteristics); + memset(dst + 8, 0, 10); +} + +/* Look up the pool-interned string for a Sym. */ +static const char* sym_to_str(Compiler* c, Sym n, u32* len_out) { + size_t len; + const char* s = pool_str(c->global, n, &len); + if (!s) { + *len_out = 0; + return ""; + } + *len_out = (u32)len; + return s; +} + +void emit_coff(Compiler* c, ObjBuilder* ob, Writer* w) { + Heap* h = (Heap*)c->ctx->heap; + + /* Tombstone sweep — see obj_sweep_dead. */ + obj_sweep_dead(ob); + + /* ---- target validation ----------------------------------------- */ + const ArchImpl* arch = arch_for_compiler(c); + const ArchCoffOps* coff = arch ? 
arch->coff : NULL; + if (!coff || !coff->reloc_to) { + compiler_panic(c, no_loc(), "emit_coff: unsupported target arch %u", + (u32)c->target.arch); + } + u16 machine = coff->machine; + u32 (*reloc_to)(u32) = coff->reloc_to; + if (c->target.big_endian) { + compiler_panic(c, no_loc(), "emit_coff: big-endian COFF not supported"); + } + if (c->target.ptr_size != 8) { + compiler_panic(c, no_loc(), "emit_coff: ptr_size %u (expected 8)", + (u32)c->target.ptr_size); + } + + /* ---- pass 1: plan sections ------------------------------------- */ + u32 nobjsec = obj_section_count(ob); + CSec* secs = arena_zarray(c->scratch, CSec, nobjsec ? nobjsec : 1); + u32* obj_to_coff = arena_zarray(c->scratch, u32, nobjsec ? nobjsec : 1); + u32 nsecs = 0; + + /* String table — leading 4-byte size placeholder. Real strings start + * at offset 4. */ + Buf strtab; + buf_init(&strtab, h); + { + u8 zero4[COFF_STRTAB_SIZE_FIELD_BYTES] = {0, 0, 0, 0}; + buf_write(&strtab, zero4, COFF_STRTAB_SIZE_FIELD_BYTES); + } + + for (u32 i = 1; i < nobjsec; ++i) { + const Section* s = obj_section_get(ob, i); + if (s->removed) continue; + /* Skip ELF-style synthetic sections (a reader from another format + * may have surfaced them) — COFF stores symtab/strtab/relocs + * out-of-band, not as named sections. */ + if (s->sem == SSEM_SYMTAB || s->sem == SSEM_STRTAB || + s->sem == SSEM_RELA || s->sem == SSEM_REL || s->sem == SSEM_GROUP) { + continue; + } + + CSec* cs = &secs[nsecs]; + u32 nlen; + const char* nm = sym_to_str(c, s->name, &nlen); + encode_name8(cs->name8, nm, nlen, &strtab); + + cs->obj_sec = i; + cs->group_id = s->group_id; + cs->align = s->align ? s->align : 1; + + int in_group = (s->group_id != OBJ_GROUP_NONE); + u32 ch = sec_characteristics(s, in_group); + /* Alignment lives in bits 20..23. Cap at log2(8192)=13 -> nibble + * value 14 (IMAGE_SCN_ALIGN_8192BYTES). 
*/ + u32 lg = log2_align(cs->align); + if (lg > 13) lg = 13; + ch &= ~IMAGE_SCN_ALIGN_MASK; + ch |= IMAGE_SCN_ALIGN_FROM_LOG2(lg); + cs->characteristics = ch; + + if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) { + cs->is_nobits = 1; + cs->virtual_size = s->bss_size; + cs->size_of_raw_data = 0; + cs->obj_bytes = NULL; + } else { + cs->is_nobits = 0; + cs->virtual_size = 0; + cs->size_of_raw_data = s->bytes.total; + cs->obj_bytes = &s->bytes; + } + + obj_to_coff[i] = nsecs + 1; /* 1-based; matches SectionNumber. */ + nsecs++; + } + + /* ---- pass 2: count and assign per-section reloc counts --------- */ + /* COFF stores NumberOfRelocations as u16; sections with > 65535 + * relocs use the IMAGE_SCN_LNK_NRELOC_OVFL extension which we don't + * implement in v1. Panic if any single section exceeds the limit. */ + u32 total_relocs = obj_reloc_total(ob); + for (u32 ci = 0; ci < nsecs; ++ci) { + CSec* cs = &secs[ci]; + u32 nr = obj_reloc_count(ob, cs->obj_sec); + if (nr > 0xFFFFu) { + compiler_panic(c, no_loc(), + "emit_coff: section %u has %u relocs (max 65535)", + (u32)cs->obj_sec, nr); + } + cs->number_of_relocations = (u16)nr; + } + + /* ---- pass 3: build the symbol table ---------------------------- */ + /* Count ObjSyms (incl. tombstoned — we'll skip those when emitting) + * and, for live SK_FILE symbols, the aux records their path needs + * beyond the single aux slot budgeted per ObjSym below. A FILE + * symbol emits ceil(len / COFF_AUX_FILE_SIZE) aux records, so a long + * path would otherwise run past the end of `symtab`. */ + u32 nobjsym = 0; + u32 nfile_extra_aux = 0; + { + ObjSymIter* it = obj_symiter_new(ob); + ObjSymEntry e; + while (obj_symiter_next(it, &e)) { + ++nobjsym; + if (!e.sym->removed && e.sym->kind == SK_FILE) { + u32 flen; + (void)sym_to_str(c, e.sym->name, &flen); + u32 file_naux = + (flen + COFF_AUX_FILE_SIZE - 1u) / COFF_AUX_FILE_SIZE; + if (file_naux > 1u) nfile_extra_aux += file_naux - 1u; + } + } + obj_symiter_free(it); + } + + /* Upper bound on symbol-table records (including aux slots): + * - 2 records per section symbol (primary + 1 aux secdef) + * - 2 records per ObjSym (primary + up to 1 weak or file aux) + * - the extra aux records of FILE symbols with paths > 18 bytes + * - +4 spare for safety + * Worst case is generous; we trim by tracking nrecords as we emit. */ + u32 max_records = 2u * nsecs + 2u * nobjsym + nfile_extra_aux + 4u; + u8* symtab = (u8*)arena_zarray(c->scratch, u8, + (size_t)COFF_SYMBOL_SIZE * max_records); + u32 nrecords = 0; + + /* obj_id -> COFF symbol index (including aux slots). 
Index 0 is + * reserved as "none" in our internal map (a real COFF symbol may + * legitimately live at index 0, but no ObjSym ever maps there since + * we never put OBJ_SYM_NONE through). */ + u32* sym_to_coff = arena_zarray(c->scratch, u32, nobjsym + 2); + + /* Section symbols first — one STATIC per kept obj section, each + * followed by a SECTION DEFINITION aux. Reloc-against-section in + * other tools' output uses these; emitting them unconditionally + * matches what clang / mingw emit and gives readers a stable target. */ + u32* secsym_index = arena_zarray(c->scratch, u32, nsecs + 1); + for (u32 ci = 0; ci < nsecs; ++ci) { + CSec* cs = &secs[ci]; + char short_name[8]; + /* The section symbol's name is the section's own name (truncated + * to 8 bytes — section symbols never use the strtab spill form in + * MSVC/clang output). */ + memcpy(short_name, cs->name8, 8); + + u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE; + wr_sym(slot, short_name, /*Zeroes*/ 1, /*Offset*/ 0, + /*Value*/ 0, + /*SectionNumber*/ (i16)(ci + 1), + /*Type*/ IMAGE_SYM_TYPE_NULL, + /*StorageClass*/ IMAGE_SYM_CLASS_STATIC, + /*NumberOfAuxSymbols*/ 1); + secsym_index[ci] = nrecords; + nrecords++; + + /* Section-definition aux. For COMDAT members we encode the + * Selection from the group; default to SELECT_ANY which is what + * gcc/clang emit unless the user requests a specific selection + * mode. The associated-section Number is left at 0 (cfree does + * not produce associative-COMDAT chains today). */ + u8 selection = 0; + if (cs->group_id != OBJ_GROUP_NONE) { + const ObjGroup* g = obj_group_get(ob, cs->group_id); + if (g && !g->removed) { + selection = g->flags ? 
(u8)IMAGE_COMDAT_SELECT_ANY + : (u8)IMAGE_COMDAT_SELECT_ANY; + } + } + u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE; + wr_aux_secdef(aux, /*Length*/ cs->size_of_raw_data, + /*NumberOfRelocations*/ cs->number_of_relocations, + /*NumberOfLinenumbers*/ 0, + /*CheckSum*/ 0, + /*Number*/ 0, + /*Selection*/ selection); + nrecords++; + } + + /* File / regular symbols. */ + { + ObjSymIter* it = obj_symiter_new(ob); + ObjSymEntry e; + while (obj_symiter_next(it, &e)) { + const ObjSym* s = e.sym; + if (s->removed) continue; + if (s->kind == SK_IFUNC) { + compiler_panic(c, no_loc(), + "emit_coff: SK_IFUNC has no PE/COFF representation"); + } + /* Don't re-emit SK_SECTION symbols — section symbols are + * synthesized above. Map any input-side SK_SECTION onto the + * already-emitted one. */ + if (s->kind == SK_SECTION) { + if (s->section_id && s->section_id < nobjsec) { + u32 ci = obj_to_coff[s->section_id]; + if (ci) sym_to_coff[e.id] = secsym_index[ci - 1]; + } + continue; + } + + u32 nlen; + const char* nm = sym_to_str(c, s->name, &nlen); + + if (s->kind == SK_FILE) { + /* File symbol: name ".file" (short), section IMAGE_SYM_DEBUG, + * storage class FILE, followed by aux records carrying the + * NUL-padded file path (18 bytes per aux). */ + u32 file_len = nlen; + u32 naux = file_len ? (file_len + COFF_AUX_FILE_SIZE - 1u) / + COFF_AUX_FILE_SIZE + : 1u; + char short_name[8] = {'.', 'f', 'i', 'l', 'e', 0, 0, 0}; + u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE; + wr_sym(slot, short_name, 1, 0, /*Value*/ 0, + /*SectionNumber*/ (i16)IMAGE_SYM_DEBUG, + /*Type*/ IMAGE_SYM_TYPE_NULL, + /*StorageClass*/ IMAGE_SYM_CLASS_FILE, + /*NumberOfAuxSymbols*/ (u8)naux); + sym_to_coff[e.id] = nrecords; + nrecords++; + for (u32 a = 0; a < naux; ++a) { + u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE; + memset(aux, 0, COFF_AUX_FILE_SIZE); + u32 off = a * COFF_AUX_FILE_SIZE; + u32 copy = file_len > off ? 
file_len - off : 0; + if (copy > COFF_AUX_FILE_SIZE) copy = COFF_AUX_FILE_SIZE; + if (copy) memcpy(aux, nm + off, copy); + nrecords++; + } + continue; + } + + /* Regular symbol. */ + char short_name[8]; + u32 zeroes = 1, offset = 0; + memset(short_name, 0, 8); + if (nlen <= 8) { + if (nlen) memcpy(short_name, nm, nlen); + } else { + zeroes = 0; + offset = strtab_add(&strtab, nm, nlen); + } + + i16 section_number = 0; + u32 value = 0; + u8 storage_class = IMAGE_SYM_CLASS_NULL; + u16 type = IMAGE_SYM_TYPE_NULL; + u8 naux = 0; + int emit_weak_aux = 0; + + switch (s->kind) { + case SK_ABS: + section_number = (i16)IMAGE_SYM_ABSOLUTE; + value = (u32)s->value; + break; + case SK_COMMON: + /* COFF lacks a per-common alignment field; encode size in + * Value with SectionNumber=UNDEFINED and rely on the linker + * to pick a default alignment. (cfree's frontend uses + * COMMON only via __attribute__((common)) which is rare on + * PE/COFF targets.) */ + section_number = (i16)IMAGE_SYM_UNDEFINED; + value = (u32)s->size; + break; + default: + if (s->section_id == OBJ_SEC_NONE) { + section_number = (i16)IMAGE_SYM_UNDEFINED; + value = 0; + } else if (s->section_id < nobjsec && obj_to_coff[s->section_id]) { + section_number = (i16)obj_to_coff[s->section_id]; + value = (u32)s->value; + } else { + section_number = (i16)IMAGE_SYM_UNDEFINED; + value = 0; + } + break; + } + + if (s->kind == SK_FUNC) type = (u16)COFF_SYM_TYPE_FUNCTION; + + switch (s->bind) { + case SB_LOCAL: + storage_class = IMAGE_SYM_CLASS_STATIC; + break; + case SB_GLOBAL: + storage_class = IMAGE_SYM_CLASS_EXTERNAL; + break; + case SB_WEAK: + /* mingw / clang spell weak as EXTERNAL with a WeakExternal + * aux that points at the fallback symbol. cfree's obj layer + * doesn't carry a separate fallback symbol today, so we emit + * a self-referential weak aux (TagIndex=0) which the linker + * treats as "weak, no fallback" — equivalent to ELF STB_WEAK. 
*/ + storage_class = IMAGE_SYM_CLASS_WEAK_EXTERNAL; + emit_weak_aux = 1; + naux = 1; + break; + default: + storage_class = IMAGE_SYM_CLASS_STATIC; + break; + } + + u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE; + wr_sym(slot, short_name, zeroes, offset, value, section_number, type, + storage_class, naux); + sym_to_coff[e.id] = nrecords; + nrecords++; + if (emit_weak_aux) { + u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE; + wr_aux_weak(aux, /*TagIndex*/ 0, + /*Characteristics*/ IMAGE_WEAK_EXTERN_SEARCH_LIBRARY); + nrecords++; + } + } + obj_symiter_free(it); + } + + /* ---- pass 4: build per-section relocation tables --------------- */ + for (u32 ci = 0; ci < nsecs; ++ci) { + CSec* cs = &secs[ci]; + u32 nr = cs->number_of_relocations; + if (!nr) continue; + u8* buf = + (u8*)arena_alloc(c->scratch, (size_t)COFF_RELOC_SIZE * nr, _Alignof(u32)); + u32 j = 0; + for (u32 ri = 0; ri < total_relocs; ++ri) { + const Reloc* r = obj_reloc_at(ob, ri); + if (r->removed) continue; + if (r->section_id != cs->obj_sec) continue; + if (r->sym == OBJ_SYM_NONE) { + compiler_panic(c, no_loc(), + "emit_coff: reloc without symbol not supported " + "(sec=%u offset=%u kind=%u)", + (u32)r->section_id, (u32)r->offset, (u32)r->kind); + } + if (r->has_explicit_addend && r->addend != 0 && + !coff_rel32_absorbs_minus4(c->target.arch, (RelocKind)r->kind, + r->addend)) { + /* v1 limitation: COFF carries the addend in the patched bytes, + * and we don't currently mutate the obj's section bytes to + * encode a separate explicit addend. cfree's MCEmitter writes + * the addend inline for COFF targets, so this branch only + * fires for inputs synthesized by external tools. 
*/ + compiler_panic(c, no_loc(), + "emit_coff: explicit nonzero addend not supported " + "(sec=%u offset=%u kind=%u addend=%lld)", + (u32)r->section_id, (u32)r->offset, (u32)r->kind, + (long long)r->addend); + } + u32 wire = reloc_to(r->kind); + /* Both arch translators use 0 (IMAGE_REL_*_ABSOLUTE) as the + * unsupported-input sentinel; treat that as a panic unless the + * input really is R_NONE. */ + if (wire == 0 && r->kind != R_NONE) { + compiler_panic( + c, no_loc(), + "emit_coff: unsupported relocation kind %u for arch %u", + (u32)r->kind, (u32)c->target.arch); + } + u32 sym_idx = sym_to_coff[r->sym]; + u8* slot = buf + (size_t)j * COFF_RELOC_SIZE; + wr_u32_le(slot + 0, r->offset); + wr_u32_le(slot + 4, sym_idx); + wr_u16_le(slot + 8, (u16)wire); + ++j; + } + cs->reloc_bytes = buf; + /* If a tombstoned reloc was skipped between count and emit, j may + * be less than nr; trust the latter count for the wire field. */ + if (j != nr) cs->number_of_relocations = (u16)j; + } + + /* ---- pass 5: assign file offsets ------------------------------- */ + /* Layout: + * [file header] [section headers] [per-section: bytes, relocs]* + * [symbol table] [string table] */ + u64 cur = (u64)COFF_FILE_HEADER_SIZE + + (u64)COFF_SECTION_HEADER_SIZE * (u64)nsecs; + + for (u32 ci = 0; ci < nsecs; ++ci) { + CSec* cs = &secs[ci]; + /* Raw data offset. NOBITS contributes nothing on disk. */ + if (cs->is_nobits || cs->size_of_raw_data == 0) { + cs->pointer_to_raw_data = 0; + } else { + cur = ALIGN_UP(cur, (u64)cs->align); + cs->pointer_to_raw_data = (u32)cur; + cur += cs->size_of_raw_data; + } + /* Reloc table. COFF doesn't mandate alignment for the reloc array, + * but llvm and binutils emit them naturally byte-packed; we 4-align + * for tidiness. 
*/ + if (cs->number_of_relocations) { + cur = ALIGN_UP(cur, (u64)4); + cs->pointer_to_relocations = (u32)cur; + cur += (u64)cs->number_of_relocations * COFF_RELOC_SIZE; + } else { + cs->pointer_to_relocations = 0; + } + } + + cur = ALIGN_UP(cur, (u64)4); + u64 symtab_off = cur; + cur += (u64)nrecords * COFF_SYMBOL_SIZE; + + /* String table starts immediately after the symtab. Its leading + * 4-byte size field (inclusive of itself) is still the zero + * placeholder inside `strtab` at this point; only the total size is + * recorded here, and the prefix is patched when the table is + * flattened for writing. */ + u32 strtab_size = buf_pos(&strtab); + u64 strtab_off = cur; + cur += strtab_size; + + /* Every file offset assigned above is narrowed to u32 on the wire. + * `cur` only grows, so checking the final value covers all of them; + * refuse to emit rather than silently truncate. */ + if (cur > 0xFFFFFFFFull) { + compiler_panic(c, no_loc(), + "emit_coff: object too large for 32-bit COFF file offsets"); + } + + /* ---- pass 6: write the file ------------------------------------ */ + cfree_writer_seek(w, 0); + + /* IMAGE_FILE_HEADER */ + coff_wr_u16(w, machine); + coff_wr_u16(w, (u16)nsecs); + coff_wr_u32(w, 0); /* TimeDateStamp: reproducible */ + coff_wr_u32(w, (u32)symtab_off); + coff_wr_u32(w, nrecords); + coff_wr_u16(w, 0); /* SizeOfOptionalHeader: 0 for .obj */ + coff_wr_u16(w, IMAGE_FILE_LARGE_ADDRESS_AWARE); + + /* Section headers — one 40-byte block immediately after the file + * header. */ + for (u32 ci = 0; ci < nsecs; ++ci) { + const CSec* cs = &secs[ci]; + cfree_writer_write(w, cs->name8, 8); + coff_wr_u32(w, cs->virtual_size); + coff_wr_u32(w, 0); /* VirtualAddress: 0 for .obj */ + coff_wr_u32(w, cs->size_of_raw_data); + coff_wr_u32(w, cs->pointer_to_raw_data); + coff_wr_u32(w, cs->pointer_to_relocations); + coff_wr_u32(w, 0); /* PointerToLinenumbers: 0 */ + coff_wr_u16(w, cs->number_of_relocations); + coff_wr_u16(w, 0); /* NumberOfLinenumbers: 0 */ + coff_wr_u32(w, cs->characteristics); + } + + /* Section bytes + relocs (interleaved). 
*/ + for (u32 ci = 0; ci < nsecs; ++ci) { + const CSec* cs = &secs[ci]; + if (!cs->is_nobits && cs->size_of_raw_data && cs->obj_bytes) { + cfree_writer_seek(w, cs->pointer_to_raw_data); + u32 sz = cs->obj_bytes->total; + /* Flatten the section's Buf into one contiguous heap block so it + * can be handed to the writer in a single call. */ + u8* tmp = (u8*)h->alloc(h, sz ? sz : 1, 1); + if (!tmp) { + compiler_panic(c, no_loc(), + "emit_coff: out of memory flattening section %u", + (u32)cs->obj_sec); + } + if (sz) buf_flatten(cs->obj_bytes, tmp); + cfree_writer_write(w, tmp, sz); + h->free(h, tmp, sz ? sz : 1); + } + if (cs->number_of_relocations && cs->reloc_bytes) { + cfree_writer_seek(w, cs->pointer_to_relocations); + cfree_writer_write(w, cs->reloc_bytes, + (size_t)cs->number_of_relocations * COFF_RELOC_SIZE); + } + } + + /* Symbol table. */ + cfree_writer_seek(w, symtab_off); + cfree_writer_write(w, symtab, (size_t)nrecords * COFF_SYMBOL_SIZE); + + /* String table: 4-byte total size (inclusive) followed by the body. + * `strtab` was initialized with 4 placeholder zero bytes; rewrite + * them with the real size before flushing. */ + { + u8* flat = (u8*)arena_alloc(c->scratch, strtab_size ? strtab_size : 1, 1); + if (strtab_size) buf_flatten(&strtab, flat); + /* Patch the 4-byte size prefix in place. */ + if (strtab_size >= COFF_STRTAB_SIZE_FIELD_BYTES) { + wr_u32_le(flat, strtab_size); + } + cfree_writer_seek(w, strtab_off); + cfree_writer_write(w, flat, strtab_size); + } + buf_fini(&strtab); +} diff --git a/src/obj/coff_read.c b/src/obj/coff_read.c @@ -0,0 +1,714 @@ +/* PE/COFF .obj (IMAGE_FILE_HEADER + sections) reader. Parses a 64-bit + * little-endian relocatable object back into a fresh ObjBuilder. Peer + * of read_elf / read_macho; the post-finalize ObjBuilder shape is the + * canonical superset doc/DESIGN.md §5.5 promises: read_coff of an + * emit_coff output produces an ObjBuilder shape-equivalent to the + * writer's input, modulo synthesized SECTION symbols and the COMDAT + * section-definition aux records. + * + * Scope: IMAGE_FILE_MACHINE_AMD64 and IMAGE_FILE_MACHINE_ARM64. PE + * executables (with a non-zero SizeOfOptionalHeader) are rejected — a + * future read_coff_pe would handle those. 
Microsoft "short import" + * records (Sig1=0, Sig2=0xFFFF) found inside .lib archive members are + * detected at entry and dispatched to read_coff_short_import, which + * synthesizes a DSO-shaped ObjBuilder annotated with the providing + * DLL name via obj_set_coff_import_dll. */ + +#include <string.h> + +#include "arch/arch.h" +#include "core/arena.h" +#include "core/heap.h" +#include "core/pool.h" +#include "obj/coff.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +/* ---- section-header scratch ---- */ + +typedef struct CSecRec { + char raw_name[8]; + u32 virtual_size; + u32 size_of_raw_data; + u32 pointer_to_raw_data; + u32 pointer_to_relocations; + u16 number_of_relocations; + u32 characteristics; + ObjSecId obj_sec; /* OBJ_SEC_NONE if skipped */ +} CSecRec; + +static void parse_shdr(const u8* p, CSecRec* out) { + memcpy(out->raw_name, p, 8); + out->virtual_size = coff_rd_u32(p + 8); + out->size_of_raw_data = coff_rd_u32(p + 16); + out->pointer_to_raw_data = coff_rd_u32(p + 20); + out->pointer_to_relocations = coff_rd_u32(p + 24); + out->number_of_relocations = coff_rd_u16(p + 32); + out->characteristics = coff_rd_u32(p + 36); + out->obj_sec = OBJ_SEC_NONE; +} + +/* ---- string-table lookup (4-byte size prefix, NUL-terminated entries) ---- */ + +static const char* strtab_lookup(const u8* tab, u32 tab_size, u32 off, + u32* len_out) { + if (off >= tab_size) { + *len_out = 0; + return ""; + } + const char* s = (const char*)(tab + off); + u32 max = tab_size - off; + u32 n = 0; + while (n < max && s[n] != '\0') ++n; + *len_out = n; + return s; +} + +/* Resolve a section/symbol short-or-long name into (ptr, len). COFF + * section names use the "/<decimal>" convention for >8-byte names; COFF + * symbol names use the (Zeroes==0, Offset) form instead. This helper + * handles the section form (8 raw bytes; leading '/' triggers strtab + * lookup). 
*/ +static void resolve_section_name(const char raw[8], const u8* strtab, + u32 strtab_size, const char** name_out, + u32* len_out) { + if (raw[0] == '/') { + /* Parse decimal offset. Up to 7 ASCII digits. */ + u32 off = 0; + for (u32 i = 1; i < 8 && raw[i] >= '0' && raw[i] <= '9'; ++i) { + off = off * 10u + (u32)(raw[i] - '0'); + } + *name_out = strtab_lookup(strtab, strtab_size, off, len_out); + return; + } + /* Inline: up to 8 bytes, NUL-padded (not necessarily NUL-terminated). */ + u32 n = 0; + while (n < 8 && raw[n] != '\0') ++n; + *name_out = raw; + *len_out = n; +} + +/* ---- characteristics -> SecKind / SecFlag / SecSem ---- */ + +static u16 coff_sec_kind(const char* name, u32 nlen, u32 ch) { + if (ch & IMAGE_SCN_CNT_UNINITIALIZED_DATA) return SEC_BSS; + if (ch & IMAGE_SCN_CNT_CODE) return SEC_TEXT; + if (ch & IMAGE_SCN_MEM_EXECUTE) return SEC_TEXT; + if (nlen >= 7 && memcmp(name, ".debug_", 7) == 0) return SEC_DEBUG; + /* The MS toolchain spells DWARF section names with a leading ".debug$" + * (CodeView) — keep ELF-style ".debug_" detection but also treat the + * MS form as debug. */ + if (nlen >= 7 && memcmp(name, ".debug$", 7) == 0) return SEC_DEBUG; + if (ch & IMAGE_SCN_CNT_INITIALIZED_DATA) { + if (ch & IMAGE_SCN_MEM_WRITE) return SEC_DATA; + return SEC_RODATA; + } + return SEC_OTHER; +} + +static u16 coff_sec_flags(const char* name, u32 nlen, u32 ch) { + u16 f = 0; + if (ch & IMAGE_SCN_MEM_READ) f |= SF_ALLOC; + if (ch & IMAGE_SCN_MEM_EXECUTE) f |= SF_EXEC; + if (ch & IMAGE_SCN_MEM_WRITE) f |= SF_WRITE; + if (ch & IMAGE_SCN_LNK_COMDAT) f |= SF_GROUP; + /* TLS sections in PE are spelled ".tls$<suffix>" (e.g. ".tls$", ".tls$ZZZ"). + * There is no characteristics bit for TLS — detection is name-based. */ + if (nlen >= 5 && memcmp(name, ".tls$", 5) == 0) f |= SF_TLS; + if (nlen == 4 && memcmp(name, ".tls", 4) == 0) f |= SF_TLS; + return f; +} + +/* Bits 20..23 of Characteristics encode alignment as (log2(align)+1). 
+ * 0 means "default"; we collapse to align=1 for round-trip purposes. */ +static u32 coff_sec_align(u32 ch) { + u32 n = (ch & IMAGE_SCN_ALIGN_MASK) >> 20; + if (n == 0) return 1; + return 1u << (n - 1u); +} + +/* ---- symbol-name resolution ---- */ + +static void resolve_sym_name(const u8* rec, const u8* strtab, u32 strtab_size, + const char** name_out, u32* len_out) { + /* ShortName: 8 bytes. If first 4 bytes are zero, second 4 bytes is + * the strtab offset (LongName form). */ + u32 z = coff_rd_u32(rec + 0); + if (z == 0) { + u32 off = coff_rd_u32(rec + 4); + *name_out = strtab_lookup(strtab, strtab_size, off, len_out); + return; + } + u32 n = 0; + while (n < 8 && rec[n] != '\0') ++n; + *name_out = (const char*)rec; + *len_out = n; +} + +/* ---- short-import record handler ---- + * Microsoft "short import" format: a 20-byte ImportObjectHeader + * followed by SizeOfData bytes containing two NUL-terminated strings — + * the imported symbol name then the DLL name. These live as members + * of .lib archives (mingw's libkernel32.dll.a etc.) and stand in for + * a full long-form COFF import object. + * + * cfree-side model: synthesize a DSO-shaped ObjBuilder with the + * imported symbol defined at section_id = OBJ_SEC_NONE (the same + * shape read_coff_dso / read_elf_dso produce for an exported name), + * and stash the providing DLL name on the builder via + * obj_set_coff_import_dll so the archive-ingestion layer can route + * the resulting LinkInput as a DSO with this name as the soname. + * + * We also synthesize the `__imp_<name>` alias mingw codegen uses to + * spell explicit IAT-slot access; both names ultimately resolve to + * the same DLL export at link time. */ +static ObjBuilder* read_coff_short_import(Compiler* c, const char* name, + const u8* data, size_t len) { + if (len < COFF_IMPORT_OBJECT_HEADER_SIZE) + compiler_panic(c, no_loc(), + "read_coff: short-import record shorter than header"); + + /* Sig1 / Sig2 already checked by the caller. 
*/ + /* data + 4: Version (2 bytes, ignored). */ + u16 machine = coff_rd_u16(data + 6); + /* data + 8: TimeDateStamp (4 bytes, ignored). */ + u32 size_of_data = coff_rd_u32(data + 12); + u16 ordinal_or_hint = coff_rd_u16(data + 16); + u16 type_flags = coff_rd_u16(data + 18); + + if ((u64)COFF_IMPORT_OBJECT_HEADER_SIZE + (u64)size_of_data > (u64)len) + compiler_panic(c, no_loc(), + "read_coff: short-import SizeOfData=%u extends past input " + "(len=%zu)", + size_of_data, len); + + if (machine != IMAGE_FILE_MACHINE_AMD64 && + machine != IMAGE_FILE_MACHINE_ARM64) + compiler_panic(c, no_loc(), + "read_coff: short-import unsupported machine %#x", + (u32)machine); + + /* Decode TypeFlags bitfield (Type:2, NameType:3, Reserved:11). */ + u32 import_type = (u32)(type_flags & 0x3u); + u32 name_type = (u32)((type_flags >> 2) & 0x7u); + + /* Ordinal-only imports (NameType=IMPORT_OBJECT_ORDINAL) are not yet + * implemented in cfree. None of the mingw / llvm-mingw system import + * archives use this shape — every libfoo.a member in the supported + * sysroots imports by name — so refusing here is a clean diagnostic, + * not an internal panic. When a real consumer surfaces, the work is + * to thread the ordinal through link_resolve and into the PE import + * directory hint/name tables. */ + if (name_type == IMPORT_OBJECT_ORDINAL) + compiler_panic(c, no_loc(), + "read_coff: short-import by ordinal not implemented " + "(archive member \"%s\", ordinal %u). cfree links " + "imports by name only; rebuild the consumer to import " + "by name, or omit this archive from the link.", + name ? name : "<unnamed>", + (unsigned)ordinal_or_hint); + + /* Symbol name: NUL-terminated starting at data + 20. 
*/ + const u8* body = data + COFF_IMPORT_OBJECT_HEADER_SIZE; + u32 sym_name_max = size_of_data; + u32 sym_name_len = 0; + while (sym_name_len < sym_name_max && body[sym_name_len] != '\0') + ++sym_name_len; + if (sym_name_len == sym_name_max) + compiler_panic(c, no_loc(), + "read_coff: short-import symbol name not NUL-terminated"); + + /* DLL name: NUL-terminated starting after the symbol name's NUL. */ + u32 dll_name_off = sym_name_len + 1u; + if (dll_name_off >= size_of_data) + compiler_panic(c, no_loc(), + "read_coff: short-import missing DLL name"); + const u8* dll_p = body + dll_name_off; + u32 dll_name_max = size_of_data - dll_name_off; + u32 dll_name_len = 0; + while (dll_name_len < dll_name_max && dll_p[dll_name_len] != '\0') + ++dll_name_len; + if (dll_name_len == dll_name_max) + compiler_panic(c, no_loc(), + "read_coff: short-import DLL name not NUL-terminated"); + + ObjBuilder* ob = obj_new(c); + if (!ob) compiler_panic(c, no_loc(), "read_coff: obj_new failed"); + + /* Pick SymKind by import type: CODE -> function, DATA/CONST -> object. + * Both are defined at section_id=OBJ_SEC_NONE, value=0, size=0 — the + * shape read_coff_dso would produce for a DLL export. */ + SymKind k = (import_type == IMPORT_OBJECT_CODE) ? SK_FUNC : SK_OBJ; + + Sym sn = pool_intern(c->global, (const char*)body, sym_name_len); + ObjSymId id = obj_symbol_ex(ob, sn, SB_GLOBAL, SV_DEFAULT, k, OBJ_SEC_NONE, + 0, 0, 0); + obj_sym_mark_referenced(ob, id); + + /* `__imp_<name>` alias for codegen that refers to the IAT slot + * directly (mingw convention). Even code imports use an object-like + * `__imp_` symbol because references to it want the IAT data slot, not + * the callable import stub. 
*/ + static const char kImpPrefix[] = "__imp_"; + u32 imp_len = (u32)(sizeof kImpPrefix - 1u) + sym_name_len; + char* imp_buf = arena_array(c->scratch, char, imp_len); + memcpy(imp_buf, kImpPrefix, sizeof kImpPrefix - 1u); + memcpy(imp_buf + (sizeof kImpPrefix - 1u), body, sym_name_len); + Sym imp_sn = pool_intern(c->global, imp_buf, imp_len); + ObjSymId imp_id = obj_symbol_ex(ob, imp_sn, SB_GLOBAL, SV_DEFAULT, SK_OBJ, + OBJ_SEC_NONE, 0, 0, 0); + obj_sym_mark_referenced(ob, imp_id); + + /* Stash the DLL name so the archive-ingestion layer (Phase 4.3) can + * route this builder as a DSO with the DLL as soname. */ + Sym dll_sn = pool_intern(c->global, (const char*)dll_p, dll_name_len); + obj_set_coff_import_dll(ob, dll_sn); + + (void)name_type; + obj_finalize(ob); + return ob; +} + +ObjBuilder* read_coff(Compiler* c, const char* name, const u8* data, + size_t len) { + (void)name; + + /* ---- Step 0: header validation ---- */ + if (len < COFF_FILE_HEADER_SIZE) + compiler_panic(c, no_loc(), "read_coff: input shorter than COFF header"); + + /* Microsoft short-import record? (Sig1=0, Sig2=0xFFFF.) These live + * as members of .lib archives and stand in for a long-form import + * object. Detect at entry; the rest of read_coff assumes the + * input is a real IMAGE_FILE_HEADER. */ + if (len >= 4 && + coff_rd_u16(data + 0) == IMPORT_OBJECT_HDR_SIG1 && + coff_rd_u16(data + 2) == IMPORT_OBJECT_HDR_SIG2) { + return read_coff_short_import(c, name, data, len); + } + + u16 machine = coff_rd_u16(data + 0); + u16 nsections = coff_rd_u16(data + 2); + /* data + 4: TimeDateStamp (4 bytes, ignored). */ + u32 ptr_to_symtab = coff_rd_u32(data + 8); + u32 nsymbols = coff_rd_u32(data + 12); + u16 size_opt_hdr = coff_rd_u16(data + 16); + /* data + 18: Characteristics (2 bytes, currently ignored). 
*/ + + if (size_opt_hdr != 0) + compiler_panic(c, no_loc(), + "read_coff: input has optional header (size=%u); " + "use read_coff_pe for executables", + (u32)size_opt_hdr); + + if (machine != IMAGE_FILE_MACHINE_AMD64 && + machine != IMAGE_FILE_MACHINE_ARM64 && + machine != IMAGE_FILE_MACHINE_ARM64EC) + compiler_panic(c, no_loc(), "read_coff: unsupported machine %#x", + (u32)machine); + + const ArchImpl* arch = arch_lookup_coff_machine(machine); + if (!arch || !arch->coff || !arch->coff->reloc_from) + compiler_panic(c, no_loc(), "read_coff: no arch impl for machine %#x", + (u32)machine); + u32 (*reloc_from)(u32) = arch->coff->reloc_from; + + if ((u64)COFF_FILE_HEADER_SIZE + + (u64)nsections * (u64)COFF_SECTION_HEADER_SIZE > + (u64)len) + compiler_panic(c, no_loc(), + "read_coff: section header table out of range"); + + /* ---- Step 1: bootstrap, locate strtab ---- */ + /* Strtab is at PointerToSymbolTable + NumberOfSymbols * 18. When the + * file has no symbol table (ptr=0, n=0) we treat strtab as empty. */ + const u8* strtab = NULL; + u32 strtab_size = 0; + if (ptr_to_symtab && nsymbols) { + u64 symtab_end = + (u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE; + if (symtab_end + COFF_STRTAB_SIZE_FIELD_BYTES > (u64)len) + compiler_panic(c, no_loc(), + "read_coff: symbol table / strtab header out of range"); + u32 declared = coff_rd_u32(data + symtab_end); + /* The size field is inclusive of the 4-byte prefix; treat <4 as + * "empty" (some tools write 0). 
*/ + if (declared < COFF_STRTAB_SIZE_FIELD_BYTES) declared = 0; + if (declared) { + if (symtab_end + (u64)declared > (u64)len) + compiler_panic(c, no_loc(), "read_coff: strtab body out of range"); + strtab = data + symtab_end; + strtab_size = declared; + } else { + strtab = data + symtab_end; + strtab_size = COFF_STRTAB_SIZE_FIELD_BYTES; + } + } + + ObjBuilder* ob = obj_new(c); + if (!ob) compiler_panic(c, no_loc(), "read_coff: obj_new failed"); + + /* ---- Step 2: ingest sections ---- */ + CSecRec* secs = arena_array(c->scratch, CSecRec, nsections ? nsections : 1); + const u8* shdr_base = data + COFF_FILE_HEADER_SIZE; + for (u32 i = 0; i < nsections; ++i) { + CSecRec* s = &secs[i]; + parse_shdr(shdr_base + (u64)i * COFF_SECTION_HEADER_SIZE, s); + + const char* nm; + u32 nlen; + resolve_section_name(s->raw_name, strtab, strtab_size, &nm, &nlen); + Sym sn = pool_intern(c->global, nm, nlen); + + u16 kind = coff_sec_kind(nm, nlen, s->characteristics); + u16 flags = coff_sec_flags(nm, nlen, s->characteristics); + u32 align = coff_sec_align(s->characteristics); + + int is_bss = (s->characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA) != 0; + u16 sem = is_bss ? SSEM_NOBITS : SSEM_PROGBITS; + + ObjSecId id = obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags, + align, 0u, 0u, 0u); + if (id == OBJ_SEC_NONE) + compiler_panic(c, no_loc(), + "read_coff: obj_section_ex failed for section %u", i); + s->obj_sec = id; + + /* Preserve raw Characteristics so emit_coff can write back any bits + * the canonical SecFlag/SecSem mapping doesn't model (LNK_INFO, + * LNK_REMOVE, MEM_DISCARDABLE, MEM_SHARED, GPREL, alignment nibble). */ + obj_section_set_ext(ob, id, OBJ_EXT_COFF, s->characteristics, 0); + + if (is_bss) { + u32 bss_size = s->virtual_size ? 
s->virtual_size : s->size_of_raw_data; + obj_reserve_bss(ob, id, bss_size, align); + } else if (s->size_of_raw_data) { + u64 end = (u64)s->pointer_to_raw_data + (u64)s->size_of_raw_data; + if (end > (u64)len) + compiler_panic(c, no_loc(), + "read_coff: section %u bytes out of range", i); + u8* dst = obj_reserve(ob, id, s->size_of_raw_data); + memcpy(dst, data + s->pointer_to_raw_data, s->size_of_raw_data); + } + } + + /* ---- Step 3: ingest symbols (with aux-record awareness) ---- + * sym_to_obj is indexed by RAW symbol-table index (including aux + * slots), so reloc.SymbolTableIndex resolves directly without + * adjusting for skipped aux records. Aux slots map to OBJ_SYM_NONE. */ + ObjSymId* sym_to_obj = + arena_zarray(c->scratch, ObjSymId, nsymbols ? nsymbols : 1); + + /* Track section-symbol primary symtab index per section, stored as + * (raw_index + 1) so 0 can mean "not seen yet" without colliding + * with the (legitimate) first symbol-table slot — emit_coff always + * lays the first section's section-symbol at index 0. */ + u32* sec_sym_primary = arena_zarray(c->scratch, u32, nsections + 1u); + + const u8* sym_base = data + ptr_to_symtab; + if (nsymbols) { + if ((u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE > (u64)len) + compiler_panic(c, no_loc(), + "read_coff: symbol table body out of range"); + } + + for (u32 i = 0; i < nsymbols; ) { + const u8* p = sym_base + (u64)i * COFF_SYMBOL_SIZE; + const char* nm; + u32 nlen; + resolve_sym_name(p, strtab, strtab_size, &nm, &nlen); + + u32 value = coff_rd_u32(p + 8); + i16 sec_num = (i16)coff_rd_u16(p + 12); + u16 type = coff_rd_u16(p + 14); + u8 sclass = p[16]; + u8 naux = p[17]; + + /* FILE storage class: concatenate aux records' raw bytes (each + * 18 bytes, NUL-padded) for the source-file name. */ + if (sclass == IMAGE_SYM_CLASS_FILE) { + /* Build name from aux records (up to naux*18 bytes); fall back + * to the primary record's name if naux==0. 
*/ + const char* fnm = nm; + u32 fnlen = nlen; + if (naux) { + /* Each aux record's 18 bytes are interpreted as raw file-name + * bytes; concatenate then trim trailing NULs. */ + u32 total = (u32)naux * COFF_SYMBOL_SIZE; + if ((u64)i + 1u + (u64)naux > (u64)nsymbols) + compiler_panic(c, no_loc(), + "read_coff: FILE aux records extend past symbol " + "table"); + const u8* aux = p + COFF_SYMBOL_SIZE; + u32 n = 0; + while (n < total && aux[n] != '\0') ++n; + fnm = (const char*)aux; + fnlen = n; + } + Sym fsn = fnlen ? pool_intern(c->global, fnm, fnlen) : 0; + ObjSymId id = obj_symbol_ex(ob, fsn, SB_LOCAL, SV_DEFAULT, SK_FILE, + OBJ_SEC_NONE, 0, 0, 0); + obj_sym_mark_referenced(ob, id); + sym_to_obj[i] = id; + i += 1u + naux; + continue; + } + + /* Skip .bf/.ef debug pair primaries (FUNCTION storage class) and + * the END_OF_FUNCTION marker: they carry no symbol cfree models. */ + if (sclass == IMAGE_SYM_CLASS_FUNCTION || + sclass == IMAGE_SYM_CLASS_END_OF_FUNCTION) { + sym_to_obj[i] = OBJ_SYM_NONE; + i += 1u + naux; + continue; + } + + /* Resolve (bind, vis, kind, section_id, value, size, cmnalign). */ + SymBind bind = SB_LOCAL; + SymVis vis = SV_DEFAULT; + SymKind kind = SK_NOTYPE; + ObjSecId target_sec = OBJ_SEC_NONE; + u64 sym_value = 0; + u64 sym_size = 0; + u64 cmnalign = 0; + + if (sec_num == IMAGE_SYM_UNDEFINED) { + /* Undef or common. EXTERNAL with Value > 0 is a common. */ + if (sclass == IMAGE_SYM_CLASS_EXTERNAL && value > 0) { + bind = SB_GLOBAL; + kind = SK_COMMON; + sym_size = value; + cmnalign = 1; /* COFF doesn't carry per-common alignment */ + } else { + bind = (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) ? SB_WEAK + : (sclass == IMAGE_SYM_CLASS_EXTERNAL) ? SB_GLOBAL + : SB_LOCAL; + kind = SK_UNDEF; + } + } else if (sec_num == IMAGE_SYM_ABSOLUTE) { + kind = SK_ABS; + sym_value = value; + bind = (sclass == IMAGE_SYM_CLASS_EXTERNAL) ? SB_GLOBAL : SB_LOCAL; + } else if (sec_num == IMAGE_SYM_DEBUG) { + /* Defined-in-debug — cfree has no model for it. 
Skip with an + * OBJ_SYM_NONE entry; relocations against this slot will resolve + * to OBJ_SYM_NONE, which obj_reloc_ex tolerates. */ + sym_to_obj[i] = OBJ_SYM_NONE; + i += 1u + naux; + continue; + } else if (sec_num >= 1 && (u32)sec_num <= nsections) { + target_sec = secs[sec_num - 1].obj_sec; + sym_value = value; + switch (sclass) { + case IMAGE_SYM_CLASS_EXTERNAL: + bind = SB_GLOBAL; + break; + case IMAGE_SYM_CLASS_WEAK_EXTERNAL: + bind = SB_WEAK; + break; + case IMAGE_SYM_CLASS_STATIC: + case IMAGE_SYM_CLASS_LABEL: + default: + bind = SB_LOCAL; + break; + } + + /* Detect SECTION symbols: STATIC, Value==0, name matches the + * section's own name, and the section has at least one aux + * record (the section-definition aux). Mark as SK_SECTION so + * emit_coff regenerates the synthetic entry. */ + int is_section_sym = 0; + if (sclass == IMAGE_SYM_CLASS_STATIC && value == 0 && naux >= 1) { + const CSecRec* cs = &secs[sec_num - 1]; + u32 raw_nlen = 0; + while (raw_nlen < 8 && cs->raw_name[raw_nlen] != '\0') ++raw_nlen; + if (raw_nlen == nlen && memcmp(cs->raw_name, nm, nlen) == 0) { + is_section_sym = 1; + } else if (cs->raw_name[0] == '/') { + /* Long-named section: compare the resolved name. */ + const char* rn; + u32 rnlen; + resolve_section_name(cs->raw_name, strtab, strtab_size, &rn, &rnlen); + if (rnlen == nlen && memcmp(rn, nm, nlen) == 0) is_section_sym = 1; + } + } + + if (is_section_sym) { + kind = SK_SECTION; + sec_sym_primary[sec_num] = i + 1u; + } else if (sclass == IMAGE_SYM_CLASS_SECTION) { + kind = SK_SECTION; + } else if (sclass == IMAGE_SYM_CLASS_LABEL) { + kind = SK_NOTYPE; + } else if ((type >> 8) == IMAGE_SYM_DTYPE_FUNCTION) { + kind = SK_FUNC; + } else if (type == IMAGE_SYM_TYPE_NULL) { + kind = (bind == SB_LOCAL) ? 
SK_NOTYPE : SK_OBJ; + } else { + kind = SK_OBJ; + } + } else { + compiler_panic(c, no_loc(), + "read_coff: symbol section number %d out of range", + (int)sec_num); + } + + /* WEAK_EXTERNAL primary: aux record carries TagIndex + Characteristics. + * cfree's model has SB_WEAK; the fallback symbol is link-time + * resolution by name and we drop the explicit index. */ + if (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) bind = SB_WEAK; + + Sym sn = nlen ? pool_intern(c->global, nm, nlen) : 0; + ObjSymId id = obj_symbol_ex(ob, sn, bind, vis, kind, target_sec, sym_value, + sym_size, cmnalign); + obj_sym_mark_referenced(ob, id); + sym_to_obj[i] = id; + i += 1u + naux; + } + + /* ---- Step 4: stitch COMDAT groups from section-definition aux ---- + * Each COMDAT section has a STATIC primary symbol (the section + * symbol) followed by one section-definition aux record. Selection + * != 0 marks the section as a COMDAT member; the signature symbol + * is the section symbol itself (Number field's selection variant + * controls dedup policy at link time). */ + for (u32 s = 1; s <= nsections; ++s) { + u32 prim_plus1 = sec_sym_primary[s]; + if (!prim_plus1) continue; + u32 prim = prim_plus1 - 1u; + const CSecRec* cs = &secs[s - 1]; + if (!(cs->characteristics & IMAGE_SCN_LNK_COMDAT)) continue; + const u8* p = sym_base + (u64)prim * COFF_SYMBOL_SIZE; + u8 naux = p[17]; + if (!naux) continue; + const u8* aux = p + COFF_SYMBOL_SIZE; + /* Aux layout: Length(4), NumberOfRelocations(2), NumberOfLinenumbers(2), + * CheckSum(4), Number(2), Selection(1), Unused(3). */ + u16 assoc_number = coff_rd_u16(aux + 12); + u8 selection = aux[14]; + if (selection == 0) continue; + + ObjSymId sig = sym_to_obj[prim]; + const ObjSym* sigsym = obj_symbol_get(ob, sig); + Sym gname = sigsym ? 
sigsym->name : 0; + ObjGroupId gid = obj_group(ob, gname, sig, (u32)selection); + obj_group_add_section(ob, gid, cs->obj_sec); + obj_section_set_group(ob, cs->obj_sec, gid); + + /* ASSOCIATIVE: the COMDAT member is associated with another + * section's group. Add this section to that group's list too so + * dead-strip keeps them paired. */ + if (selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE && assoc_number >= 1 && + (u32)assoc_number <= nsections) { + u32 other_prim_plus1 = sec_sym_primary[assoc_number]; + if (other_prim_plus1) { + u32 other_prim = other_prim_plus1 - 1u; + const u8* op = sym_base + (u64)other_prim * COFF_SYMBOL_SIZE; + if (op[17]) { + const u8* oaux = op + COFF_SYMBOL_SIZE; + u8 osel = oaux[14]; + if (osel != 0) { + ObjSymId osig = sym_to_obj[other_prim]; + const ObjSym* osigsym = obj_symbol_get(ob, osig); + Sym ogname = osigsym ? osigsym->name : 0; + ObjGroupId ogid = obj_group(ob, ogname, osig, (u32)osel); + obj_group_add_section(ob, ogid, cs->obj_sec); + } + } + } + } + } + + /* ---- Step 5: per-section relocations ---- */ + for (u32 i = 0; i < nsections; ++i) { + const CSecRec* s = &secs[i]; + if (!s->number_of_relocations) continue; + u64 reloc_end = (u64)s->pointer_to_relocations + + (u64)s->number_of_relocations * (u64)COFF_RELOC_SIZE; + if (reloc_end > (u64)len) + compiler_panic(c, no_loc(), + "read_coff: relocation table for section %u out of range", + i); + const u8* rbase = data + s->pointer_to_relocations; + for (u32 j = 0; j < s->number_of_relocations; ++j) { + const u8* rp = rbase + (u64)j * COFF_RELOC_SIZE; + u32 r_va = coff_rd_u32(rp + 0); + u32 r_sym = coff_rd_u32(rp + 4); + u16 r_type = coff_rd_u16(rp + 8); + + u32 kind = reloc_from(r_type); + if (kind == (u32)-1) + compiler_panic(c, no_loc(), + "read_coff: unsupported reloc type %u for machine %#x", + (u32)r_type, (u32)machine); + + ObjSymId target = OBJ_SYM_NONE; + if (r_sym < nsymbols) target = sym_to_obj[r_sym]; + + /* AMD64 REL32 encodings are relative to a PC after the 
relocated + * field, while cfree's R_PC32-style apply formula subtracts the + * relocation field address P. Plain REL32 is relative to P+4; + * REL32_N is relative to P+N. Record that convention as an + * implicit negative addend so link_reloc_apply can stay format + * neutral. */ + /* ARM64 PAGEOFFSET_12L is one wire code for LDST{8,16,32,64,128}. + * The per-arch translator returns R_AARCH64_LDST64_ABS_LO12_NC by + * default; recover the actual access width from the patched LDR/ + * STR instruction's size field at bits [31:30] (and a SIMD/FP + * extension via bit 26 + opc[23]) so the linker applies the right + * scale. Mismatch panics at apply-time with "misaligned + * address" otherwise — see link_reloc.c. */ + if ((machine == IMAGE_FILE_MACHINE_ARM64 || + machine == IMAGE_FILE_MACHINE_ARM64EC) && + r_type == IMAGE_REL_ARM64_PAGEOFFSET_12L && + s->size_of_raw_data && (u64)r_va + 4u <= (u64)s->size_of_raw_data) { + const u8* ibytes = data + s->pointer_to_raw_data + r_va; + u32 instr = (u32)ibytes[0] | ((u32)ibytes[1] << 8) | + ((u32)ibytes[2] << 16) | ((u32)ibytes[3] << 24); + u32 sz = (instr >> 30) & 0x3u; + int is_simd = (instr >> 26) & 0x1u; + if (is_simd && ((instr >> 23) & 0x1u)) { + kind = R_AARCH64_LDST128_ABS_LO12_NC; + } else { + switch (sz) { + case 0: kind = R_AARCH64_LDST8_ABS_LO12_NC; break; + case 1: kind = R_AARCH64_LDST16_ABS_LO12_NC; break; + case 2: kind = R_AARCH64_LDST32_ABS_LO12_NC; break; + default: kind = R_AARCH64_LDST64_ABS_LO12_NC; break; + } + } + } + + i64 addend = 0; + int has_explicit = 0; + if (machine == IMAGE_FILE_MACHINE_AMD64) { + switch (r_type) { + case IMAGE_REL_AMD64_REL32: + addend = -4; has_explicit = 1; break; + case IMAGE_REL_AMD64_REL32_1: + addend = -1; has_explicit = 1; break; + case IMAGE_REL_AMD64_REL32_2: + addend = -2; has_explicit = 1; break; + case IMAGE_REL_AMD64_REL32_3: + addend = -3; has_explicit = 1; break; + case IMAGE_REL_AMD64_REL32_4: + addend = -4; has_explicit = 1; break; + case 
IMAGE_REL_AMD64_REL32_5: + addend = -5; has_explicit = 1; break; + default: + break; + } + } + + obj_reloc_ex(ob, s->obj_sec, r_va, (RelocKind)kind, target, addend, + has_explicit, 0); + } + } + + /* ---- Step 6: finalize and return ---- */ + obj_finalize(ob); + return ob; +} diff --git a/src/obj/coff_read_dso.c b/src/obj/coff_read_dso.c @@ -0,0 +1,235 @@ +/* PE32+ DLL reader. Peer of read_elf_dso / read_macho_dso: walks the + * IMAGE_DIRECTORY_ENTRY_EXPORT data directory of a Windows .dll and + * produces an ObjBuilder of defined OBJ_SEC_NONE symbols — one per + * name in the Export Name Table. The DLL's own Name string (the + * analogue of DT_SONAME / LC_ID_DYLIB) is returned via *soname_out. + * + * The produced ObjBuilder carries no sections, relocations, or groups + * — DSO inputs contribute no bytes to the link. The consumer's + * resolve_undefs pass sees the exports as defined globals and marks + * matching consumer-side undefs as `imported`; the import-table + * emitter (Phase 3 / 4.4) later groups them by providing DLL. + * + * Scope: PE32+ only (IMAGE_NT_OPTIONAL_HDR64_MAGIC), AMD64 or ARM64, + * with IMAGE_FILE_DLL set. Ordinal-only exports (entries present in + * the EAT but absent from the ENT) are not synthesized in v1 — almost + * all real-world imports are by name. Forwarder entries (EAT RVA + * falls within the export directory's own range) are still emitted as + * symbols so the linker can satisfy imports against them; the OS + * loader follows the forwarder chain at runtime. This contract is + * pinned by test/coff/pe-dso-forwarder.c. */ + +#include <string.h> + +#include "core/arena.h" +#include "core/heap.h" +#include "core/pool.h" +#include "obj/coff.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +/* ---- RVA -> file offset ---- + * Walks the section table once per call. 
Returns 1 on success and + * fills *off_out; returns 0 if the RVA falls outside every section's + * [VirtualAddress, VirtualAddress + max(VirtualSize, SizeOfRawData)) + * range or the resulting file offset would exceed `len`. */ +static int rva_to_offset(const u8* shdrs, u16 nsec, u32 rva, size_t len, + u64* off_out) { + for (u16 i = 0; i < nsec; ++i) { + const u8* sh = shdrs + (u64)i * COFF_SECTION_HEADER_SIZE; + u32 vsize = coff_rd_u32(sh + 8); + u32 vaddr = coff_rd_u32(sh + 12); + u32 raw_size = coff_rd_u32(sh + 16); + u32 raw_ptr = coff_rd_u32(sh + 20); + /* Some linkers leave VirtualSize == 0 in objects; use raw_size as + * a fallback so we still resolve RVAs in well-formed images. */ + u32 span = vsize ? vsize : raw_size; + if (rva >= vaddr && rva < vaddr + span) { + u64 delta = (u64)(rva - vaddr); + if (delta >= raw_size) return 0; /* RVA past on-disk data */ + u64 off = (u64)raw_ptr + delta; + if (off >= len) return 0; + *off_out = off; + return 1; + } + } + return 0; +} + +/* Read a NUL-terminated string starting at `off`, bounded by `len`. + * Returns the string length (excluding NUL); writes the pointer to + * *out. Returns 0 if off is out of range or the string is not + * terminated within the file. 
*/
+static u32 read_cstr(const u8* data, size_t len, u64 off, const char** out) {
+  if (off >= len) { *out = ""; return 0; } /* offset outside the buffer */
+  const char* s = (const char*)(data + off);
+  u64 max = (u64)len - off; /* bytes available from `off` to end of buffer */
+  u64 n = 0;
+  while (n < max && s[n] != '\0') ++n;
+  if (n == max) { *out = ""; return 0; } /* unterminated */
+  *out = s;
+  return (u32)n;
+}
+
+/* Parse a PE32+ DLL image and return an ObjBuilder holding one
+ * SB_GLOBAL / SK_FUNC / OBJ_SEC_NONE symbol per usable Export Name Table
+ * entry.  `name` is currently unused.  If the export directory carries a
+ * non-empty DLL Name string it is interned into *soname_out; otherwise
+ * *soname_out stays 0.  Structural problems in the headers or in the
+ * export tables are reported through compiler_panic; individual ENT
+ * entries that are malformed (bad ordinal, bad or empty name) are
+ * skipped. */
+ObjBuilder* read_coff_dso(Compiler* c, const char* name, const u8* data,
+                          size_t len, Sym* soname_out) {
+  (void)name;
+  if (soname_out) *soname_out = 0;
+
+  /* ---- DOS header + PE signature ---- */
+  if (len < COFF_DOS_HEADER_SIZE)
+    compiler_panic(c, no_loc(), "read_coff_dso: input shorter than DOS header");
+  u16 e_magic = coff_rd_u16(data + 0);
+  if (e_magic != IMAGE_DOS_SIGNATURE)
+    compiler_panic(c, no_loc(), "read_coff_dso: bad DOS magic 0x%x", e_magic);
+  u32 e_lfanew = coff_rd_u32(data + 60);
+
+  /* One bounds check covers the signature, the file header and a full
+   * PE32+ optional header, so the fixed-offset reads below stay in range. */
+  u64 nt_end = (u64)e_lfanew + 4u + COFF_FILE_HEADER_SIZE + COFF_OPT_HDR64_SIZE;
+  if (nt_end > len)
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: PE headers extend past end of file");
+
+  u32 pe_sig = coff_rd_u32(data + e_lfanew);
+  if (pe_sig != IMAGE_NT_SIGNATURE)
+    compiler_panic(c, no_loc(), "read_coff_dso: bad PE signature 0x%x", pe_sig);
+
+  /* ---- IMAGE_FILE_HEADER ---- */
+  const u8* fh = data + e_lfanew + 4u;
+  u16 machine = coff_rd_u16(fh + 0);
+  u16 nsec = coff_rd_u16(fh + 2);
+  u16 size_of_opt = coff_rd_u16(fh + 16);
+  u16 chars = coff_rd_u16(fh + 18);
+
+  if (machine != IMAGE_FILE_MACHINE_AMD64 && machine != IMAGE_FILE_MACHINE_ARM64)
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: unsupported machine 0x%x", machine);
+  if (!(chars & IMAGE_FILE_DLL))
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: not a DLL (Characteristics=0x%x)", chars);
+  if (size_of_opt < COFF_OPT_HDR64_SIZE)
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: SizeOfOptionalHeader %u too small for PE32+",
+                   size_of_opt);
+
+  /* ---- IMAGE_OPTIONAL_HEADER64 ---- */
+  const u8* oh = fh + COFF_FILE_HEADER_SIZE;
+  u16 opt_magic = coff_rd_u16(oh + 0);
+  if (opt_magic != IMAGE_NT_OPTIONAL_HDR64_MAGIC)
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: not PE32+ (optional header Magic=0x%x)",
+                   opt_magic);
+
+  /* DataDirectory begins at offset 112 inside the PE32+ optional header
+   * (24 bytes of standard fields + 88 bytes of windows-specific fields,
+   * the last of which is the 4-byte NumberOfRvaAndSizes). */
+  const u8* data_dir = oh + COFF_OPT_HDR64_SIZE
+                       - COFF_NUM_DATA_DIRECTORIES * COFF_DATA_DIRECTORY_SIZE;
+  /* NumberOfRvaAndSizes sits immediately before the directory array and
+   * says how many slots are valid.  A slot at or beyond that count must
+   * not be interpreted, so an image that declares no export slot is
+   * handled like one whose export directory is empty. */
+  u32 num_dirs = coff_rd_u32(data_dir - 4u);
+  u32 export_rva = 0;
+  u32 export_size = 0;
+  if (num_dirs > IMAGE_DIRECTORY_ENTRY_EXPORT) {
+    const u8* exp_slot = data_dir
+                         + IMAGE_DIRECTORY_ENTRY_EXPORT
+                           * COFF_DATA_DIRECTORY_SIZE;
+    export_rva = coff_rd_u32(exp_slot);
+    export_size = coff_rd_u32(exp_slot + 4u);
+  }
+
+  /* ---- section table ---- */
+  u64 shdrs_off = (u64)e_lfanew + 4u + COFF_FILE_HEADER_SIZE + size_of_opt;
+  u64 shdrs_end = shdrs_off + (u64)nsec * COFF_SECTION_HEADER_SIZE;
+  if (shdrs_end > len)
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: section table extends past end of file");
+  const u8* shdrs = data + shdrs_off;
+
+  ObjBuilder* ob = obj_new(c);
+  if (!ob) compiler_panic(c, no_loc(), "read_coff_dso: obj_new failed");
+
+  /* No export directory => empty DSO (legal for stub DLLs). */
+  if (export_size == 0 || export_rva == 0) {
+    obj_finalize(ob);
+    return ob;
+  }
+
+  u64 exp_off;
+  if (!rva_to_offset(shdrs, nsec, export_rva, len, &exp_off))
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: export directory RVA 0x%x out of range",
+                   export_rva);
+  if (exp_off + COFF_EXPORT_DIR_SIZE > len)
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: export directory truncated");
+
+  const u8* ed = data + exp_off;
+  u32 name_rva = coff_rd_u32(ed + 12);
+  u32 num_funcs = coff_rd_u32(ed + 20);
+  u32 num_names = coff_rd_u32(ed + 24);
+  u32 eat_rva = coff_rd_u32(ed + 28);
+  u32 ent_rva = coff_rd_u32(ed + 32);
+  u32 ord_rva = coff_rd_u32(ed + 36);
+  /* Base (ed + 16) is the user-visible ordinal offset; the cfree linker
+   * matches imports by name, so we don't propagate it. */
+
+  /* ---- DLL name (soname) ---- */
+  if (name_rva) {
+    u64 name_off;
+    if (!rva_to_offset(shdrs, nsec, name_rva, len, &name_off))
+      compiler_panic(c, no_loc(),
+                     "read_coff_dso: DLL name RVA 0x%x out of range",
+                     name_rva);
+    const char* dll_name;
+    u32 nlen = read_cstr(data, len, name_off, &dll_name);
+    if (nlen && soname_out)
+      *soname_out = pool_intern(c->global, dll_name, nlen);
+  }
+
+  /* ---- resolve EAT / ENT / ordinal table once ---- */
+  u64 eat_off = 0, ent_off = 0, ord_off = 0;
+  if (num_names) {
+    if (!rva_to_offset(shdrs, nsec, eat_rva, len, &eat_off))
+      compiler_panic(c, no_loc(),
+                     "read_coff_dso: EAT RVA 0x%x out of range", eat_rva);
+    if (!rva_to_offset(shdrs, nsec, ent_rva, len, &ent_off))
+      compiler_panic(c, no_loc(),
+                     "read_coff_dso: ENT RVA 0x%x out of range", ent_rva);
+    if (!rva_to_offset(shdrs, nsec, ord_rva, len, &ord_off))
+      compiler_panic(c, no_loc(),
+                     "read_coff_dso: ordinal table RVA 0x%x out of range",
+                     ord_rva);
+    if (ent_off + (u64)num_names * 4u > len
+        || ord_off + (u64)num_names * 2u > len)
+      compiler_panic(c, no_loc(),
+                     "read_coff_dso: ENT/ordinal table extends past file");
+    if (eat_off + (u64)num_funcs * 4u > len)
+      compiler_panic(c, no_loc(),
+                     "read_coff_dso: EAT extends past file");
+  }
+
+  /* ---- walk the ENT ----
+   * Forwarders (EAT RVA inside [export_rva, export_rva + export_size))
+   * still produce a symbol: cfree's linker doesn't follow the chain,
+   * but the import needs to be satisfiable so the OS loader can. */
+  for (u32 i = 0; i < num_names; ++i) {
+    u32 nrva = coff_rd_u32(data + ent_off + (u64)i * 4u);
+    u16 ord = coff_rd_u16(data + ord_off + (u64)i * 2u);
+    if (ord >= num_funcs) continue; /* malformed; skip rather than panic */
+    /* func_rva is fetched for forwarder classification only; cfree does
+     * not consume the address itself (DSO symbols are OBJ_SEC_NONE). */
+    u32 func_rva = coff_rd_u32(data + eat_off + (u64)ord * 4u);
+    (void)func_rva; /* see comment above re: forwarders */
+
+    u64 name_off;
+    if (!rva_to_offset(shdrs, nsec, nrva, len, &name_off)) continue;
+    const char* nm;
+    u32 nlen = read_cstr(data, len, name_off, &nm);
+    if (!nlen) continue; /* empty or unterminated name */
+
+    Sym sn = pool_intern(c->global, nm, nlen);
+    ObjSymId id = obj_symbol(ob, sn, SB_GLOBAL, SK_FUNC,
+                             OBJ_SEC_NONE, 0, 0);
+    obj_sym_mark_referenced(ob, id);
+  }
+
+  obj_finalize(ob);
+  return ob;
+}
diff --git a/src/obj/coff_reloc_aarch64.c b/src/obj/coff_reloc_aarch64.c
@@ -0,0 +1,96 @@
+/* RelocKind <-> AArch64 PE/COFF reloc-type mapping. Mirror of
+ * elf_reloc_aarch64.c for PE/COFF.
+ *
+ * The ARM64 PE/COFF reloc set covers the common AArch64 patch sites:
+ * ADRP page-base / page-offset pairs, BRANCH26/19/14, ADDR32/64, plus
+ * the section-relative SECREL / SECTION / SECREL_{LOW,HIGH}12A forms.
+ * PAGEOFFSET_12L collapses all LDST*_ABS_LO12_NC widths into one wire
+ * code; the width is recoverable from the patched LDR/STR instruction
+ * encoding, so the reader picks the LDST64 form and the consumer can
+ * disambiguate later if it cares. ADDR32NB is image-relative; v1
+ * collapses it to R_ABS32 and lets layout subtract the image base.
*/
+
+#include "obj/coff.h"
+
+u32 coff_aarch64_reloc_to(u32 kind /* RelocKind; returns IMAGE_REL_ARM64_* */) {
+  switch (kind) {
+    case R_NONE:
+      return IMAGE_REL_ARM64_ABSOLUTE;
+    case R_ABS64:
+      return IMAGE_REL_ARM64_ADDR64;
+    case R_ABS32:
+      return IMAGE_REL_ARM64_ADDR32;
+    case R_AARCH64_CALL26:
+    case R_AARCH64_JUMP26:
+      return IMAGE_REL_ARM64_BRANCH26; /* both branch kinds share one wire code */
+    case R_AARCH64_CONDBR19:
+      return IMAGE_REL_ARM64_BRANCH19;
+    case R_AARCH64_TSTBR14:
+      return IMAGE_REL_ARM64_BRANCH14;
+    case R_AARCH64_ADR_PREL_PG_HI21:
+    case R_AARCH64_ADR_PREL_PG_HI21_NC:
+      return IMAGE_REL_ARM64_PAGEBASE_REL21;
+    case R_AARCH64_ADR_PREL_LO21:
+      return IMAGE_REL_ARM64_REL21;
+    case R_AARCH64_ADD_ABS_LO12_NC:
+      return IMAGE_REL_ARM64_PAGEOFFSET_12A;
+    case R_AARCH64_LDST8_ABS_LO12_NC:
+    case R_AARCH64_LDST16_ABS_LO12_NC:
+    case R_AARCH64_LDST32_ABS_LO12_NC:
+    case R_AARCH64_LDST64_ABS_LO12_NC:
+    case R_AARCH64_LDST128_ABS_LO12_NC:
+      return IMAGE_REL_ARM64_PAGEOFFSET_12L; /* access width is not carried on the wire */
+    case R_PC32:
+    case R_REL32:
+      return IMAGE_REL_ARM64_REL32;
+    case R_COFF_SECREL:
+      return IMAGE_REL_ARM64_SECREL;
+    case R_COFF_SECTION:
+      return IMAGE_REL_ARM64_SECTION;
+    case R_COFF_AARCH64_SECREL_LOW12A:
+      return IMAGE_REL_ARM64_SECREL_LOW12A;
+    case R_COFF_AARCH64_SECREL_HIGH12A:
+      return IMAGE_REL_ARM64_SECREL_HIGH12A;
+    default:
+      return IMAGE_REL_ARM64_ABSOLUTE; /* unmapped kind: same wire value as R_NONE */
+  }
+}
+
+u32 coff_aarch64_reloc_from(u32 wire_type /* IMAGE_REL_ARM64_*; returns RelocKind */) {
+  switch (wire_type) {
+    case IMAGE_REL_ARM64_ABSOLUTE:
+      return R_NONE;
+    case IMAGE_REL_ARM64_ADDR64:
+      return R_ABS64;
+    case IMAGE_REL_ARM64_ADDR32:
+      return R_ABS32;
+    case IMAGE_REL_ARM64_ADDR32NB:
+      return R_ABS32; /* image-relative form collapsed; see file header */
+    case IMAGE_REL_ARM64_BRANCH26:
+      return R_AARCH64_CALL26; /* JUMP26 also writes BRANCH26; reads always yield CALL26 */
+    case IMAGE_REL_ARM64_BRANCH19:
+      return R_AARCH64_CONDBR19;
+    case IMAGE_REL_ARM64_BRANCH14:
+      return R_AARCH64_TSTBR14;
+    case IMAGE_REL_ARM64_PAGEBASE_REL21:
+      return R_AARCH64_ADR_PREL_PG_HI21;
+    case IMAGE_REL_ARM64_REL21:
+      return R_AARCH64_ADR_PREL_LO21;
+    case IMAGE_REL_ARM64_PAGEOFFSET_12A:
+      return R_AARCH64_ADD_ABS_LO12_NC;
+    case IMAGE_REL_ARM64_PAGEOFFSET_12L:
+      return R_AARCH64_LDST64_ABS_LO12_NC; /* LDST64 stands in for every width */
+    case IMAGE_REL_ARM64_REL32:
+      return R_PC32;
+    case IMAGE_REL_ARM64_SECREL:
+      return R_COFF_SECREL;
+    case IMAGE_REL_ARM64_SECTION:
+      return R_COFF_SECTION;
+    case IMAGE_REL_ARM64_SECREL_LOW12A:
+      return R_COFF_AARCH64_SECREL_LOW12A;
+    case IMAGE_REL_ARM64_SECREL_HIGH12A:
+      return R_COFF_AARCH64_SECREL_HIGH12A;
+    default:
+      return (u32)-1; /* sentinel: wire type with no RelocKind mapping here */
+  }
+}
diff --git a/src/obj/coff_reloc_x86_64.c b/src/obj/coff_reloc_x86_64.c
@@ -0,0 +1,76 @@
+/* RelocKind <-> x86_64 PE/COFF reloc-type mapping. Mirror of
+ * elf_reloc_x86_64.c for PE/COFF.
+ *
+ * PE/COFF's AMD64 reloc set is much narrower than ELF's: only ABSOLUTE,
+ * ADDR64, ADDR32, ADDR32NB, REL32 (with REL32_1..5 implicit-addend
+ * variants), plus SECREL / SECTION (<-> R_COFF_SECREL / R_COFF_SECTION)
+ * and SECREL7 (read as R_NONE). We emit plain REL32 (4) for every
+ * PC-relative kind and let the explicit Reloc.addend ride in the patched
+ * bytes; on the read side REL32_1..5 collapse to R_PC32 (the reader
+ * applies the implicit addend itself). ABSOLUTE (== 0) is the _to side's
+ * "unsupported" sentinel, matching the ELF contract; _from uses (u32)-1.
*/
+
+#include "obj/coff.h"
+
+u32 coff_x86_64_reloc_to(u32 kind /* RelocKind; returns IMAGE_REL_AMD64_* */) {
+  switch (kind) {
+    case R_NONE:
+      return IMAGE_REL_AMD64_ABSOLUTE;
+    case R_ABS64:
+      return IMAGE_REL_AMD64_ADDR64;
+    case R_ABS32:
+      return IMAGE_REL_AMD64_ADDR32;
+    case R_X64_32S:
+      return IMAGE_REL_AMD64_ADDR32NB;
+    case R_PC32:
+    case R_REL32:
+    case R_PLT32:
+    case R_X64_PLT32:
+    case R_X64_GOTPCREL:
+    case R_X64_GOTPCRELX:
+    case R_X64_REX_GOTPCRELX:
+      return IMAGE_REL_AMD64_REL32; /* every PC-relative kind shares plain REL32 */
+    case R_COFF_SECREL:
+      return IMAGE_REL_AMD64_SECREL;
+    case R_COFF_SECTION:
+      return IMAGE_REL_AMD64_SECTION;
+    default:
+      return IMAGE_REL_AMD64_ABSOLUTE; /* unsupported-kind sentinel (see file header) */
+  }
+}
+
+u32 coff_x86_64_reloc_from(u32 wire_type /* IMAGE_REL_AMD64_*; returns RelocKind */) {
+  switch (wire_type) {
+    case IMAGE_REL_AMD64_ABSOLUTE:
+      return R_NONE;
+    case IMAGE_REL_AMD64_ADDR64:
+      return R_ABS64;
+    case IMAGE_REL_AMD64_ADDR32:
+      return R_ABS32;
+    case IMAGE_REL_AMD64_ADDR32NB:
+      return R_X64_32S;
+    case IMAGE_REL_AMD64_REL32:
+    case IMAGE_REL_AMD64_REL32_1:
+    case IMAGE_REL_AMD64_REL32_2:
+    case IMAGE_REL_AMD64_REL32_3:
+    case IMAGE_REL_AMD64_REL32_4:
+    case IMAGE_REL_AMD64_REL32_5:
+      return R_PC32; /* the _1.._5 distinction is dropped here (see file header) */
+    case IMAGE_REL_AMD64_SECREL:
+      return R_COFF_SECREL;
+    case IMAGE_REL_AMD64_SECTION:
+      return R_COFF_SECTION;
+    /* SECREL7 (7-bit section-relative) appears in mingw-emitted archive
+     * members (intrinsic helpers, exception tables, DWARF). cfree
+     * doesn't currently apply or emit these, but panicking at read
+     * time would block ingesting any mingw archive whose non-import
+     * members carry .debug_info / .pdata. Map to R_NONE so the
+     * relocation slot is preserved structurally but treated as a
+     * no-op by the relocator; the member can still be dead-stripped
+     * when nothing references it. */
+    case IMAGE_REL_AMD64_SECREL7:
+      return R_NONE;
+    default:
+      return (u32)-1; /* sentinel: wire type with no RelocKind mapping here */
+  }
+}
diff --git a/src/obj/obj.c b/src/obj/obj.c
@@ -33,6 +33,11 @@ struct CfreeObjBuilder {
    * derives a sensible default by arch.
*/ u32 elf_e_flags; u8 elf_e_flags_set; + /* COFF short-import annotation. Carries the providing DLL name when + * the builder was synthesized by read_coff from a Microsoft short + * import record; zero / unset otherwise. See obj_set_coff_import_dll. */ + Sym coff_import_dll; + u8 coff_import_dll_set; /* Cached undef extern `__tlv_bootstrap` (Mach-O on-disk name) used by * obj_define_tls when emitting `_Thread_local` storage on Mach-O. * Lazily materialized on the first TLV emission; OBJ_SYM_NONE otherwise. */ @@ -113,6 +118,18 @@ int obj_get_elf_e_flags(const ObjBuilder* ob, u32* out) { return 1; } +void obj_set_coff_import_dll(ObjBuilder* ob, Sym dll_name) { + if (!ob) return; + ob->coff_import_dll = dll_name; + ob->coff_import_dll_set = 1; +} + +int obj_get_coff_import_dll(const ObjBuilder* ob, Sym* out) { + if (!ob || !ob->coff_import_dll_set) return 0; + if (out) *out = ob->coff_import_dll; + return 1; +} + /* ---- write side ---- */ ObjSecId obj_section(ObjBuilder* ob, Sym name, SecKind kind, u16 flags, @@ -766,6 +783,10 @@ const char* reloc_kind_name(RelocKind k) { _CASE(R_WASM_TABLEIDX); _CASE(R_WASM_MEMOFS); _CASE(R_WASM_TYPEIDX); + _CASE(R_COFF_SECREL); + _CASE(R_COFF_SECTION); + _CASE(R_COFF_AARCH64_SECREL_LOW12A); + _CASE(R_COFF_AARCH64_SECREL_HIGH12A); #undef _CASE } return "UNKNOWN"; diff --git a/src/obj/obj.h b/src/obj/obj.h @@ -236,6 +236,24 @@ typedef enum RelocKind { R_WASM_TABLEIDX, R_WASM_MEMOFS, R_WASM_TYPEIDX, + /* COFF/PE-only reloc kinds — section-relative fixups used by Windows + * TLS Local-Exec lowering and debug info. SECREL = 32-bit offset + * from the start of the containing section. SECTION = 16-bit section + * index (1-based). Both arch-independent on the cfree side; the + * per-arch translators map to IMAGE_REL_{AMD64,ARM64}_SECREL/SECTION. 
*/ + R_COFF_SECREL, + R_COFF_SECTION, + /* AArch64 Windows TLS access uses an ADD-imm12-pair to materialize a + * 24-bit SECREL value into a register: + * add xd, xd, #:secrel_hi12:sym, lsl #12 ; HIGH12A bits [23:12] + * add xd, xd, #:secrel_lo12:sym ; LOW12A bits [11:0] + * The instruction at the patch site already has sh=1 (HIGH) or sh=0 + * (LOW) preset by the codegen; the linker only patches the imm12 + * field at bits [21:10]. NC variants ("no carry / no overflow check" + * in PE terminology) mean the high bits of SECREL above 24 are + * discarded — fine for any .tls section under 16 MiB. */ + R_COFF_AARCH64_SECREL_LOW12A, + R_COFF_AARCH64_SECREL_HIGH12A, } RelocKind; typedef struct Section { @@ -466,6 +484,18 @@ void obj_sweep_dead(ObjBuilder*); void obj_set_elf_e_flags(ObjBuilder*, u32 e_flags); int obj_get_elf_e_flags(const ObjBuilder*, u32* out); +/* COFF short-import shim annotation. Set by read_coff when the input + * is a Microsoft "short import" record (Sig1=0, Sig2=0xFFFF) found + * inside a .lib archive member: the ObjBuilder synthesizes the + * imported symbol(s) the long-form import object would have provided, + * and stores the providing DLL name here so the archive-ingestion + * layer (Phase 4.3) can reclassify the resulting LinkInput as a + * DSO with this name as the soname. Unset (returns 0 from the + * getter) on every other input. The setter records a presence bit + * the same way obj_set_elf_e_flags does. */ +void obj_set_coff_import_dll(ObjBuilder*, Sym dll_name); +int obj_get_coff_import_dll(const ObjBuilder*, Sym* out); + /* Per-symbol format-specific flag bits. ObjSym.flags is otherwise * unused; readers stash format-specific attribute bits there so the * matching emitter can re-apply them. 
Today this is Mach-O n_desc @@ -645,6 +675,18 @@ ObjBuilder* read_elf(Compiler*, const char* name, const u8* data, size_t len); ObjBuilder* read_elf_dso(Compiler*, const char* name, const u8* data, size_t len, Sym* soname_out); ObjBuilder* read_coff(Compiler*, const char* name, const u8* data, size_t len); +/* PE32+ DLL reader. Walks the IMAGE_DIRECTORY_ENTRY_EXPORT data + * directory and produces an ObjBuilder containing one defined symbol + * (OBJ_SEC_NONE, SB_GLOBAL, SK_FUNC) per name in the Export Name + * Table — the peer of read_elf_dso / read_macho_dso. The DLL's + * own Name string (the analogue of DT_SONAME / LC_ID_DYLIB) is + * interned and returned via *soname_out, or 0 if missing. + * + * Scope: PE32+ images with IMAGE_FILE_DLL set, machine AMD64 or + * ARM64. Ordinal-only exports (in the EAT but not the ENT) are not + * synthesized in v1 — almost all real-world imports are by name. */ +ObjBuilder* read_coff_dso(Compiler*, const char* name, const u8* data, + size_t len, Sym* soname_out); ObjBuilder* read_macho(Compiler*, const char* name, const u8* data, size_t len); /* Mach-O MH_DYLIB reader. Produces an ObjBuilder containing only the * dylib's exported symbols (as defined OBJ_SEC_NONE entries — the diff --git a/src/obj/obj_secnames.c b/src/obj/obj_secnames.c @@ -40,6 +40,10 @@ Sym obj_secname_init_array(Compiler* c) { return pool_intern_cstr(c->global, ".init_array"); case CFREE_OBJ_MACHO: return pool_intern_cstr(c->global, "__DATA,__mod_init_func"); + case CFREE_OBJ_COFF: + /* CRT runtime scans `.CRT$X[A-Z]` for ctor/dtor tables; XCU is + * the user-constructor bucket. See doc/WINDOWS.md §1.6. 
*/ + return pool_intern_cstr(c->global, ".CRT$XCU"); default: return secname_panic_unimpl(c, ".init_array"); } @@ -51,6 +55,10 @@ Sym obj_secname_fini_array(Compiler* c) { return pool_intern_cstr(c->global, ".fini_array"); case CFREE_OBJ_MACHO: return pool_intern_cstr(c->global, "__DATA,__mod_term_func"); + case CFREE_OBJ_COFF: + /* `.CRT$XPA`/`XPZ` are markers; XPU is the user-destructor + * bucket. See doc/WINDOWS.md §1.6. */ + return pool_intern_cstr(c->global, ".CRT$XPU"); default: return secname_panic_unimpl(c, ".fini_array"); } @@ -67,6 +75,10 @@ Sym obj_secname_preinit_array(Compiler* c) { * caller hitting this on a MACHO target is doing something the * platform can't represent. */ return secname_panic_unimpl(c, ".preinit_array"); + case CFREE_OBJ_COFF: + /* CRT's own setup runs in `.CRT$XI*`; user pre-init lives at + * XIA just after the CRT. See doc/WINDOWS.md §1.6. */ + return pool_intern_cstr(c->global, ".CRT$XIA"); default: return secname_panic_unimpl(c, ".preinit_array"); } @@ -78,6 +90,10 @@ Sym obj_secname_tdata(Compiler* c) { return pool_intern_cstr(c->global, ".tdata"); case CFREE_OBJ_MACHO: return pool_intern_cstr(c->global, "__DATA,__thread_data"); + case CFREE_OBJ_COFF: + /* MSVC `.tls$` convention; linker concatenates `.tls$*` sorted + * by suffix. See doc/WINDOWS.md §1.6. */ + return pool_intern_cstr(c->global, ".tls$"); default: return secname_panic_unimpl(c, ".tdata"); } @@ -89,6 +105,10 @@ Sym obj_secname_tbss(Compiler* c) { return pool_intern_cstr(c->global, ".tbss"); case CFREE_OBJ_MACHO: return pool_intern_cstr(c->global, "__DATA,__thread_bss"); + case CFREE_OBJ_COFF: + /* sorted-alphabetically-last so it falls at the tail of the TLS + * image's zero-fill region. See doc/WINDOWS.md §1.6. */ + return pool_intern_cstr(c->global, ".tls$ZZZ"); default: return secname_panic_unimpl(c, ".tbss"); } @@ -169,5 +189,9 @@ void obj_format_demangle_c(const Compiler* c, const char** name, * literal; caller interns. 
*/ const char* obj_format_default_entry_name(const Compiler* c) { if (c && c->target.obj == CFREE_OBJ_MACHO) return "_main"; + /* COFF: PE/Windows CRT entry sets up argc/argv and calls main. + * Resolved against the user-supplied CRT archive (mingw's + * libmingwex.a). See doc/WINDOWS.md §1.6. */ + if (c && c->target.obj == CFREE_OBJ_COFF) return "mainCRTStartup"; return "_start"; } diff --git a/test/api/abi_classify_test.c b/test/api/abi_classify_test.c @@ -59,6 +59,9 @@ static int g_fail; /* Storage outlives every Compiler; cfree_compiler_new just stores `ctx`. */ static CfreeContext g_ctx; +static void expect_direct_1x_int(const char* tag, const ABIArgInfo* ai, + u32 want_size); + static CfreeCompiler* new_compiler(CfreeArchKind arch, CfreeOSKind os, CfreeObjFmt obj) { CfreeTarget t; @@ -98,7 +101,7 @@ static const ABIFuncInfo* classify_fn(CfreeCompiler* c, CfreeCgTypeId ret_ty, static const char* arch_name(CfreeArchKind a) { switch (a) { - case CFREE_ARCH_X86_64: return "sysv-x64"; + case CFREE_ARCH_X86_64: return "x64"; case CFREE_ARCH_ARM_64: return "aarch64"; case CFREE_ARCH_RV64: return "rv64"; default: return "?"; @@ -106,9 +109,10 @@ static const char* arch_name(CfreeArchKind a) { } static const char* os_name(CfreeOSKind o) { switch (o) { - case CFREE_OS_LINUX: return "linux"; - case CFREE_OS_MACOS: return "macos"; - default: return "?"; + case CFREE_OS_LINUX: return "linux"; + case CFREE_OS_MACOS: return "macos"; + case CFREE_OS_WINDOWS: return "windows"; + default: return "?"; } } @@ -216,7 +220,17 @@ static void check_target(CfreeArchKind arch, CfreeOSKind os, CfreeObjFmt obj) { { const ABIFuncInfo* fi = classify_fn(c, f128_ty, f128_ty); snprintf(tag, sizeof tag, "%s/%s f128 arg", arch_name(arch), os_name(os)); - if (arch == CFREE_ARCH_X86_64) { + if (arch == CFREE_ARCH_X86_64 && os == CFREE_OS_WINDOWS) { + /* Win64: long double is 64-bit double. 
Front end normally lowers + * f128 before classification; defensive path treats size-16 FP as + * a size-8 double — DIRECT/1 FP part of 8B for both arg and ret. */ + expect_direct_1x_fp(tag, &fi->params[0], 8); + snprintf(tag, sizeof tag, "%s/%s f128 ret", arch_name(arch), + os_name(os)); + expect_direct_1x_fp(tag, &fi->ret, 8); + EXPECT(fi->has_sret == 0, "%s/%s: f128 should not set has_sret", + arch_name(arch), os_name(os)); + } else if (arch == CFREE_ARCH_X86_64) { /* SysV-x64: long double is x87 (80-bit padded to 16B). cfree lacks * x87 support; classify as INDIRECT (memory) so it routes through * a stack image consistent with the wide16 CG-layer shortcut. */ @@ -257,41 +271,321 @@ static void check_target(CfreeArchKind arch, CfreeOSKind os, CfreeObjFmt obj) { const ABIFuncInfo* fi = classify_fn(c, f64_i64, f64_i64); snprintf(tag, sizeof tag, "%s/%s {double,long} arg", arch_name(arch), os_name(os)); - expect_direct_2(tag, &fi->params[0], ABI_CLASS_FP, ABI_CLASS_INT, 8, 8); + if (os == CFREE_OS_WINDOWS) { + expect_indirect(tag, &fi->params[0], /*is_return=*/0); + } else { + expect_direct_2(tag, &fi->params[0], ABI_CLASS_FP, ABI_CLASS_INT, 8, + 8); + } snprintf(tag, sizeof tag, "%s/%s {double,long} ret", arch_name(arch), os_name(os)); - expect_direct_2(tag, &fi->ret, ABI_CLASS_FP, ABI_CLASS_INT, 8, 8); - EXPECT(fi->has_sret == 0, "%s/%s: mixed record should not use sret", - arch_name(arch), os_name(os)); + if (os == CFREE_OS_WINDOWS) { + expect_indirect(tag, &fi->ret, /*is_return=*/1); + EXPECT(fi->has_sret == 1, "%s/%s: mixed record should use sret", + arch_name(arch), os_name(os)); + } else { + expect_direct_2(tag, &fi->ret, ABI_CLASS_FP, ABI_CLASS_INT, 8, 8); + EXPECT(fi->has_sret == 0, "%s/%s: mixed record should not use sret", + arch_name(arch), os_name(os)); + } } { const ABIFuncInfo* fi = classify_fn(c, i64_f64, i64_f64); snprintf(tag, sizeof tag, "%s/%s {long,double} arg", arch_name(arch), os_name(os)); - expect_direct_2(tag, &fi->params[0], 
ABI_CLASS_INT, ABI_CLASS_FP, 8, 8); + if (os == CFREE_OS_WINDOWS) { + expect_indirect(tag, &fi->params[0], /*is_return=*/0); + } else { + expect_direct_2(tag, &fi->params[0], ABI_CLASS_INT, ABI_CLASS_FP, 8, + 8); + } snprintf(tag, sizeof tag, "%s/%s {long,double} ret", arch_name(arch), os_name(os)); - expect_direct_2(tag, &fi->ret, ABI_CLASS_INT, ABI_CLASS_FP, 8, 8); + if (os == CFREE_OS_WINDOWS) { + expect_indirect(tag, &fi->ret, /*is_return=*/1); + } else { + expect_direct_2(tag, &fi->ret, ABI_CLASS_INT, ABI_CLASS_FP, 8, 8); + } } { const ABIFuncInfo* fi = classify_fn(c, f32x2, f32x2); snprintf(tag, sizeof tag, "%s/%s {float,float} arg", arch_name(arch), os_name(os)); - expect_direct_1x_fp(tag, &fi->params[0], 8); + if (os == CFREE_OS_WINDOWS) + expect_direct_1x_int(tag, &fi->params[0], 8); + else + expect_direct_1x_fp(tag, &fi->params[0], 8); snprintf(tag, sizeof tag, "%s/%s {float,float} ret", arch_name(arch), os_name(os)); - expect_direct_1x_fp(tag, &fi->ret, 8); + if (os == CFREE_OS_WINDOWS) + expect_direct_1x_int(tag, &fi->ret, 8); + else + expect_direct_1x_fp(tag, &fi->ret, 8); } } cfree_compiler_free(c); } +/* Build a record with N i8 fields (so size == N and align == 1). */ +static CfreeCgTypeId make_i8_record(CfreeCompiler* c, const char* tag_name, + u32 nfields) { + CfreeCgBuiltinTypes bi = cfree_cg_builtin_types(c); + CfreeCgTypeId i8 = bi.id[CFREE_CG_BUILTIN_I8]; + CfreeCgField fields[16]; + static const char* const names[16] = {"f0", "f1", "f2", "f3", "f4", "f5", + "f6", "f7", "f8", "f9", "fa", "fb", + "fc", "fd", "fe", "ff"}; + if (nfields > 16) exit(2); + memset(fields, 0, sizeof fields); + for (u32 i = 0; i < nfields; ++i) { + fields[i].name = cfree_sym_intern(c, names[i]); + fields[i].type = i8; + } + return cfree_cg_type_record(c, cfree_sym_intern(c, tag_name), fields, + nfields); +} + +/* Build a record { i64 a; i64 b; } — size 16, align 8. 
*/ +static CfreeCgTypeId make_two_i64_record(CfreeCompiler* c, const char* tag_n) { + CfreeCgBuiltinTypes bi = cfree_cg_builtin_types(c); + CfreeCgTypeId i64 = bi.id[CFREE_CG_BUILTIN_I64]; + CfreeCgField fields[2]; + memset(fields, 0, sizeof fields); + fields[0].name = cfree_sym_intern(c, "a"); + fields[0].type = i64; + fields[1].name = cfree_sym_intern(c, "b"); + fields[1].type = i64; + return cfree_cg_type_record(c, cfree_sym_intern(c, tag_n), fields, 2); +} + +/* Classify a function `ret_ty fn(p0, p1, ..., pN-1)` and return its info. */ +static const ABIFuncInfo* classify_fn_n(CfreeCompiler* c, CfreeCgTypeId ret_ty, + const CfreeCgTypeId* arg_types, + u32 nargs, int variadic) { + CfreeCgFuncParam params[8]; + CfreeCgFuncSig sig; + CfreeCgTypeId fn; + if (nargs > 8) exit(2); + memset(params, 0, sizeof params); + for (u32 i = 0; i < nargs; ++i) params[i].type = arg_types[i]; + memset(&sig, 0, sizeof sig); + sig.ret = ret_ty; + sig.params = params; + sig.nparams = nargs; + sig.abi_variadic = variadic ? true : false; + fn = cfree_cg_type_func(c, sig); + return abi_cg_func_info(((Compiler*)c)->abi, fn); +} + +/* Expect INDIRECT (memory image) with a specific indirect alignment. + * Win64 preserves the source type's natural alignment in the byval/sret + * copy — for a 3-byte i8 aggregate that's 1, not 8. */ +static void expect_indirect_align(const char* tag, const ABIArgInfo* ai, + int is_return, u32 want_align) { + EXPECT(ai->kind == ABI_ARG_INDIRECT, "%s: kind=%d want INDIRECT", tag, + (int)ai->kind); + EXPECT(ai->nparts == 0, "%s: nparts=%u want 0", tag, (unsigned)ai->nparts); + EXPECT(ai->indirect_align == want_align, + "%s: indirect_align=%u want %u", tag, (unsigned)ai->indirect_align, + want_align); + u32 expected_flag = is_return ? ABI_AF_SRET : ABI_AF_BYVAL; + EXPECT((ai->flags & expected_flag) != 0, + "%s: flags=0x%x missing %s", tag, (unsigned)ai->flags, + is_return ? "SRET" : "BYVAL"); +} + +/* Expect DIRECT with a single INT part of the given size. 
*/ +static void expect_direct_1x_int(const char* tag, const ABIArgInfo* ai, + u32 want_size) { + EXPECT(ai->kind == ABI_ARG_DIRECT, "%s: kind=%d want DIRECT", tag, + (int)ai->kind); + EXPECT(ai->nparts == 1, "%s: nparts=%u want 1", tag, (unsigned)ai->nparts); + if (ai->nparts != 1 || !ai->parts) return; + EXPECT(ai->parts[0].cls == ABI_CLASS_INT, + "%s: parts[0].cls=%d want INT", tag, (int)ai->parts[0].cls); + EXPECT(ai->parts[0].size == want_size, + "%s: parts[0].size=%u want %u", tag, (unsigned)ai->parts[0].size, + want_size); +} + +/* Win64-specific ABI shape assertions: aggregate rules ({1,2,4,8} by value + * else hidden pointer), va_list = void*, variadic flag wiring, and that + * each scalar arg gets one part of the right class (reg-vs-stack + * placement is codegen, not classifier output). */ +static void test_win64_specifics(void) { + CfreeCompiler* c = + new_compiler(CFREE_ARCH_X86_64, CFREE_OS_WINDOWS, CFREE_OBJ_COFF); + CfreeCgBuiltinTypes bi = cfree_cg_builtin_types(c); + CfreeCgTypeId i32 = bi.id[CFREE_CG_BUILTIN_I32]; + CfreeCgTypeId f64 = bi.id[CFREE_CG_BUILTIN_F64]; + CfreeCgTypeId voidp = cfree_cg_type_ptr(c, bi.id[CFREE_CG_BUILTIN_VOID], 0); + CfreeCgTypeId rec1 = make_i8_record(c, "S1", 1); + CfreeCgTypeId rec3 = make_i8_record(c, "S3", 3); + CfreeCgTypeId rec16 = make_two_i64_record(c, "S16"); + + /* Case 1: int main(void) — DIRECT/1 INT/4 ret, no params. */ + { + const ABIFuncInfo* fi = classify_fn_n(c, i32, NULL, 0, 0); + expect_direct_1x_int("win64 main ret", &fi->ret, 4); + EXPECT(fi->nparams == 0, "win64 main: nparams=%u want 0", + (unsigned)fi->nparams); + EXPECT(fi->has_sret == 0, "win64 main: has_sret set"); + EXPECT(fi->variadic == 0, "win64 main: variadic set"); + } + + /* Case 2: void f(int,int,int,int,int) — 5 ints, each DIRECT/1 INT/4. + * Reg vs stack placement (4 reg slots) is a codegen concern; the + * classifier emits per-arg parts regardless. 
*/ + { + CfreeCgTypeId args[5] = {i32, i32, i32, i32, i32}; + const ABIFuncInfo* fi = classify_fn_n(c, bi.id[CFREE_CG_BUILTIN_VOID], + args, 5, 0); + EXPECT(fi->nparams == 5, "win64 5xint: nparams=%u want 5", + (unsigned)fi->nparams); + for (u32 i = 0; i < 5; ++i) { + char t[64]; + snprintf(t, sizeof t, "win64 5xint arg[%u]", i); + expect_direct_1x_int(t, &fi->params[i], 4); + } + } + + /* Case 3: void f(double,double,double,double,double) — 5 doubles. */ + { + CfreeCgTypeId args[5] = {f64, f64, f64, f64, f64}; + const ABIFuncInfo* fi = classify_fn_n(c, bi.id[CFREE_CG_BUILTIN_VOID], + args, 5, 0); + EXPECT(fi->nparams == 5, "win64 5xfp: nparams=%u want 5", + (unsigned)fi->nparams); + for (u32 i = 0; i < 5; ++i) { + char t[64]; + snprintf(t, sizeof t, "win64 5xfp arg[%u]", i); + expect_direct_1x_fp(t, &fi->params[i], 8); + } + } + + /* Case 4: void f(int,double,int,double) — slot-shared on Win64. + * The classifier just emits per-arg parts of the right class; slot + * sharing is a codegen call-site concern. */ + { + CfreeCgTypeId args[4] = {i32, f64, i32, f64}; + const ABIFuncInfo* fi = classify_fn_n(c, bi.id[CFREE_CG_BUILTIN_VOID], + args, 4, 0); + EXPECT(fi->nparams == 4, "win64 mix: nparams=%u want 4", + (unsigned)fi->nparams); + expect_direct_1x_int("win64 mix arg[0]", &fi->params[0], 4); + expect_direct_1x_fp("win64 mix arg[1]", &fi->params[1], 8); + expect_direct_1x_int("win64 mix arg[2]", &fi->params[2], 4); + expect_direct_1x_fp("win64 mix arg[3]", &fi->params[3], 8); + } + + /* Case 5: struct{char a;} foo(void) — size 1, DIRECT/1 INT/1. */ + { + const ABIFuncInfo* fi = classify_fn_n(c, rec1, NULL, 0, 0); + expect_direct_1x_int("win64 ret S1", &fi->ret, 1); + EXPECT(fi->has_sret == 0, "win64 S1 ret: has_sret set"); + } + + /* Case 6: struct{long a; long b;} foo(void) — size 16, INDIRECT/sret. 
*/ + { + const ABIFuncInfo* fi = classify_fn_n(c, rec16, NULL, 0, 0); + expect_indirect("win64 ret S16", &fi->ret, /*is_return=*/1); + EXPECT(fi->has_sret == 1, "win64 S16 ret: has_sret not set"); + } + + /* Case 7: struct{char,char,char} foo(void) — size 3, INDIRECT/sret on + * Win64 (only {1,2,4,8} pass by value). Natural align of the 3-byte + * i8 aggregate is 1, which Win64 preserves in indirect_align. */ + { + const ABIFuncInfo* fi = classify_fn_n(c, rec3, NULL, 0, 0); + expect_indirect_align("win64 ret S3", &fi->ret, /*is_return=*/1, + /*want_align=*/1); + EXPECT(fi->has_sret == 1, "win64 S3 ret: has_sret not set"); + } + + /* Case 8: void f(struct{char,char,char}) — by-value 3-byte aggregate + * goes by hidden pointer (BYVAL) on Win64. */ + { + CfreeCgTypeId args[1] = {rec3}; + const ABIFuncInfo* fi = classify_fn_n(c, bi.id[CFREE_CG_BUILTIN_VOID], + args, 1, 0); + EXPECT(fi->nparams == 1, "win64 S3 arg: nparams=%u want 1", + (unsigned)fi->nparams); + expect_indirect_align("win64 S3 arg", &fi->params[0], /*is_return=*/0, + /*want_align=*/1); + } + + /* Case 9: int printf(const char*, ...) — variadic flag set. */ + { + CfreeCgTypeId args[1] = {voidp}; + const ABIFuncInfo* fi = classify_fn_n(c, i32, args, 1, /*variadic=*/1); + EXPECT(fi->variadic == 1, "win64 printf: variadic=%u want 1", + (unsigned)fi->variadic); + EXPECT(fi->vararg_on_stack == 0, + "win64 printf: vararg_on_stack=%u want 0", + (unsigned)fi->vararg_on_stack); + expect_direct_1x_int("win64 printf ret", &fi->ret, 4); + } + + /* Case 10: va_list info — Win64 has va_list = void* (8/8/PTR). 
*/ + { + ABITypeInfo vi = abi_va_list_info(((Compiler*)c)->abi); + EXPECT(vi.size == 8, "win64 va_list size=%u want 8", (unsigned)vi.size); + EXPECT(vi.align == 8, "win64 va_list align=%u want 8", + (unsigned)vi.align); + EXPECT(vi.scalar_kind == ABI_SC_PTR, + "win64 va_list scalar_kind=%u want ABI_SC_PTR (%u)", + (unsigned)vi.scalar_kind, (unsigned)ABI_SC_PTR); + } + + cfree_compiler_free(c); +} + +/* AArch64-Windows mostly starts from AAPCS64. Deltas: va_list is `void*`, + * and FP parameters to variadic functions are routed through integer slots. */ +static void test_aarch64_windows_variadic(void) { + CfreeCompiler* c = + new_compiler(CFREE_ARCH_ARM_64, CFREE_OS_WINDOWS, CFREE_OBJ_COFF); + CfreeCgBuiltinTypes bi = cfree_cg_builtin_types(c); + CfreeCgTypeId f64 = bi.id[CFREE_CG_BUILTIN_F64]; + CfreeCgTypeId args[1] = {f64}; + + ABITypeInfo vi = abi_va_list_info(((Compiler*)c)->abi); + EXPECT(vi.size == 8, "aarch64/windows va_list size=%u want 8", + (unsigned)vi.size); + EXPECT(vi.align == 8, "aarch64/windows va_list align=%u want 8", + (unsigned)vi.align); + EXPECT(vi.scalar_kind == ABI_SC_PTR, + "aarch64/windows va_list scalar_kind=%u want ABI_SC_PTR (%u)", + (unsigned)vi.scalar_kind, (unsigned)ABI_SC_PTR); + + { + const ABIFuncInfo* fi = + classify_fn_n(c, bi.id[CFREE_CG_BUILTIN_VOID], args, 1, 0); + expect_direct_1x_fp("aarch64/windows nonvariadic double arg", + &fi->params[0], 8); + } + { + const ABIFuncInfo* fi = + classify_fn_n(c, bi.id[CFREE_CG_BUILTIN_VOID], args, 1, 1); + expect_direct_1x_int("aarch64/windows variadic double arg", + &fi->params[0], 8); + EXPECT(fi->vararg_on_stack == 0, + "aarch64/windows variadic: vararg_on_stack=%u want 0", + (unsigned)fi->vararg_on_stack); + } + cfree_compiler_free(c); +} + int main(void) { check_target(CFREE_ARCH_X86_64, CFREE_OS_LINUX, CFREE_OBJ_ELF); check_target(CFREE_ARCH_ARM_64, CFREE_OS_LINUX, CFREE_OBJ_ELF); check_target(CFREE_ARCH_ARM_64, CFREE_OS_MACOS, CFREE_OBJ_MACHO); check_target(CFREE_ARCH_RV64, 
CFREE_OS_LINUX, CFREE_OBJ_ELF); + check_target(CFREE_ARCH_X86_64, CFREE_OS_WINDOWS, CFREE_OBJ_COFF); + check_target(CFREE_ARCH_ARM_64, CFREE_OS_WINDOWS, CFREE_OBJ_COFF); + test_win64_specifics(); + test_aarch64_windows_variadic(); if (g_fail) { fprintf(stderr, "%d failures\n", g_fail); return 1; diff --git a/test/coff/CORPUS.md b/test/coff/CORPUS.md @@ -0,0 +1,199 @@ +# PE/COFF Test Corpus — Target Coverage + +What the `test/coff/` corpus should cover for full PE/COFF object-file +support, independent of cfree's current implementation state. Mirrors +the section layout of `test/elf/CORPUS.md`. + +Conventions: +- **U** = unit (hand-built `ObjBuilder` round-trip; what this harness + ships today) +- **C** = `cases/` (`x86_64-w64-mingw32-gcc -c` / `aarch64-w64-mingw32-gcc -c` + fixtures — deferred, no mingw toolchain wired in yet) +- **E** = `exec/` (`link_emit_coff` + Wine — deferred until Phase 3 + of `doc/WINDOWS.md` lands) + +The driver lives in `cfree-roundtrip-coff.c`. Each U case is a +self-contained `static void test_*(void)` that builds an +`ObjBuilder`, emits to a `cfree_writer_mem`, reads back via +`read_coff`, asserts shape equivalence, then re-emits and asserts +byte equality between the two emits. + +--- + +## 1. File header / target identification + +| Case | Layer | Shape | +|---|---|---| +| `IMAGE_FILE_MACHINE_AMD64` round-trip | **U** | minimal `.text` with two bytes, no symbols (`test_header_minimal_x64`) | +| `IMAGE_FILE_MACHINE_ARM64` round-trip | **U** | minimal `.text` with `ret`, no symbols (`test_header_minimal_aa64`) | +| Reproducible TimeDateStamp == 0 | **U** | implicitly checked by byte-stable round-trip | +| Per-arch reloc machine dispatch | **U** | covered by reloc tests below | +| `SizeOfOptionalHeader == 0` for `.obj` | **U** | implicitly: every U case is a `.obj`, not a PE image | + +## 2. 
Section types + +| Case | Layer | Shape | +|---|---|---| +| `.text` (`IMAGE_SCN_CNT_CODE \| MEM_EXECUTE \| MEM_READ`) | **U** | `test_text_only_x64` / `test_text_only_aa64` | +| `.rdata` (`CNT_INITIALIZED_DATA \| MEM_READ`) | **U** | `test_rodata` | +| `.data` (`CNT_INITIALIZED_DATA \| MEM_READ \| MEM_WRITE`) | **U** | `test_data_with_reloc_abs64_x64` | +| `.bss` (`CNT_UNINITIALIZED_DATA`) | **U** | `test_bss` | +| `.tls$` (TLS template section, name-detected) | **U** | `test_tls_section` | +| `.debug_*` (DWARF passthrough) | C | deferred | +| `.CRT$X[CIP]*` (init/fini) | C | deferred | +| `.xdata` / `.pdata` (SEH unwind) | C | deferred — `doc/WINDOWS.md` §3.5 | +| Multiple text sections (`.text$mn`, etc.) | **U** | covered via `test_comdat_group` | + +## 3. Section characteristics flags + +| Flag | Coverage | +|---|---| +| `CNT_CODE` / `INITIALIZED_DATA` / `UNINITIALIZED_DATA` | U — kind matrix above | +| `MEM_EXECUTE` / `MEM_READ` / `MEM_WRITE` | U — kind matrix above | +| `IMAGE_SCN_LNK_COMDAT` | **U** — `test_comdat_group` | +| `IMAGE_SCN_ALIGN_*` nibble (1, 4, 8, 16, 4096) | **U** — `test_align_nibble` | +| `LNK_INFO` / `LNK_REMOVE` / `MEM_DISCARDABLE` | C — preserved via `OBJ_EXT_COFF`, not yet exercised by a U case | + +## 4. Symbol coverage + +**Storage classes:** `EXTERNAL`, `STATIC`, `WEAK_EXTERNAL`, `FILE`, +`SECTION` (synthesized). + +**Section number specials:** ordinary 1-based index, `UNDEFINED` (0), +`ABSOLUTE` (-1), `DEBUG` (-2). 
+ +| Case | Layer | Shape | +|---|---|---| +| Plain global function (`EXTERNAL`, `SK_FUNC`) | **U** | `test_text_only_x64` | +| Static (file-local, `STATIC`, `SB_LOCAL`) | **U** | `test_static_local_symbol` | +| Common (`UNDEFINED` + Value>0) | **U** | `test_common_symbol` | +| Weak external (`WEAK_EXTERNAL` + aux) | **U** | `test_weak_global` | +| Section symbol synthesis (`SK_SECTION` round-trip) | **U** | `test_section_symbol_synthesis` | +| Long symbol name (>8 chars; strtab spillover) | **U** | `test_long_symbol_name` | +| Long section name (`/N` form) | **U** | `test_long_section_name` | +| File symbol (`.file` + aux records) | C | deferred (cfree's emit_coff handles it; no U case yet) | +| Hidden / protected visibility | n/a | COFF has no visibility model | + +## 5. Relocation coverage + +### x86_64 (`IMAGE_REL_AMD64_*`) + +| Wire kind | cfree `RelocKind` | Layer | Shape | +|---|---|---|---| +| `ABSOLUTE` (0) | `R_NONE` | implicit | — | +| `ADDR64` (1) | `R_ABS64` | **U** | `test_data_with_reloc_abs64_x64` | +| `ADDR32` (2) | `R_ABS32` | **U** | covered alongside REL32 (same harness) | +| `ADDR32NB` (3) | `R_X64_32S` | C | not yet exercised | +| `REL32` (4) | `R_PC32` / `R_REL32` / `R_PLT32` / `R_X64_GOTPCREL*` | **U** | `test_data_with_reloc_rel32_x64` | +| `REL32_1..5` (5..9) | `R_PC32` + explicit addend on read | C | reader-only path; no U yet | +| `SECREL` / `SECTION` | (not modeled in v1) | — | deferred — `doc/WINDOWS.md` §3.1 | + +### aarch64 (`IMAGE_REL_ARM64_*`) + +| Wire kind | cfree `RelocKind` | Layer | Shape | +|---|---|---|---| +| `ABSOLUTE` (0) | `R_NONE` | implicit | — | +| `ADDR32` (1) | `R_ABS32` | C | not yet exercised | +| `ADDR32NB` (2) | `R_ABS32` | C | not yet exercised | +| `BRANCH26` (3) | `R_AARCH64_CALL26` / `R_AARCH64_JUMP26` | **U** | `test_aa64_branch26` | +| `PAGEBASE_REL21` (4) | `R_AARCH64_ADR_PREL_PG_HI21` | **U** | `test_aa64_pagebase_pageoffset` | +| `REL21` (5) | `R_AARCH64_ADR_PREL_LO21` | C | not yet exercised | +| 
`PAGEOFFSET_12A` (6) | `R_AARCH64_ADD_ABS_LO12_NC` | **U** | `test_aa64_pagebase_pageoffset` | +| `PAGEOFFSET_12L` (7) | `R_AARCH64_LDST64_ABS_LO12_NC` | C | not yet exercised | +| `BRANCH19` (15) | `R_AARCH64_CONDBR19` | C | not yet exercised | +| `BRANCH14` (16) | `R_AARCH64_TSTBR14` | C | not yet exercised | +| `ADDR64` (14) | `R_ABS64` | **U** | `test_data_with_reloc_abs64_aa64` | +| `SECREL` family | (not modeled in v1) | — | deferred | + +## 6. COMDAT / groups + +| Case | Layer | Shape | +|---|---|---| +| COMDAT group with `SELECT_ANY` | **U** | `test_comdat_group` | +| `SELECT_NODUPLICATES` | C | not yet exercised | +| `SELECT_SAME_SIZE` / `EXACT_MATCH` | C | not yet exercised | +| `SELECT_ASSOCIATIVE` (paired sections) | C | reader handles; no U yet | +| `SELECT_LARGEST` / `NEWEST` | C | not yet exercised | + +## 7. TLS / special sections + +| Case | Layer | Shape | +|---|---|---| +| `.tls$` data section | **U** | `test_tls_section` | +| `.tls$ZZZ` BSS-tail | C | — | +| `_tls_index` / `_tls_used` directory | E | Phase 3 | +| `.CRT$XCU` constructors | C | deferred | + +## 8. Layout / structure edges + +| Case | Layer | Shape | +|---|---|---| +| Empty `.obj` (no sections, no symbols) | **U** | `test_empty_obj` | +| Long section name (`/<decimal>` form) | **U** | `test_long_section_name` | +| Long symbol name (LongName form) | **U** | `test_long_symbol_name` | +| Section alignment 1 / 4 / 8 / 16 / 4096 | **U** | `test_align_nibble` | +| > 65535 relocations in one section | n/a | emitter panics; not legal in v1 | + +## 9. Negative inputs (`bad/`) + +Deferred — no `bad/` corpus in Phase 1. Layer E will cover: + +- Truncated file header (< 20 bytes) +- Non-zero `SizeOfOptionalHeader` (i.e. PE image fed to `.obj` + reader) +- Unsupported `Machine` (e.g. 
`IMAGE_FILE_MACHINE_I386`) +- `PointerToRawData + SizeOfRawData > file_size` +- `PointerToSymbolTable + NumberOfSymbols * 18` overflows +- Strtab size field < 4 / strtab body extending past file +- Reloc `SymbolTableIndex` past symbol table +- COMDAT aux with `Selection == ASSOCIATIVE` and `Number` out of + range + +## 10. Known limitations (round-trip asymmetries) + +1. **Section-definition aux records.** `emit_coff` always emits a + STATIC section symbol + section-definition aux for every kept + section, even if the input `ObjBuilder` did not name one. The + reader maps those aux records onto `SK_SECTION` symbols. After + one round-trip the readback carries an `SK_SECTION` symbol per + section; the second emit reproduces the exact same wire bytes + (byte-stable from step 2 onward). + +2. **Symbol ordering.** Section symbols come first (one per kept + section), then `.file` symbols (if any), then user-defined + symbols in iteration order. A user-supplied `ObjBuilder` that + mints user symbols before section symbols still round-trips, + but the symbol-table index ordering differs after the first + emit. The harness compares by name, not index. + +3. **TimeDateStamp.** Always zero (reproducible builds), so byte + stability holds even across re-emits with different `now` + values. + +4. **COMDAT selection flag-vs-enum.** `obj_group(..., flags)` takes + a flag bitfield (`CFREE_OBJ_GROUP_COMDAT = 1`). The COFF + selection (e.g. `IMAGE_COMDAT_SELECT_ANY = 2`) is a small int + enum stored as `flags` on the group when read back from COFF. + Round-trip stability holds as long as callers consistently use + one or the other model — see `test_comdat_group`. + +5. **Undefined function symbols.** An undefined symbol minted as + `SK_FUNC` is emitted with `Type` = function, but the reader + collapses it to `SK_UNDEF` on readback (cfree's model has no + "undef function" kind), so the second emit differs from the + first. U cases therefore mint undefined externals as `SK_UNDEF` + — see `test_data_with_reloc_rel32_x64`. + +## Stratification + +When picking what to land next: + +1. ★ **Reloc-kind matrix per arch (U)** — every kind in the + per-arch translator table needs a U case. Currently covered: + `R_ABS64` on both arches; `R_PC32` on x64; `R_AARCH64_CALL26`, + `R_AARCH64_ADR_PREL_PG_HI21` + `R_AARCH64_ADD_ABS_LO12_NC` on + aa64. +2. 
★ **Symbol storage-class matrix (U)** — covered: `EXTERNAL`, + `STATIC`, `WEAK_EXTERNAL`, `SECTION`; common symbols. +3. ★ **Section characteristics matrix (U)** — kind × flags matrix + covered for `.text` / `.rdata` / `.data` / `.bss` / `.tls$`. +4. mingw fixtures (C) — gated on toolchain availability. +5. Negative inputs (Layer E) — defer until reader's diagnostic + surface is exercised by Phase 3 link tests. +6. SEH / unwind-info round-trip — Phase 2.7. + +A "complete" corpus has one U cell for each row in groups 1–3 and +at least one C row for groups 4–6. diff --git a/test/coff/README.md b/test/coff/README.md @@ -0,0 +1,50 @@ +# test/coff — PE/COFF format roundtrip harness + +Round-trip testing for `emit_coff` / `read_coff` against the +`x86_64-windows` and `aarch64-windows` targets. Peer of +`test/elf/` and `test/macho/`. + +## Scope + +Hand-built `ObjBuilder` → `emit_coff` → bytes → `read_coff` → second +`emit_coff` against the readback. The harness asserts: + +1. Structural equivalence between the original `ObjBuilder` and the + readback (sections, symbols, relocations, groups all preserved + modulo synthesized SECTION symbols and section-definition aux + records — the asymmetry that `src/obj/coff_read.c` documents). +2. Byte stability across `emit_coff(read_coff(emit_coff(ob)))` — the + second emit must produce the exact bytes of the first. + +## Running + +``` +make test-coff +``` + +This builds `build/test/cfree-roundtrip-coff` and runs the embedded +unit cases. It also runs `windows-ucrt-hosted-smoke.sh`, which +self-skips unless an llvm-mingw UCRT sysroot is available via +`CFREE_MINGW_SYSROOT` or under `/tmp/llvm-mingw*`. Wine is not needed. + +## Layers + +`CORPUS.md` enumerates the cases worth covering, marked: + +- **U** (unit) — hand-built `ObjBuilder` fixtures. Layer A. The + current harness only carries U cases. +- **C** (cases) — mingw-cross-built `.obj` fixtures. Layer B. 
+ The hosted UCRT smoke now covers one aarch64 llvm-mingw sysroot + path; broader fixture coverage remains pending. +- **E** (exec) — link + exec via Wine. Layer C/D, gated on Wine + availability (`doc/WINDOWS.md` Phase 3). + +Layer A is sufficient to gate the wire encoder / decoder against +each other. Layers B/C/D will catch cross-tool agreement and +end-to-end runtime correctness once the surrounding infrastructure +lands. + +## Pointer + +See `doc/WINDOWS.md` for the full PE/COFF support plan, including +the Phase-by-Phase task list, ABI notes, and corpus stratification. diff --git a/test/coff/cfree-roundtrip-coff.c b/test/coff/cfree-roundtrip-coff.c @@ -0,0 +1,1404 @@ +/* PE/COFF round-trip harness — peer of test/elf/unit/smoke.c and + * the Mach-O unit tests. Each test_*() builds an ObjBuilder, emits via + * emit_coff into a memory writer, reads back via read_coff, and + * asserts: + * + * 1. Structural equivalence — sections by name, symbols by name, + * relocations by (section, offset, kind, target-sym-name). + * Section-symbol synthesis is honored as a known asymmetry + * (see test/coff/CORPUS.md §10). + * 2. Byte stability — re-emitting the readback ObjBuilder produces + * the same bytes as the first emit (memcmp). + * + * Mixes public (<cfree/core.h>, <cfree/object.h>) and internal + * (src/obj/obj.h, src/core/core.h) surfaces. Compiled with -Isrc + * by test/test.mk. Not a libcfree consumer in the usual sense — a + * test binary that pokes the same private headers the writer / + * reader use. */ + +#include <cfree/core.h> +#include <cfree/object.h> +#include <setjmp.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "core/core.h" +#include "core/pool.h" +#include "obj/obj.h" + +/* ---- env vtables --------------------------------------------------- */ + +static void* heap_alloc(CfreeHeap* h, size_t n, size_t a) { + (void)h; + (void)a; + return n ? 
malloc(n) : NULL; +} +static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) { + (void)h; + (void)o; + (void)a; + return realloc(p, n); +} +static void heap_free(CfreeHeap* h, void* p, size_t n) { + (void)h; + (void)n; + free(p); +} +static CfreeHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL}; + +static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, + const char* fmt, va_list ap) { + static const char* names[] = {"note", "warning", "error", "fatal"}; + (void)s; + (void)loc; + fprintf(stderr, "%s: ", names[k]); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); +} +static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; + +/* ---- failure tracking --------------------------------------------- */ + +static int g_failures; +static const char* g_test_name = "?"; +#define EXPECT(cond, ...) \ + do { \ + if (!(cond)) { \ + fprintf(stderr, "FAIL [%s] %s:%d: ", g_test_name, __FILE__, \ + __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fputc('\n', stderr); \ + g_failures++; \ + } \ + } while (0) + +/* ---- target builders ---------------------------------------------- */ + +static void target_x64_windows(CfreeTarget* t) { + memset(t, 0, sizeof *t); + t->arch = CFREE_ARCH_X86_64; + t->os = CFREE_OS_WINDOWS; + t->obj = CFREE_OBJ_COFF; + t->ptr_size = 8; + t->ptr_align = 8; + t->big_endian = false; + t->pic = CFREE_PIC_PIE; + t->code_model = CFREE_CM_SMALL; +} + +static void target_aa64_windows(CfreeTarget* t) { + memset(t, 0, sizeof *t); + t->arch = CFREE_ARCH_ARM_64; + t->os = CFREE_OS_WINDOWS; + t->obj = CFREE_OBJ_COFF; + t->ptr_size = 8; + t->ptr_align = 8; + t->big_endian = false; + t->pic = CFREE_PIC_PIE; + t->code_model = CFREE_CM_SMALL; +} + +/* ---- shape helpers ------------------------------------------------- */ + +static int sym_eq_str(Pool* p, Sym s, const char* want) { + size_t len; + const char* got = pool_str(p, s, &len); + size_t wlen = strlen(want); + return got && len == wlen && memcmp(got, want, len) == 
0; +} + +static const Section* find_section_named(const ObjBuilder* ob, Pool* p, + const char* want) { + u32 n = obj_section_count(ob); + for (u32 i = 1; i < n; ++i) { + const Section* s = obj_section_get(ob, i); + if (s->removed) continue; + if (sym_eq_str(p, s->name, want)) return s; + } + return NULL; +} + +static ObjSecId find_section_id(const ObjBuilder* ob, Pool* p, + const char* want) { + u32 n = obj_section_count(ob); + for (u32 i = 1; i < n; ++i) { + const Section* s = obj_section_get(ob, i); + if (s->removed) continue; + if (sym_eq_str(p, s->name, want)) return i; + } + return OBJ_SEC_NONE; +} + +static ObjSymId find_sym_named(const ObjBuilder* ob, Pool* p, + const char* want) { + ObjSymIter* it = obj_symiter_new(ob); + ObjSymEntry e; + ObjSymId found = OBJ_SYM_NONE; + while (obj_symiter_next(it, &e)) { + if (e.sym->removed) continue; + if (sym_eq_str(p, e.sym->name, want)) { + found = e.id; + break; + } + } + obj_symiter_free(it); + return found; +} + +/* ---- emit / read driver ------------------------------------------- */ + +/* Emit ob into a fresh malloc()ed buffer. Caller frees *out_buf. + * Returns 0 on success, non-zero on failure. */ +static int emit_to_buf(Compiler* c, ObjBuilder* ob, uint8_t** out_buf, + size_t* out_len) { + CfreeWriter* w = NULL; + if (cfree_writer_mem(&g_heap, &w) != CFREE_OK || !w) return -1; + emit_coff(c, ob, w); + size_t n = 0; + const uint8_t* data = cfree_writer_mem_bytes(w, &n); + uint8_t* buf = (uint8_t*)malloc(n ? n : 1); + if (!buf) { + cfree_writer_close(w); + return -1; + } + if (n) memcpy(buf, data, n); + cfree_writer_close(w); + *out_buf = buf; + *out_len = n; + return 0; +} + +/* Debug helper: dump bytes side-by-side to stderr. */ +static void dump_diff(const uint8_t* a, const uint8_t* b, size_t n) { + for (size_t i = 0; i < n; i += 16) { + fprintf(stderr, "%04zx ", i); + for (size_t j = 0; j < 16 && i + j < n; ++j) { + fprintf(stderr, "%02x%c", a[i + j], + (i + j < n && a[i + j] != b[i + j]) ? 
'*' : ' '); + } + fprintf(stderr, " | "); + for (size_t j = 0; j < 16 && i + j < n; ++j) { + fprintf(stderr, "%02x%c", b[i + j], + (i + j < n && a[i + j] != b[i + j]) ? '*' : ' '); + } + fprintf(stderr, "\n"); + } +} + +/* Three-stage round-trip workflow: + * + * emit_coff(in) -> b1 + * read_coff(b1) -> mid + * verify(mid) + * emit_coff(mid) -> b2 + * EXPECT(b1 == b2) [skipped when expect_byte_stable == 0] + * + * `verify_fn` runs against the readback ObjBuilder (mid). */ +static void run_roundtrip_ex(Compiler* c, ObjBuilder* in, + void (*verify_fn)(const ObjBuilder*, Pool*), + int expect_byte_stable) { + uint8_t* b1 = NULL; + size_t n1 = 0; + if (emit_to_buf(c, in, &b1, &n1) != 0) { + EXPECT(0, "emit_to_buf #1 failed"); + return; + } + /* Header sanity: little-endian machine + nsections must be present. */ + EXPECT(n1 >= 20, "emit_coff #1 produced %zu bytes (< 20)", n1); + + ObjBuilder* mid = read_coff(c, "roundtrip", b1, n1); + EXPECT(mid != NULL, "read_coff returned NULL"); + if (!mid) { + free(b1); + return; + } + + if (verify_fn) verify_fn(mid, c->global); + + uint8_t* b2 = NULL; + size_t n2 = 0; + if (emit_to_buf(c, mid, &b2, &n2) != 0) { + EXPECT(0, "emit_to_buf #2 failed"); + obj_free(mid); + free(b1); + return; + } + + if (expect_byte_stable) { + EXPECT(n1 == n2, "byte-stable round-trip size mismatch: %zu vs %zu", n1, + n2); + if (n1 == n2) { + int differs = memcmp(b1, b2, n1) != 0; + EXPECT(!differs, "byte-stable round-trip differs (size %zu)", n1); + if (differs && getenv("CFREE_COFF_DUMP_DIFF")) { + fprintf(stderr, "--- b1 | b2 ---\n"); + dump_diff(b1, b2, n1); + } + } + } + + obj_free(mid); + free(b1); + free(b2); +} + +static void run_roundtrip(Compiler* c, ObjBuilder* in, + void (*verify_fn)(const ObjBuilder*, Pool*)) { + run_roundtrip_ex(c, in, verify_fn, /*expect_byte_stable=*/1); +} + +/* ---- compiler lifecycle ---------------------------------------------- + * CfreeContext must outlive the Compiler — compiler_init stashes the + * pointer. 
Use a file-scope context so make_compiler doesn't leave + * the compiler with a dangling ctx. */ + +static CfreeContext g_ctx; + +static Compiler* make_compiler(const CfreeTarget* t) { + memset(&g_ctx, 0, sizeof g_ctx); + g_ctx.heap = &g_heap; + g_ctx.diag = &g_diag; + g_ctx.now = -1; + CfreeCompiler* cc = NULL; + if (cfree_compiler_new(*t, &g_ctx, &cc) != CFREE_OK || !cc) return NULL; + return (Compiler*)cc; +} + +/* ---- payload bytes ------------------------------------------------- */ + +/* x64: mov eax, 42 ; ret. */ +static const uint8_t TEXT_X64[6] = { + 0xb8, 0x2a, 0x00, 0x00, 0x00, 0xc3, +}; + +/* aa64: mov w0, #42 ; ret. */ +static const uint8_t TEXT_AA64[8] = { + 0x40, 0x05, 0x80, 0x52, 0xc0, 0x03, 0x5f, 0xd6, +}; + +/* ---- per-test verifiers / builders -------------------------------- */ + +/* test_header_minimal_x64 / _aa64: a single .text section, no + * relocations, no user symbols. Exercises the file/section-header + * encoder + the section-symbol synthesis path. */ + +static void verify_header_minimal(const ObjBuilder* ob, Pool* p) { + const Section* text = find_section_named(ob, p, ".text"); + EXPECT(text != NULL, ".text not present"); + if (text) { + EXPECT(text->kind == SEC_TEXT, ".text kind=%u", text->kind); + EXPECT((text->flags & SF_EXEC) != 0, ".text missing SF_EXEC"); + EXPECT((text->flags & SF_ALLOC) != 0, ".text missing SF_ALLOC"); + } +} + +static void test_header_minimal_x64(void) { + g_test_name = "header_minimal_x64"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + EXPECT(c != NULL, "compiler_new"); + if (!c) return; + + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic during test"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym text = pool_intern_cstr(p, ".text"); + ObjSecId sec = obj_section(ob, text, SEC_TEXT, SF_ALLOC | SF_EXEC, 16); + obj_write(ob, sec, TEXT_X64, sizeof TEXT_X64); + obj_finalize(ob); + + 
run_roundtrip(c, ob, verify_header_minimal); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +static void test_header_minimal_aa64(void) { + g_test_name = "header_minimal_aa64"; + CfreeTarget t; + target_aa64_windows(&t); + Compiler* c = make_compiler(&t); + EXPECT(c != NULL, "compiler_new"); + if (!c) return; + + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic during test"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym text = pool_intern_cstr(p, ".text"); + ObjSecId sec = obj_section(ob, text, SEC_TEXT, SF_ALLOC | SF_EXEC, 4); + obj_write(ob, sec, TEXT_AA64, sizeof TEXT_AA64); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_header_minimal); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_text_only_x64: .text + one defined global function symbol. */ + +static void verify_text_only(const ObjBuilder* ob, Pool* p) { + const Section* text = find_section_named(ob, p, ".text"); + EXPECT(text != NULL, ".text not present"); + ObjSymId main = find_sym_named(ob, p, "main"); + EXPECT(main != OBJ_SYM_NONE, "missing 'main' symbol"); + if (main) { + const ObjSym* s = obj_symbol_get(ob, main); + EXPECT(s->bind == SB_GLOBAL, "main bind=%u", s->bind); + EXPECT(s->kind == SK_FUNC, "main kind=%u", s->kind); + EXPECT(s->section_id != OBJ_SEC_NONE, "main has no section"); + } +} + +static void test_text_only_x64(void) { + g_test_name = "text_only_x64"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym tn = pool_intern_cstr(p, ".text"); + Sym mn = pool_intern_cstr(p, "main"); + ObjSecId sec = obj_section(ob, tn, SEC_TEXT, SF_ALLOC | SF_EXEC, 16); + obj_write(ob, sec, TEXT_X64, 
sizeof TEXT_X64); + obj_symbol(ob, mn, SB_GLOBAL, SK_FUNC, sec, 0, sizeof TEXT_X64); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_text_only); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +static void test_text_only_aa64(void) { + g_test_name = "text_only_aa64"; + CfreeTarget t; + target_aa64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym tn = pool_intern_cstr(p, ".text"); + Sym mn = pool_intern_cstr(p, "main"); + ObjSecId sec = obj_section(ob, tn, SEC_TEXT, SF_ALLOC | SF_EXEC, 4); + obj_write(ob, sec, TEXT_AA64, sizeof TEXT_AA64); + obj_symbol(ob, mn, SB_GLOBAL, SK_FUNC, sec, 0, sizeof TEXT_AA64); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_text_only); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_rodata: .rdata read-only data + a defined object symbol. 
*/ + +static void verify_rodata(const ObjBuilder* ob, Pool* p) { + const Section* rd = find_section_named(ob, p, ".rdata"); + EXPECT(rd != NULL, ".rdata not present"); + if (rd) { + EXPECT(rd->kind == SEC_RODATA, ".rdata kind=%u (want %u)", rd->kind, + SEC_RODATA); + EXPECT((rd->flags & SF_ALLOC) != 0, ".rdata missing SF_ALLOC"); + EXPECT((rd->flags & SF_WRITE) == 0, ".rdata wrongly has SF_WRITE"); + } + ObjSymId k = find_sym_named(ob, p, "kMsg"); + EXPECT(k != OBJ_SYM_NONE, "missing 'kMsg' symbol"); +} + +static void test_rodata(void) { + g_test_name = "rodata"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym rdn = pool_intern_cstr(p, ".rdata"); + Sym kn = pool_intern_cstr(p, "kMsg"); + ObjSecId sec = obj_section(ob, rdn, SEC_RODATA, SF_ALLOC, 8); + static const uint8_t MSG[12] = "hello world\0"; + obj_write(ob, sec, MSG, sizeof MSG); + obj_symbol(ob, kn, SB_GLOBAL, SK_OBJ, sec, 0, sizeof MSG); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_rodata); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_bss: .bss section (NOBITS), one defined symbol, size > 0. 
*/ + +static void verify_bss(const ObjBuilder* ob, Pool* p) { + const Section* bss = find_section_named(ob, p, ".bss"); + EXPECT(bss != NULL, ".bss not present"); + if (bss) { + EXPECT(bss->kind == SEC_BSS, ".bss kind=%u (want %u)", bss->kind, SEC_BSS); + EXPECT(bss->bss_size >= 64, ".bss size=%u (want >= 64)", bss->bss_size); + } +} + +static void test_bss(void) { + g_test_name = "bss"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym bn = pool_intern_cstr(p, ".bss"); + Sym vn = pool_intern_cstr(p, "g_buf"); + ObjSecId sec = obj_section_ex(ob, bn, SEC_BSS, SSEM_NOBITS, + SF_ALLOC | SF_WRITE, 16, 0, 0, 0); + obj_reserve_bss(ob, sec, 64, 16); + obj_symbol(ob, vn, SB_GLOBAL, SK_OBJ, sec, 0, 64); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_bss); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_data_with_reloc_abs64_x64: .data with an 8-byte slot + * relocated R_ABS64 against an undefined external. 
*/ + +static void verify_data_abs64(const ObjBuilder* ob, Pool* p) { + ObjSecId data_id = find_section_id(ob, p, ".data"); + EXPECT(data_id != OBJ_SEC_NONE, ".data id"); + ObjSymId foo = find_sym_named(ob, p, "foo_extern"); + EXPECT(foo != OBJ_SYM_NONE, "missing 'foo_extern'"); + if (foo) { + const ObjSym* s = obj_symbol_get(ob, foo); + EXPECT(s->section_id == OBJ_SEC_NONE, "foo_extern not undef"); + } + if (data_id == OBJ_SEC_NONE) return; + u32 nr = obj_reloc_count(ob, data_id); + EXPECT(nr == 1, ".data reloc count=%u (want 1)", nr); + u32 total = obj_reloc_total(ob); + const Reloc* found = NULL; + for (u32 i = 0; i < total; ++i) { + const Reloc* r = obj_reloc_at(ob, i); + if (r->removed) continue; + if (r->section_id == data_id) { + found = r; + break; + } + } + EXPECT(found != NULL, "no reloc on .data"); + if (found) { + EXPECT(found->kind == R_ABS64, ".data reloc kind=%u (want %u)", found->kind, + R_ABS64); + EXPECT(found->offset == 0, ".data reloc offset=%u", found->offset); + } +} + +static void test_data_with_reloc_abs64_x64(void) { + g_test_name = "data_with_reloc_abs64_x64"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym dn = pool_intern_cstr(p, ".data"); + Sym fn = pool_intern_cstr(p, "foo_extern"); + ObjSecId sec = obj_section(ob, dn, SEC_DATA, SF_ALLOC | SF_WRITE, 8); + static const uint8_t zero8[8] = {0}; + obj_write(ob, sec, zero8, sizeof zero8); + ObjSymId foo = obj_symbol(ob, fn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); + obj_reloc(ob, sec, 0, R_ABS64, foo, 0); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_data_abs64); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +static void test_data_with_reloc_abs64_aa64(void) { + g_test_name = 
"data_with_reloc_abs64_aa64"; + CfreeTarget t; + target_aa64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym dn = pool_intern_cstr(p, ".data"); + Sym fn = pool_intern_cstr(p, "foo_extern"); + ObjSecId sec = obj_section(ob, dn, SEC_DATA, SF_ALLOC | SF_WRITE, 8); + static const uint8_t zero8[8] = {0}; + obj_write(ob, sec, zero8, sizeof zero8); + ObjSymId foo = obj_symbol(ob, fn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); + obj_reloc(ob, sec, 0, R_ABS64, foo, 0); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_data_abs64); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_data_with_reloc_rel32_x64: .text with a REL32 relocation + * referencing an external symbol (call thunk). */ + +static void verify_rel32(const ObjBuilder* ob, Pool* p) { + ObjSecId text_id = find_section_id(ob, p, ".text"); + EXPECT(text_id != OBJ_SEC_NONE, ".text id"); + ObjSymId helper = find_sym_named(ob, p, "helper"); + EXPECT(helper != OBJ_SYM_NONE, "missing 'helper'"); + if (text_id == OBJ_SEC_NONE) return; + u32 nr = obj_reloc_count(ob, text_id); + EXPECT(nr == 1, ".text reloc count=%u (want 1)", nr); + u32 total = obj_reloc_total(ob); + for (u32 i = 0; i < total; ++i) { + const Reloc* r = obj_reloc_at(ob, i); + if (r->removed) continue; + if (r->section_id != text_id) continue; + EXPECT(r->kind == R_PC32, "reloc kind=%u (want R_PC32=%u)", r->kind, + R_PC32); + } +} + +static void test_data_with_reloc_rel32_x64(void) { + g_test_name = "reloc_rel32_x64"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + 
ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym tn = pool_intern_cstr(p, ".text"); + Sym hn = pool_intern_cstr(p, "helper"); + ObjSecId sec = obj_section(ob, tn, SEC_TEXT, SF_ALLOC | SF_EXEC, 16); + /* call helper ; ret — e8 disp32 c3 (disp filled by reloc). */ + static const uint8_t bytes[6] = {0xe8, 0, 0, 0, 0, 0xc3}; + obj_write(ob, sec, bytes, sizeof bytes); + /* Undef symbol kind: SK_UNDEF — matches what real COFF inputs carry. + * SK_FUNC + section_id == 0 emits Type=function but the reader collapses + * to SK_UNDEF on readback (no "undef function" kind in cfree's model), + * which breaks byte stability. See CORPUS.md §10. */ + ObjSymId helper = + obj_symbol(ob, hn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); + obj_reloc(ob, sec, 1, R_PC32, helper, 0); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_rel32); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_aa64_branch26: .text with a BRANCH26 (R_AARCH64_CALL26) + * relocation against an external. 
*/ + +static void verify_aa64_branch26(const ObjBuilder* ob, Pool* p) { + ObjSecId text_id = find_section_id(ob, p, ".text"); + EXPECT(text_id != OBJ_SEC_NONE, ".text id"); + u32 total = obj_reloc_total(ob); + int seen = 0; + for (u32 i = 0; i < total; ++i) { + const Reloc* r = obj_reloc_at(ob, i); + if (r->removed) continue; + if (r->section_id != text_id) continue; + EXPECT(r->kind == R_AARCH64_CALL26, + "branch26 reloc kind=%u (want %u)", r->kind, R_AARCH64_CALL26); + ++seen; + } + EXPECT(seen == 1, "branch26 reloc count=%d (want 1)", seen); +} + +static void test_aa64_branch26(void) { + g_test_name = "aa64_branch26"; + CfreeTarget t; + target_aa64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym tn = pool_intern_cstr(p, ".text"); + Sym cn = pool_intern_cstr(p, "callee"); + ObjSecId sec = obj_section(ob, tn, SEC_TEXT, SF_ALLOC | SF_EXEC, 4); + /* bl callee ; ret — both 4 bytes; disp filled by reloc. */ + static const uint8_t bytes[8] = {0, 0, 0, 0x94, 0xc0, 0x03, 0x5f, 0xd6}; + obj_write(ob, sec, bytes, sizeof bytes); + /* See reloc_rel32_x64 note on SK_UNDEF for undef symbols. */ + ObjSymId callee = + obj_symbol(ob, cn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); + obj_reloc(ob, sec, 0, R_AARCH64_CALL26, callee, 0); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_aa64_branch26); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_aa64_pagebase_pageoffset: ADRP + ADD pair against a .rdata + * symbol — exercises both PAGEBASE_REL21 and PAGEOFFSET_12A. 
*/ + +static void verify_aa64_adrp_add(const ObjBuilder* ob, Pool* p) { + ObjSecId text_id = find_section_id(ob, p, ".text"); + EXPECT(text_id != OBJ_SEC_NONE, ".text id"); + u32 total = obj_reloc_total(ob); + int n_page = 0, n_off = 0; + for (u32 i = 0; i < total; ++i) { + const Reloc* r = obj_reloc_at(ob, i); + if (r->removed) continue; + if (r->section_id != text_id) continue; + if (r->kind == R_AARCH64_ADR_PREL_PG_HI21) ++n_page; + if (r->kind == R_AARCH64_ADD_ABS_LO12_NC) ++n_off; + } + EXPECT(n_page == 1, "ADRP reloc count=%d (want 1)", n_page); + EXPECT(n_off == 1, "ADD lo12 reloc count=%d (want 1)", n_off); +} + +static void test_aa64_pagebase_pageoffset(void) { + g_test_name = "aa64_pagebase_pageoffset"; + CfreeTarget t; + target_aa64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym tn = pool_intern_cstr(p, ".text"); + Sym rdn = pool_intern_cstr(p, ".rdata"); + Sym kn = pool_intern_cstr(p, "kStr"); + ObjSecId tsec = obj_section(ob, tn, SEC_TEXT, SF_ALLOC | SF_EXEC, 4); + ObjSecId rsec = obj_section(ob, rdn, SEC_RODATA, SF_ALLOC, 8); + /* adrp x0, kStr ; add x0, x0, :lo12:kStr ; ret. 
*/ + static const uint8_t txt[12] = { + 0x00, 0, 0, 0x90, 0, 0, 0, 0x91, 0xc0, 0x03, 0x5f, 0xd6, + }; + obj_write(ob, tsec, txt, sizeof txt); + static const uint8_t str[6] = "hello"; + obj_write(ob, rsec, str, sizeof str); + ObjSymId kStr = + obj_symbol(ob, kn, SB_LOCAL, SK_OBJ, rsec, 0, sizeof str); + obj_reloc(ob, tsec, 0, R_AARCH64_ADR_PREL_PG_HI21, kStr, 0); + obj_reloc(ob, tsec, 4, R_AARCH64_ADD_ABS_LO12_NC, kStr, 0); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_aa64_adrp_add); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_long_section_name: section whose name exceeds 8 bytes, + * triggering the "/N" strtab-spill encoding. */ + +static void verify_long_section_name(const ObjBuilder* ob, Pool* p) { + const Section* s = find_section_named(ob, p, ".text$long_name_section"); + EXPECT(s != NULL, "long-named section not present"); + if (s) EXPECT(s->kind == SEC_TEXT, "long section kind=%u", s->kind); +} + +static void test_long_section_name(void) { + g_test_name = "long_section_name"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym nm = pool_intern_cstr(p, ".text$long_name_section"); + ObjSecId sec = obj_section(ob, nm, SEC_TEXT, SF_ALLOC | SF_EXEC, 16); + obj_write(ob, sec, TEXT_X64, sizeof TEXT_X64); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_long_section_name); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_long_symbol_name: symbol whose name exceeds 8 bytes — uses + * the LongName (Zeroes=0, Offset) wire form. 
*/
+
+/* Round-trip check: the long-named symbol must still exist and keep its
+ * global binding and function kind. */
+static void verify_long_symbol_name(const ObjBuilder* ob, Pool* p) {
+  ObjSymId s = find_sym_named(ob, p, "very_long_symbol_name");
+  EXPECT(s != OBJ_SYM_NONE, "long-named symbol not present");
+  /* Guard with the same sentinel the presence check uses instead of
+   * relying on OBJ_SYM_NONE being zero. */
+  if (s != OBJ_SYM_NONE) {
+    const ObjSym* sym = obj_symbol_get(ob, s);
+    EXPECT(sym->bind == SB_GLOBAL, "long sym bind=%u", sym->bind);
+    EXPECT(sym->kind == SK_FUNC, "long sym kind=%u", sym->kind);
+  }
+}
+
+/* Builds a .text section holding TEXT_X64 plus one global function symbol
+ * whose name is longer than 8 bytes, then runs the round-trip. */
+static void test_long_symbol_name(void) {
+  g_test_name = "long_symbol_name";
+  CfreeTarget t;
+  target_x64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  if (!c) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  /* A compiler panic longjmps back here: tear down and record a failure. */
+  if (setjmp(c->panic)) {
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* ob = obj_new(c);
+  Pool* p = c->global;
+  Sym tn = pool_intern_cstr(p, ".text");
+  Sym sn = pool_intern_cstr(p, "very_long_symbol_name");
+  ObjSecId sec = obj_section(ob, tn, SEC_TEXT, SF_ALLOC | SF_EXEC, 16);
+  obj_write(ob, sec, TEXT_X64, sizeof TEXT_X64);
+  obj_symbol(ob, sn, SB_GLOBAL, SK_FUNC, sec, 0, sizeof TEXT_X64);
+  obj_finalize(ob);
+
+  run_roundtrip(c, ob, verify_long_symbol_name);
+
+  obj_free(ob);
+  cfree_compiler_free((CfreeCompiler*)c);
+}
+
+/* test_weak_global: weak global symbol — IMAGE_SYM_CLASS_WEAK_EXTERNAL
+ * with a weak-extern aux record. 
*/ + +static void verify_weak_global(const ObjBuilder* ob, Pool* p) { + ObjSymId s = find_sym_named(ob, p, "weak_sym"); + EXPECT(s != OBJ_SYM_NONE, "weak_sym not present"); + if (s) { + const ObjSym* sym = obj_symbol_get(ob, s); + EXPECT(sym->bind == SB_WEAK, "weak_sym bind=%u (want SB_WEAK=%u)", + sym->bind, SB_WEAK); + } +} + +static void test_weak_global(void) { + g_test_name = "weak_global"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym tn = pool_intern_cstr(p, ".text"); + Sym wn = pool_intern_cstr(p, "weak_sym"); + ObjSecId sec = obj_section(ob, tn, SEC_TEXT, SF_ALLOC | SF_EXEC, 16); + obj_write(ob, sec, TEXT_X64, sizeof TEXT_X64); + obj_symbol(ob, wn, SB_WEAK, SK_FUNC, sec, 0, sizeof TEXT_X64); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_weak_global); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_common_symbol: COFF common — UNDEFINED section number with + * Value > 0 holding the size. 
*/ + +static void verify_common_symbol(const ObjBuilder* ob, Pool* p) { + ObjSymId s = find_sym_named(ob, p, "common_var"); + EXPECT(s != OBJ_SYM_NONE, "common_var not present"); + if (s) { + const ObjSym* sym = obj_symbol_get(ob, s); + EXPECT(sym->kind == SK_COMMON, "common_var kind=%u (want SK_COMMON=%u)", + sym->kind, SK_COMMON); + EXPECT(sym->size == 128, "common_var size=%llu (want 128)", + (unsigned long long)sym->size); + } +} + +static void test_common_symbol(void) { + g_test_name = "common_symbol"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym cn = pool_intern_cstr(p, "common_var"); + obj_symbol_ex(ob, cn, SB_GLOBAL, SV_DEFAULT, SK_COMMON, OBJ_SEC_NONE, 0, 128, + 1); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_common_symbol); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_comdat_group: two sections wired into one COMDAT group. */ + +static void verify_comdat_group(const ObjBuilder* ob, Pool* p) { + const Section* tsec = find_section_named(ob, p, ".text$x"); + const Section* dsec = find_section_named(ob, p, ".data$x"); + EXPECT(tsec != NULL, ".text$x missing"); + EXPECT(dsec != NULL, ".data$x missing"); + if (tsec) + EXPECT((tsec->flags & SF_GROUP) != 0, ".text$x missing SF_GROUP"); + if (dsec) + EXPECT((dsec->flags & SF_GROUP) != 0, ".data$x missing SF_GROUP"); + + /* COFF encodes COMDAT per-section (each member section carries its + * own section-definition aux with the selection rule); the wire + * format has no SHT_GROUP-style "N-member" record. read_coff + * therefore emits one ObjGroup per COMDAT section — two input + * COMDAT sections => two single-section groups after round-trip. 
+ * Each carries the section's section-symbol as its signature. */ + ObjGroupIter* it = obj_groupiter_new(ob); + ObjGroupEntry e; + int seen = 0; + u32 total_member_sections = 0; + while (obj_groupiter_next(it, &e)) { + if (e.group->removed) continue; + ++seen; + total_member_sections += e.group->nsections; + } + obj_groupiter_free(it); + EXPECT(seen == 2, "expected 2 groups after COMDAT round-trip, got %d", seen); + EXPECT(total_member_sections == 2, + "total COMDAT member sections=%u (want 2)", + total_member_sections); +} + +static void test_comdat_group(void) { + g_test_name = "comdat_group"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + /* Short section names (<= 8 bytes) — section names that overflow into + * the strtab don't round-trip COMDAT detection because the section + * symbol's name is truncated on emit but the reader compares the + * resolved long name. See CORPUS.md §10 / src/obj/coff_read.c + * is_section_sym logic. 
*/ + Sym tn = pool_intern_cstr(p, ".text$x"); + Sym dn = pool_intern_cstr(p, ".data$x"); + Sym sign = pool_intern_cstr(p, "inline_fn"); + + ObjSecId tsec = obj_section(ob, tn, SEC_TEXT, + SF_ALLOC | SF_EXEC | SF_GROUP, 16); + ObjSecId dsec = obj_section(ob, dn, SEC_DATA, + SF_ALLOC | SF_WRITE | SF_GROUP, 8); + obj_write(ob, tsec, TEXT_X64, sizeof TEXT_X64); + static const uint8_t z8[8] = {0}; + obj_write(ob, dsec, z8, sizeof z8); + + ObjSymId sig = + obj_symbol(ob, sign, SB_WEAK, SK_FUNC, tsec, 0, sizeof TEXT_X64); + ObjGroupId gid = obj_group(ob, sign, sig, CFREE_OBJ_GROUP_COMDAT); + obj_group_add_section(ob, gid, tsec); + obj_group_add_section(ob, gid, dsec); + obj_section_set_group(ob, tsec, gid); + obj_section_set_group(ob, dsec, gid); + + obj_finalize(ob); + + run_roundtrip(c, ob, verify_comdat_group); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_static_local_symbol: STATIC storage class — file-local symbol. */ + +static void verify_static_local(const ObjBuilder* ob, Pool* p) { + ObjSymId s = find_sym_named(ob, p, "local_fn"); + EXPECT(s != OBJ_SYM_NONE, "local_fn not present"); + if (s) { + const ObjSym* sym = obj_symbol_get(ob, s); + EXPECT(sym->bind == SB_LOCAL, "local_fn bind=%u (want SB_LOCAL=%u)", + sym->bind, SB_LOCAL); + } +} + +static void test_static_local_symbol(void) { + g_test_name = "static_local_symbol"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym tn = pool_intern_cstr(p, ".text"); + Sym ln = pool_intern_cstr(p, "local_fn"); + ObjSecId sec = obj_section(ob, tn, SEC_TEXT, SF_ALLOC | SF_EXEC, 16); + obj_write(ob, sec, TEXT_X64, sizeof TEXT_X64); + obj_symbol(ob, ln, SB_LOCAL, SK_FUNC, sec, 0, sizeof TEXT_X64); + obj_finalize(ob); + + 
run_roundtrip(c, ob, verify_static_local); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_section_symbol_synthesis: input has no explicit SK_SECTION + * symbol; readback should contain one per kept section (from the + * emitter-synthesized SECTION primary + section-definition aux). */ + +static void verify_section_symbol_synthesis(const ObjBuilder* ob, Pool* p) { + ObjSymIter* it = obj_symiter_new(ob); + ObjSymEntry e; + int n_section_syms = 0; + while (obj_symiter_next(it, &e)) { + if (e.sym->removed) continue; + if (e.sym->kind == SK_SECTION) ++n_section_syms; + } + obj_symiter_free(it); + EXPECT(n_section_syms >= 1, + "no SK_SECTION symbols after round-trip (expected at least one)"); + /* Best-effort: text + data + bss + rdata = 4. */ + EXPECT(n_section_syms == 4, + "section-symbol count=%d (want 4: text/data/bss/rdata)", + n_section_syms); + (void)p; +} + +static void test_section_symbol_synthesis(void) { + g_test_name = "section_symbol_synthesis"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + + ObjSecId text = obj_section(ob, pool_intern_cstr(p, ".text"), SEC_TEXT, + SF_ALLOC | SF_EXEC, 16); + obj_write(ob, text, TEXT_X64, sizeof TEXT_X64); + ObjSecId data = obj_section(ob, pool_intern_cstr(p, ".data"), SEC_DATA, + SF_ALLOC | SF_WRITE, 8); + static const uint8_t z8[8] = {0}; + obj_write(ob, data, z8, sizeof z8); + ObjSecId rdata = obj_section(ob, pool_intern_cstr(p, ".rdata"), SEC_RODATA, + SF_ALLOC, 8); + obj_write(ob, rdata, "hi\0", 3); + ObjSecId bss = obj_section_ex(ob, pool_intern_cstr(p, ".bss"), SEC_BSS, + SSEM_NOBITS, SF_ALLOC | SF_WRITE, 8, 0, 0, 0); + obj_reserve_bss(ob, bss, 16, 8); + + obj_finalize(ob); + + run_roundtrip(c, ob, 
verify_section_symbol_synthesis); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_tls_section: ".tls$" section gets SF_TLS on readback (name- + * based detection in read_coff). */ + +static void verify_tls_section(const ObjBuilder* ob, Pool* p) { + const Section* s = find_section_named(ob, p, ".tls$"); + EXPECT(s != NULL, ".tls$ not present"); + if (s) { + EXPECT((s->flags & SF_TLS) != 0, + ".tls$ missing SF_TLS (flags=0x%x)", s->flags); + } +} + +static void test_tls_section(void) { + g_test_name = "tls_section"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym nm = pool_intern_cstr(p, ".tls$"); + Sym vn = pool_intern_cstr(p, "tls_var"); + ObjSecId sec = obj_section(ob, nm, SEC_DATA, SF_ALLOC | SF_WRITE | SF_TLS, 8); + static const uint8_t z8[8] = {0}; + obj_write(ob, sec, z8, sizeof z8); + obj_symbol(ob, vn, SB_GLOBAL, SK_OBJ, sec, 0, sizeof z8); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_tls_section); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_align_nibble: section with a non-trivial alignment (4096) + * round-trips via the ALIGN_4096BYTES nibble. 
*/ + +static void verify_align_nibble(const ObjBuilder* ob, Pool* p) { + const Section* s = find_section_named(ob, p, ".rdata"); + EXPECT(s != NULL, ".rdata not present"); + if (s) { + EXPECT(s->align == 4096, ".rdata align=%u (want 4096)", s->align); + } +} + +static void test_align_nibble(void) { + g_test_name = "align_nibble"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym nm = pool_intern_cstr(p, ".rdata"); + ObjSecId sec = obj_section(ob, nm, SEC_RODATA, SF_ALLOC, 4096); + static const uint8_t z[16] = {0}; + obj_write(ob, sec, z, sizeof z); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_align_nibble); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* test_empty_obj: no sections, no symbols. Smallest valid .obj. */ + +static void verify_empty_obj(const ObjBuilder* ob, Pool* p) { + (void)p; + u32 n = obj_section_count(ob); + /* obj_section_count includes the id-0 placeholder. 
*/ + int real = 0; + for (u32 i = 1; i < n; ++i) { + const Section* s = obj_section_get(ob, i); + if (!s->removed) ++real; + } + EXPECT(real == 0, "empty obj has %d sections after round-trip", real); +} + +static void test_empty_obj(void) { + g_test_name = "empty_obj"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic"); + return; + } + ObjBuilder* ob = obj_new(c); + obj_finalize(ob); + + run_roundtrip(c, ob, verify_empty_obj); + + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* ---- short-import (Microsoft .lib member) smoke ------------------ */ + +/* Hand-build a 45-byte short-import record: + * header (20) + "ExitProcess\0" (12) + "KERNEL32.dll\0" (13) = 45 + * SizeOfData = 25 (the two NUL-terminated strings). + * Machine = AMD64 (0x8664). + * TypeFlags = (Type=CODE) | (NameType=NAME << 2) = 0 | (1<<2) = 4. + * + * Verifies that read_coff dispatches to the short-import path, the + * synthesized ObjBuilder has the imported symbol and its `__imp_*` + * alias defined at OBJ_SEC_NONE (DSO-shape), and the providing DLL + * name is recoverable via obj_get_coff_import_dll. 
*/ +static void test_short_import_amd64(void) { + g_test_name = "short_import_amd64"; + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + EXPECT(0, "compiler_new"); + return; + } + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + EXPECT(0, "panic during test"); + return; + } + + static const char kSym[] = "ExitProcess"; /* 11 chars + NUL = 12 */ + static const char kDll[] = "KERNEL32.dll"; /* 12 chars + NUL = 13 */ + const uint32_t kSymLen = (uint32_t)(sizeof kSym - 1); + const uint32_t kDllLen = (uint32_t)(sizeof kDll - 1); + const uint32_t kDataLen = sizeof kSym + sizeof kDll; /* 12 + 13 = 25 */ + const size_t kTotal = 20 + kDataLen; /* 45 */ + uint8_t buf[64]; + EXPECT(kTotal <= sizeof buf, "buf too small"); + memset(buf, 0, kTotal); + /* Header. */ + buf[0] = 0x00; buf[1] = 0x00; /* Sig1 = 0 */ + buf[2] = 0xFF; buf[3] = 0xFF; /* Sig2 = 0xFFFF */ + buf[4] = 0x00; buf[5] = 0x00; /* Version = 0 */ + buf[6] = 0x64; buf[7] = 0x86; /* Machine = AMD64 (0x8664) */ + /* TimeDateStamp = 0 (bytes 8..11 already 0). */ + buf[12] = (uint8_t)(kDataLen & 0xFF); + buf[13] = (uint8_t)((kDataLen >> 8) & 0xFF); + buf[14] = (uint8_t)((kDataLen >> 16) & 0xFF); + buf[15] = (uint8_t)((kDataLen >> 24) & 0xFF); + /* OrdinalOrHint = 0 (16..17). */ + /* TypeFlags = Type=CODE(0) | NameType=NAME(1)<<2 = 0x0004. */ + buf[18] = 0x04; buf[19] = 0x00; + /* Body: symbol name NUL DLL name NUL. 
*/ + memcpy(buf + 20, kSym, sizeof kSym); + memcpy(buf + 20 + sizeof kSym, kDll, sizeof kDll); + + ObjBuilder* ob = read_coff(c, "short-import", buf, kTotal); + EXPECT(ob != NULL, "read_coff returned NULL on short-import"); + if (!ob) { + cfree_compiler_free((CfreeCompiler*)c); + return; + } + + Pool* p = c->global; + ObjSymId sid = find_sym_named(ob, p, kSym); + EXPECT(sid != OBJ_SYM_NONE, "missing imported symbol"); + if (sid) { + const ObjSym* s = obj_symbol_get(ob, sid); + EXPECT(s->bind == SB_GLOBAL, "imported sym bind=%u (want SB_GLOBAL)", + s->bind); + EXPECT(s->kind == SK_FUNC, "imported sym kind=%u (want SK_FUNC)", s->kind); + EXPECT(s->section_id == OBJ_SEC_NONE, + "imported sym section_id=%u (want OBJ_SEC_NONE)", + (unsigned)s->section_id); + } + + ObjSymId imp_id = find_sym_named(ob, p, "__imp_ExitProcess"); + EXPECT(imp_id != OBJ_SYM_NONE, "missing __imp_<name> alias"); + if (imp_id) { + const ObjSym* s = obj_symbol_get(ob, imp_id); + EXPECT(s->bind == SB_GLOBAL, "__imp_ bind=%u (want SB_GLOBAL)", s->bind); + EXPECT(s->section_id == OBJ_SEC_NONE, + "__imp_ section_id=%u (want OBJ_SEC_NONE)", + (unsigned)s->section_id); + } + + Sym dll = 0; + int got = obj_get_coff_import_dll(ob, &dll); + EXPECT(got, "obj_get_coff_import_dll returned 0 (annotation missing)"); + if (got) EXPECT(sym_eq_str(p, dll, kDll), "DLL name mismatch"); + + (void)kSymLen; (void)kDllLen; + obj_free(ob); + cfree_compiler_free((CfreeCompiler*)c); +} + +/* ---- driver -------------------------------------------------------- */ + +typedef void (*TestFn)(void); + +static const struct { + const char* name; + TestFn fn; +} TESTS[] = { + {"header_minimal_x64", test_header_minimal_x64}, + {"header_minimal_aa64", test_header_minimal_aa64}, + {"text_only_x64", test_text_only_x64}, + {"text_only_aa64", test_text_only_aa64}, + {"rodata", test_rodata}, + {"bss", test_bss}, + {"data_with_reloc_abs64_x64", test_data_with_reloc_abs64_x64}, + {"data_with_reloc_abs64_aa64", 
test_data_with_reloc_abs64_aa64}, + {"reloc_rel32_x64", test_data_with_reloc_rel32_x64}, + {"aa64_branch26", test_aa64_branch26}, + {"aa64_pagebase_pageoffset", test_aa64_pagebase_pageoffset}, + {"long_section_name", test_long_section_name}, + {"long_symbol_name", test_long_symbol_name}, + {"weak_global", test_weak_global}, + {"common_symbol", test_common_symbol}, + {"comdat_group", test_comdat_group}, + {"static_local_symbol", test_static_local_symbol}, + {"section_symbol_synthesis", test_section_symbol_synthesis}, + {"tls_section", test_tls_section}, + {"align_nibble", test_align_nibble}, + {"empty_obj", test_empty_obj}, + {"short_import_amd64", test_short_import_amd64}, +}; +static const size_t NTESTS = sizeof TESTS / sizeof TESTS[0]; + +int main(void) { + for (size_t i = 0; i < NTESTS; ++i) { + int before = g_failures; + TESTS[i].fn(); + if (g_failures == before) { + fprintf(stderr, " ok %s\n", TESTS[i].name); + } else { + fprintf(stderr, " FAIL %s\n", TESTS[i].name); + } + } + if (g_failures) { + fprintf(stderr, "FAILED %d assertion(s) across %zu tests\n", g_failures, + NTESTS); + return 1; + } + fprintf(stderr, "OK %zu tests\n", NTESTS); + return 0; +} + diff --git a/test/coff/pe-dso-forwarder.c b/test/coff/pe-dso-forwarder.c @@ -0,0 +1,303 @@ +/* read_coff_dso forwarder-export contract test. + * + * Synthesizes a minimal PE32+ DLL with two named exports — one direct + * (EAT RVA outside the export directory's range) and one forwarder + * (EAT RVA inside the export directory's range, contents + * "OTHERDLL.OtherSym") — and asserts that read_coff_dso surfaces both + * as OBJ_SEC_NONE globals on the returned ObjBuilder. cfree's linker + * does not follow forwarder chains: the symbols just need to be + * defined so import resolution succeeds, and the OS loader follows + * the chain at runtime. This test locks in that contract. 
*/ + +#include <cfree/core.h> +#include <cfree/object.h> +#include <setjmp.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "core/core.h" +#include "core/pool.h" +#include "obj/coff.h" +#include "obj/obj.h" + +/* ---- env vtables --------------------------------------------------- */ + +static void* heap_alloc(CfreeHeap* h, size_t n, size_t a) { + (void)h; + (void)a; + return n ? malloc(n) : NULL; +} +static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) { + (void)h; + (void)o; + (void)a; + return realloc(p, n); +} +static void heap_free(CfreeHeap* h, void* p, size_t n) { + (void)h; + (void)n; + free(p); +} +static CfreeHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL}; + +static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, + const char* fmt, va_list ap) { + static const char* names[] = {"note", "warning", "error", "fatal"}; + (void)s; + (void)loc; + fprintf(stderr, "%s: ", names[k]); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); +} +static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; + +static int g_failures; +#define EXPECT(cond, ...) 
\ + do { \ + if (!(cond)) { \ + fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fputc('\n', stderr); \ + g_failures++; \ + } \ + } while (0) + +/* ---- compiler ----------------------------------------------------- */ + +static CfreeContext g_ctx; + +static void target_x64_windows(CfreeTarget* t) { + memset(t, 0, sizeof *t); + t->arch = CFREE_ARCH_X86_64; + t->os = CFREE_OS_WINDOWS; + t->obj = CFREE_OBJ_COFF; + t->ptr_size = 8; + t->ptr_align = 8; + t->big_endian = false; + t->pic = CFREE_PIC_PIE; + t->code_model = CFREE_CM_SMALL; +} + +static Compiler* make_compiler(const CfreeTarget* t) { + memset(&g_ctx, 0, sizeof g_ctx); + g_ctx.heap = &g_heap; + g_ctx.diag = &g_diag; + g_ctx.now = -1; + CfreeCompiler* cc = NULL; + if (cfree_compiler_new(*t, &g_ctx, &cc) != CFREE_OK || !cc) return NULL; + return (Compiler*)cc; +} + +/* ---- little-endian writers ---------------------------------------- */ + +static void wr_u16(uint8_t* p, uint16_t v) { + p[0] = (uint8_t)(v & 0xFF); + p[1] = (uint8_t)((v >> 8) & 0xFF); +} +static void wr_u32(uint8_t* p, uint32_t v) { + p[0] = (uint8_t)(v & 0xFF); + p[1] = (uint8_t)((v >> 8) & 0xFF); + p[2] = (uint8_t)((v >> 16) & 0xFF); + p[3] = (uint8_t)((v >> 24) & 0xFF); +} + +/* ---- synthetic PE32+ DLL builder ---------------------------------- */ + +/* Layout (file offsets): + * 0x000 .. 0x03F DOS header (e_lfanew = 0x40) + * 0x040 .. 0x043 "PE\0\0" + * 0x044 .. 0x057 IMAGE_FILE_HEADER (20 bytes) + * 0x058 .. 0x147 IMAGE_OPTIONAL_HEADER64 (240 bytes) + * 0x148 .. 0x16F one IMAGE_SECTION_HEADER (40 bytes) + * 0x170 .. 0x36F section raw data (RVA 0x1000, 0x200 bytes) + * + * The single section ".edata" at RVA 0x1000 carries the export + * directory plus its tables and strings. The export DataDirectory + * record points at the start of that section and covers everything + * including the forwarder target string so the reader classifies + * "OTHERDLL.OtherSym" EAT entries as forwarders. 
*/ + +#define E_LFANEW 0x40u +#define FH_OFF (E_LFANEW + 4u) +#define OH_OFF (FH_OFF + COFF_FILE_HEADER_SIZE) +#define SH_OFF (OH_OFF + COFF_OPT_HDR64_SIZE) +#define RAW_OFF 0x170u +#define SEC_VA 0x1000u +#define SEC_RAW_SZ 0x200u +#define FILE_SIZE (RAW_OFF + SEC_RAW_SZ) + +/* In-section offsets (relative to RAW_OFF / RVA = SEC_VA + off). */ +#define EXP_DIR_OFF 0u +#define EAT_OFF (EXP_DIR_OFF + COFF_EXPORT_DIR_SIZE) /* +40 */ +#define EAT_COUNT 2u +#define ENT_OFF (EAT_OFF + EAT_COUNT * 4u) /* +48 */ +#define ENT_COUNT 2u +#define ORD_OFF (ENT_OFF + ENT_COUNT * 4u) /* +56 */ +#define DLLNAME_OFF (ORD_OFF + ENT_COUNT * 2u) /* +60 */ + +static const char kDllName[] = "TestDll.dll"; +static const char kDirect[] = "DirectFn"; +static const char kForwarded[] = "ForwardedFn"; +static const char kForwardTarget[] = "OTHERDLL.OtherSym"; + +#define DIRECT_NAME_OFF (DLLNAME_OFF + (uint32_t)sizeof kDllName) +#define FORWARDED_NAME_OFF (DIRECT_NAME_OFF + (uint32_t)sizeof kDirect) +#define FORWARD_TGT_OFF (FORWARDED_NAME_OFF + (uint32_t)sizeof kForwarded) +#define EXP_DIR_END (FORWARD_TGT_OFF + (uint32_t)sizeof kForwardTarget) + +/* Some RVA outside the export directory range — interpreted as a + * direct export pointing into the (notional) code section. */ +#define DIRECT_FN_RVA 0x2000u + +static void build_dso(uint8_t* buf) { + memset(buf, 0, FILE_SIZE); + + /* DOS header. */ + wr_u16(buf + 0, IMAGE_DOS_SIGNATURE); + wr_u32(buf + 60, E_LFANEW); + + /* PE signature. */ + wr_u32(buf + E_LFANEW, IMAGE_NT_SIGNATURE); + + /* IMAGE_FILE_HEADER. */ + wr_u16(buf + FH_OFF + 0, IMAGE_FILE_MACHINE_AMD64); + wr_u16(buf + FH_OFF + 2, 1); /* NumberOfSections */ + wr_u32(buf + FH_OFF + 4, 0); /* TimeDateStamp */ + wr_u32(buf + FH_OFF + 8, 0); /* PointerToSymbolTable */ + wr_u32(buf + FH_OFF + 12, 0); /* NumberOfSymbols */ + wr_u16(buf + FH_OFF + 16, COFF_OPT_HDR64_SIZE); + wr_u16(buf + FH_OFF + 18, IMAGE_FILE_DLL); + + /* IMAGE_OPTIONAL_HEADER64. 
Only the fields the reader inspects + * matter: Magic, and the export DataDirectory at index 0. */ + wr_u16(buf + OH_OFF + 0, IMAGE_NT_OPTIONAL_HDR64_MAGIC); + /* Data directories live at the tail of the optional header. */ + uint32_t dd_off = OH_OFF + COFF_OPT_HDR64_SIZE + - COFF_NUM_DATA_DIRECTORIES * COFF_DATA_DIRECTORY_SIZE; + uint32_t exp_rva = SEC_VA + EXP_DIR_OFF; + uint32_t exp_size = EXP_DIR_END; + wr_u32(buf + dd_off + IMAGE_DIRECTORY_ENTRY_EXPORT * 8u + 0, exp_rva); + wr_u32(buf + dd_off + IMAGE_DIRECTORY_ENTRY_EXPORT * 8u + 4, exp_size); + + /* One section header: ".edata". */ + memcpy(buf + SH_OFF + 0, ".edata\0\0", 8); + wr_u32(buf + SH_OFF + 8, exp_size); /* VirtualSize */ + wr_u32(buf + SH_OFF + 12, SEC_VA); /* VirtualAddress */ + wr_u32(buf + SH_OFF + 16, SEC_RAW_SZ); /* SizeOfRawData */ + wr_u32(buf + SH_OFF + 20, RAW_OFF); /* PointerToRawData */ + wr_u32(buf + SH_OFF + 24, 0); /* PtrToRelocations */ + wr_u32(buf + SH_OFF + 28, 0); /* PtrToLinenumbers */ + wr_u16(buf + SH_OFF + 32, 0); /* NumberOfRelocations */ + wr_u16(buf + SH_OFF + 34, 0); /* NumberOfLinenumbers */ + wr_u32(buf + SH_OFF + 36, 0x40000040u); /* Characteristics: + INITIALIZED_DATA | + MEM_READ */ + + /* Section raw data — written via RAW_OFF + off. */ + uint8_t* sec = buf + RAW_OFF; + + /* Export Directory header. 
*/ + wr_u32(sec + EXP_DIR_OFF + 0, 0); /* Characteristics */ + wr_u32(sec + EXP_DIR_OFF + 4, 0); /* TimeDateStamp */ + wr_u16(sec + EXP_DIR_OFF + 8, 0); /* MajorVersion */ + wr_u16(sec + EXP_DIR_OFF + 10, 0); /* MinorVersion */ + wr_u32(sec + EXP_DIR_OFF + 12, SEC_VA + DLLNAME_OFF); /* Name */ + wr_u32(sec + EXP_DIR_OFF + 16, 1); /* Base */ + wr_u32(sec + EXP_DIR_OFF + 20, EAT_COUNT); /* NumberOfFunctions */ + wr_u32(sec + EXP_DIR_OFF + 24, ENT_COUNT); /* NumberOfNames */ + wr_u32(sec + EXP_DIR_OFF + 28, SEC_VA + EAT_OFF); /* AddressOfFunctions */ + wr_u32(sec + EXP_DIR_OFF + 32, SEC_VA + ENT_OFF); /* AddressOfNames */ + wr_u32(sec + EXP_DIR_OFF + 36, SEC_VA + ORD_OFF); /* AddressOfNameOrds */ + + /* EAT: index 0 = direct (outside export-dir range); + * index 1 = forwarder (inside export-dir range, pointing at + * the OTHERDLL.OtherSym string). */ + wr_u32(sec + EAT_OFF + 0u, DIRECT_FN_RVA); + wr_u32(sec + EAT_OFF + 4u, SEC_VA + FORWARD_TGT_OFF); + + /* ENT: RVAs of the two name strings, in alphabetical-ish order. + * The reader walks ENT[i] -> Ord[i] -> EAT[Ord[i]]. */ + wr_u32(sec + ENT_OFF + 0u, SEC_VA + DIRECT_NAME_OFF); + wr_u32(sec + ENT_OFF + 4u, SEC_VA + FORWARDED_NAME_OFF); + + /* Ordinal table: index into the EAT. */ + wr_u16(sec + ORD_OFF + 0u, 0); + wr_u16(sec + ORD_OFF + 2u, 1); + + /* Strings. 
*/ + memcpy(sec + DLLNAME_OFF, kDllName, sizeof kDllName); + memcpy(sec + DIRECT_NAME_OFF, kDirect, sizeof kDirect); + memcpy(sec + FORWARDED_NAME_OFF, kForwarded, sizeof kForwarded); + memcpy(sec + FORWARD_TGT_OFF, kForwardTarget, sizeof kForwardTarget); +} + +/* ---- main --------------------------------------------------------- */ + +static int has_sym(const ObjBuilder* ob, Pool* p, const char* name) { + Sym needle = pool_intern_cstr(p, name); + ObjSymIter* it = obj_symiter_new(ob); + ObjSymEntry e; + int found = 0; + while (obj_symiter_next(it, &e)) { + if (e.sym && !e.sym->removed && e.sym->name == needle && + e.sym->section_id == OBJ_SEC_NONE && e.sym->bind == SB_GLOBAL) { + found = 1; + break; + } + } + obj_symiter_free(it); + return found; +} + +int main(void) { + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + fprintf(stderr, "FAIL: compiler_new\n"); + return 1; + } + if (setjmp(c->panic)) { + fprintf(stderr, "FAIL: panic during pe-dso-forwarder\n"); + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + return 1; + } + + uint8_t* buf = (uint8_t*)malloc(FILE_SIZE); + EXPECT(buf != NULL, "malloc PE buffer"); + if (!buf) { + cfree_compiler_free((CfreeCompiler*)c); + return 1; + } + build_dso(buf); + + Sym soname = 0; + ObjBuilder* ob = read_coff_dso(c, "TestDll.dll", buf, FILE_SIZE, &soname); + EXPECT(ob != NULL, "read_coff_dso returned NULL"); + + /* soname propagated from the Export Directory's Name field. */ + Sym expected_soname = pool_intern_cstr(c->global, kDllName); + EXPECT(soname == expected_soname, + "soname mismatch (expected interned \"%s\")", kDllName); + + /* Both exports must surface — direct and forwarder treated the same + * way by read_coff_dso (the OS loader chases the forwarder chain at + * runtime; the linker just needs the name defined). 
*/ + EXPECT(has_sym(ob, c->global, kDirect), + "direct export \"%s\" missing from ObjBuilder", kDirect); + EXPECT(has_sym(ob, c->global, kForwarded), + "forwarded export \"%s\" missing from ObjBuilder", kForwarded); + + free(buf); + cfree_compiler_free((CfreeCompiler*)c); + + if (g_failures) { + fprintf(stderr, "FAILED %d assertion(s)\n", g_failures); + return 1; + } + fprintf(stderr, "OK pe-dso-forwarder\n"); + return 0; +} diff --git a/test/coff/pe-import-mingw.c b/test/coff/pe-import-mingw.c @@ -0,0 +1,377 @@ +/* PE import-directory smoke test using a real mingw archive. + * + * Counterpart to pe-import-smoke.c, which exercises the Microsoft + * short-import format (Sig1=0/Sig2=0xFFFF). Mingw archives use the + * long-form layout instead — every member is a regular long-form COFF + * `.o` file with `.idata$N` sections — so this test drives the + * long-form absorption path in link_add_archive_bytes (per-function + * stubs reclassified as DSO shims at archive-ingest time, head/trailer + * members dropped). + * + * Skips cleanly when the mingw toolchain isn't installed. + * + * Verification: assemble the target program against the archive, write + * a PE32+ to /tmp, then probe with x86_64-w64-mingw32-objdump -p and + * assert (a) the import directory has KERNEL32.dll, (b) ExitProcess is + * the only resolved import. */ + +#include <cfree/core.h> +#include <cfree/link.h> +#include <cfree/object.h> +#include <setjmp.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "core/core.h" +#include "core/pool.h" +#include "link/link.h" +#include "obj/obj.h" + +#define MINGW_ARCHIVE_PATH \ + "/opt/homebrew/opt/mingw-w64/toolchain-x86_64/x86_64-w64-mingw32/lib/" \ + "libkernel32.a" + +/* mingw's archive declares the DLL name in uppercase. */ +#define MINGW_DLL_NAME "KERNEL32.dll" +/* Whichever case derive_dll_name_from_archive_path picks; matched + * case-insensitively in objdump output below. 
*/ +#define MINGW_IMPORT_FN "ExitProcess" + +/* The exit-process program: e8 disp32 c3 (call ExitProcess; ret). */ +static const uint8_t PROG_TEXT_X64[6] = {0xe8, 0, 0, 0, 0, 0xc3}; + +/* ---- env vtables --------------------------------------------------- */ + +static void* heap_alloc(CfreeHeap* h, size_t n, size_t a) { + (void)h; + (void)a; + return n ? malloc(n) : NULL; +} +static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) { + (void)h; + (void)o; + (void)a; + return realloc(p, n); +} +static void heap_free(CfreeHeap* h, void* p, size_t n) { + (void)h; + (void)n; + free(p); +} +static CfreeHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL}; + +static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, + const char* fmt, va_list ap) { + static const char* names[] = {"note", "warning", "error", "fatal"}; + (void)s; + (void)loc; + fprintf(stderr, "%s: ", names[k]); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); +} +static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; + +/* ---- failure tracking --------------------------------------------- */ + +static int g_failures; +#define EXPECT(cond, ...) 
\ + do { \ + if (!(cond)) { \ + fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fputc('\n', stderr); \ + g_failures++; \ + } \ + } while (0) + +/* ---- target / compiler ------------------------------------------- */ + +static CfreeContext g_ctx; + +static void target_x64_windows(CfreeTarget* t) { + memset(t, 0, sizeof *t); + t->arch = CFREE_ARCH_X86_64; + t->os = CFREE_OS_WINDOWS; + t->obj = CFREE_OBJ_COFF; + t->ptr_size = 8; + t->ptr_align = 8; + t->big_endian = false; + t->pic = CFREE_PIC_PIE; + t->code_model = CFREE_CM_SMALL; +} + +static Compiler* make_compiler(const CfreeTarget* t) { + memset(&g_ctx, 0, sizeof g_ctx); + g_ctx.heap = &g_heap; + g_ctx.diag = &g_diag; + g_ctx.now = -1; + CfreeCompiler* cc = NULL; + if (cfree_compiler_new(*t, &g_ctx, &cc) != CFREE_OK || !cc) return NULL; + return (Compiler*)cc; +} + +/* ---- program ObjBuilder builder ----------------------------------- */ + +static ObjBuilder* build_program(Compiler* c) { + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym text_name = pool_intern_cstr(p, ".text"); + Sym main_name = pool_intern_cstr(p, "mainCRTStartup"); + Sym exit_name = pool_intern_cstr(p, MINGW_IMPORT_FN); + ObjSecId text = obj_section(ob, text_name, SEC_TEXT, + SF_ALLOC | SF_EXEC, 16); + obj_write(ob, text, PROG_TEXT_X64, sizeof PROG_TEXT_X64); + obj_symbol(ob, main_name, SB_GLOBAL, SK_FUNC, text, 0, + sizeof PROG_TEXT_X64); + ObjSymId exit_sym = + obj_symbol(ob, exit_name, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); + obj_reloc(ob, text, 1, R_PC32, exit_sym, -4); + obj_finalize(ob); + return ob; +} + +/* ---- archive / objdump helpers ------------------------------------ */ + +static int have_mingw_objdump(void) { + FILE* fp = popen( + "command -v x86_64-w64-mingw32-objdump 2>/dev/null", "r"); + if (!fp) return 0; + char buf[256]; + size_t n = fread(buf, 1, sizeof buf - 1, fp); + pclose(fp); + return n > 0; +} + +static uint8_t* slurp_file(const char* path, size_t* len_out) { 
+ FILE* fp = fopen(path, "rb"); + if (!fp) return NULL; + if (fseek(fp, 0, SEEK_END) != 0) { + fclose(fp); + return NULL; + } + long sz = ftell(fp); + if (sz < 0) { + fclose(fp); + return NULL; + } + rewind(fp); + uint8_t* buf = (uint8_t*)malloc((size_t)sz); + if (!buf) { + fclose(fp); + return NULL; + } + size_t got = fread(buf, 1, (size_t)sz, fp); + fclose(fp); + if (got != (size_t)sz) { + free(buf); + return NULL; + } + *len_out = (size_t)sz; + return buf; +} + +static char* slurp_cmd(const char* cmd) { + FILE* fp = popen(cmd, "r"); + if (!fp) return NULL; + size_t cap = 4096, len = 0; + char* buf = (char*)malloc(cap); + if (!buf) { + pclose(fp); + return NULL; + } + for (;;) { + if (len + 1024 + 1 > cap) { + cap *= 2; + char* nb = (char*)realloc(buf, cap); + if (!nb) { + free(buf); + pclose(fp); + return NULL; + } + buf = nb; + } + size_t got = fread(buf + len, 1, 1024, fp); + len += got; + if (got < 1024) break; + } + int rc = pclose(fp); + (void)rc; + buf[len] = '\0'; + return buf; +} + +static int contains_ci(const char* haystack, const char* needle) { + size_t nn = strlen(needle); + for (const char* p = haystack; *p; ++p) { + size_t i = 0; + while (i < nn) { + int hc = (unsigned char)p[i]; + int nc = (unsigned char)needle[i]; + if (hc >= 'A' && hc <= 'Z') hc += 32; + if (nc >= 'A' && nc <= 'Z') nc += 32; + if (hc != nc) break; + ++i; + } + if (i == nn) return 1; + } + return 0; +} + +/* ---- main ---------------------------------------------------------- */ + +int main(void) { + if (!have_mingw_objdump()) { + fprintf(stderr, "SKIP: x86_64-w64-mingw32-objdump not on PATH\n"); + return 0; + } + + size_t ar_len = 0; + uint8_t* ar_bytes = slurp_file(MINGW_ARCHIVE_PATH, &ar_len); + if (!ar_bytes || !ar_len) { + fprintf(stderr, "SKIP: cannot read %s\n", MINGW_ARCHIVE_PATH); + free(ar_bytes); + return 0; + } + + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + fprintf(stderr, "FAIL: compiler_new\n"); + free(ar_bytes); + 
return 1; + } + if (setjmp(c->panic)) { + fprintf(stderr, "FAIL: panic during pe-import-mingw\n"); + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + free(ar_bytes); + return 1; + } + + /* 1. Program ObjBuilder calling ExitProcess. */ + ObjBuilder* prog = build_program(c); + + /* 2. Drive the linker. The archive feeds in raw — link_add_archive_bytes + * classifies its ~3000 members and rewrites the per-function stubs + * into DSO shims; demand resolution then pulls only ExitProcess. */ + Linker* l = link_new(c); + EXPECT(l != NULL, "link_new returned NULL"); + link_add_obj(l, prog); + LinkInputId ar_id = link_add_archive_bytes(l, "libkernel32.a", ar_bytes, + ar_len, /*whole_archive=*/0, + /*link_mode=*/0, + /*group_id=*/0); + EXPECT(ar_id != LINK_INPUT_NONE, + "link_add_archive_bytes returned LINK_INPUT_NONE"); + link_set_entry(l, "mainCRTStartup"); + link_set_pie(l, 1); + link_set_emit_static_exe(l, 1); + + LinkImage* img = link_resolve(l); + EXPECT(img != NULL, "link_resolve returned NULL"); + if (!img) { + link_free(l); + cfree_compiler_free((CfreeCompiler*)c); + free(ar_bytes); + return 1; + } + + /* Sanity: ExitProcess should be present and marked imported. */ + { + Sym exit_name = pool_intern_cstr(c->global, MINGW_IMPORT_FN); + const LinkSymbol* found = NULL; + for (LinkSymId i = 1;; ++i) { + const LinkSymbol* s = link_symbol(img, i); + if (!s) break; + if (s->name == exit_name) { + found = s; + break; + } + } + EXPECT(found != NULL, + "%s LinkSymbol not present after link_resolve", MINGW_IMPORT_FN); + if (found) { + EXPECT(found->imported, + "%s.imported=0 (expected 1 after archive match)", + MINGW_IMPORT_FN); + } + } + + /* 3. Emit the PE. 
*/ + CfreeWriter* w = NULL; + if (cfree_writer_mem(&g_heap, &w) != CFREE_OK || !w) { + fprintf(stderr, "FAIL: cfree_writer_mem\n"); + link_image_free(img); + link_free(l); + cfree_compiler_free((CfreeCompiler*)c); + free(ar_bytes); + return 1; + } + link_emit_image_writer(img, w); + + size_t out_len = 0; + const uint8_t* out_bytes = cfree_writer_mem_bytes(w, &out_len); + EXPECT(out_len > 0, "link_emit_image_writer produced %zu bytes", out_len); + + const char* exe_path = "/tmp/pe-import-mingw.exe"; + (void)unlink(exe_path); + FILE* fp = fopen(exe_path, "wb"); + EXPECT(fp != NULL, "fopen(%s) for write", exe_path); + if (fp) { + size_t wr = fwrite(out_bytes, 1, out_len, fp); + EXPECT(wr == out_len, "fwrite wrote %zu / %zu", wr, out_len); + fclose(fp); + } + + cfree_writer_close(w); + link_image_free(img); + link_free(l); + + /* 4. Probe with objdump. */ + char* dump_p = slurp_cmd( + "x86_64-w64-mingw32-objdump -p /tmp/pe-import-mingw.exe 2>&1"); + EXPECT(dump_p != NULL, "slurp objdump -p"); + if (dump_p) { + /* KERNEL32.dll listed (case-insensitive — derive_dll_name picks + * lowercase, but mingw objdump renders names verbatim from the + * import directory's name string). */ + EXPECT(contains_ci(dump_p, "DLL Name: " MINGW_DLL_NAME) || + contains_ci(dump_p, "DLL Name: kernel32.dll"), + "objdump -p: KERNEL32.dll not in import directory\n---\n%s\n---", + dump_p); + EXPECT(strstr(dump_p, MINGW_IMPORT_FN) != NULL, + "objdump -p: '%s' not in import directory\n---\n%s\n---", + MINGW_IMPORT_FN, dump_p); + /* Verify it's the only KERNEL32 import — no other functions + * pulled in (dead-strip working). Count NUL-separated entries + * under DLL Name: KERNEL32 by counting hint/name lines that + * start with whitespace followed by a hex hint. mingw objdump + * prints them like: + * vma: Hint/Ord Member-Name Bound-To + * 3008 6 ExitProcess + * We just confirm the expected one shows up; an over-pull + * would show extra names like CreateFileA, CloseHandle, etc. 
+ * The dead-strip pass should suppress everything except the + * single referenced symbol. */ + EXPECT(strstr(dump_p, "CreateFile") == NULL, + "objdump -p: unexpected CreateFile import (dead-strip " + "failure?)\n---\n%s\n---", dump_p); + EXPECT(strstr(dump_p, "CloseHandle") == NULL, + "objdump -p: unexpected CloseHandle import\n---\n%s\n---", dump_p); + free(dump_p); + } + + cfree_compiler_free((CfreeCompiler*)c); + free(ar_bytes); + + if (g_failures) { + fprintf(stderr, "FAILED %d assertion(s)\n", g_failures); + return 1; + } + fprintf(stderr, "OK pe-import-mingw\n"); + return 0; +} diff --git a/test/coff/pe-import-smoke.c b/test/coff/pe-import-smoke.c @@ -0,0 +1,435 @@ +/* PE import-directory smoke test — Phase 4.5 from doc/WINDOWS.md. + * + * Exercises the full chain: + * short-import shim bytes + * -> link_add_obj_bytes (reclassifies as DSO via OBJ_EXT_COFF + * annotation set by read_coff's short-import path) + * -> link_resolve (marks ExitProcess as imported, dso_input_id + * = the shim) + * -> link_emit_image_writer -> link_emit_coff (synthesizes + * .idata, IAT, per-arch IAT stub in .text) + * + * No execution — verification is byte-shape only via mingw's + * x86_64-w64-mingw32-objdump -p (import directory / headers) + * x86_64-w64-mingw32-objdump -d (disassembly of .text shows the + * call through the IAT stub) + * + * Skips cleanly with non-zero diagnostic-style message but exit 0 + * when the mingw objdump is not on PATH. */ + +#include <cfree/core.h> +#include <cfree/link.h> +#include <cfree/object.h> +#include <setjmp.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "core/core.h" +#include "core/pool.h" +#include "link/link.h" +#include "obj/obj.h" + +/* ---- short-import wire constants (mirror the spec in + * test/coff/cfree-roundtrip-coff.c::test_short_import_amd64). 
---- */ +#define SHIM_HEADER_SIZE 20u +#define SHIM_SYM_CSTR "ExitProcess" +#define SHIM_DLL_CSTR "KERNEL32.dll" +#define SHIM_SYM_NUL_LEN 12u /* "ExitProcess\0" */ +#define SHIM_DLL_NUL_LEN 13u /* "KERNEL32.dll\0" */ +#define SHIM_DATA_LEN (SHIM_SYM_NUL_LEN + SHIM_DLL_NUL_LEN) /* 25 */ +#define SHIM_TOTAL_LEN (SHIM_HEADER_SIZE + SHIM_DATA_LEN) /* 45 */ + +/* IMAGE_FILE_MACHINE_AMD64. */ +#define COFF_MACHINE_AMD64 0x8664u +/* Sig1=0, Sig2=0xFFFF marks a short-import record. */ +#define COFF_SHIMP_SIG2 0xFFFFu +/* TypeFlags = Type=CODE(0) | (NameType=NAME(1) << 2) = 0x0004. */ +#define COFF_SHIMP_TYPEFLAGS 0x0004u + +/* PE optional-header / data-directory constants we assert. */ +#define PE_DD_IDX_IMPORT 1 +#define PE_DD_IDX_IAT 12 + +/* The exit-process program: e8 disp32 c3 (call ExitProcess; ret). + * disp32 is patched by R_PC32 against an undef ExitProcess. */ +static const uint8_t PROG_TEXT_X64[6] = {0xe8, 0, 0, 0, 0, 0xc3}; + +/* ---- env vtables --------------------------------------------------- */ + +static void* heap_alloc(CfreeHeap* h, size_t n, size_t a) { + (void)h; + (void)a; + return n ? malloc(n) : NULL; +} +static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) { + (void)h; + (void)o; + (void)a; + return realloc(p, n); +} +static void heap_free(CfreeHeap* h, void* p, size_t n) { + (void)h; + (void)n; + free(p); +} +static CfreeHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL}; + +static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, + const char* fmt, va_list ap) { + static const char* names[] = {"note", "warning", "error", "fatal"}; + (void)s; + (void)loc; + fprintf(stderr, "%s: ", names[k]); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); +} +static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; + +/* ---- failure tracking --------------------------------------------- */ + +static int g_failures; +#define EXPECT(cond, ...) 
\ + do { \ + if (!(cond)) { \ + fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fputc('\n', stderr); \ + g_failures++; \ + } \ + } while (0) + +/* ---- target / compiler ------------------------------------------- */ + +static CfreeContext g_ctx; + +static void target_x64_windows(CfreeTarget* t) { + memset(t, 0, sizeof *t); + t->arch = CFREE_ARCH_X86_64; + t->os = CFREE_OS_WINDOWS; + t->obj = CFREE_OBJ_COFF; + t->ptr_size = 8; + t->ptr_align = 8; + t->big_endian = false; + t->pic = CFREE_PIC_PIE; + t->code_model = CFREE_CM_SMALL; +} + +static Compiler* make_compiler(const CfreeTarget* t) { + memset(&g_ctx, 0, sizeof g_ctx); + g_ctx.heap = &g_heap; + g_ctx.diag = &g_diag; + g_ctx.now = -1; + CfreeCompiler* cc = NULL; + if (cfree_compiler_new(*t, &g_ctx, &cc) != CFREE_OK || !cc) return NULL; + return (Compiler*)cc; +} + +/* ---- short-import shim builder ------------------------------------ */ + +static void build_short_import_amd64(uint8_t buf[SHIM_TOTAL_LEN]) { + memset(buf, 0, SHIM_TOTAL_LEN); + /* Sig1 = 0 (bytes 0..1 already 0). */ + /* Sig2 = 0xFFFF. */ + buf[2] = (uint8_t)(COFF_SHIMP_SIG2 & 0xFF); + buf[3] = (uint8_t)((COFF_SHIMP_SIG2 >> 8) & 0xFF); + /* Version = 0. */ + /* Machine. */ + buf[6] = (uint8_t)(COFF_MACHINE_AMD64 & 0xFF); + buf[7] = (uint8_t)((COFF_MACHINE_AMD64 >> 8) & 0xFF); + /* TimeDateStamp = 0 (bytes 8..11). */ + /* SizeOfData. */ + buf[12] = (uint8_t)(SHIM_DATA_LEN & 0xFFu); + buf[13] = (uint8_t)((SHIM_DATA_LEN >> 8) & 0xFFu); + buf[14] = (uint8_t)((SHIM_DATA_LEN >> 16) & 0xFFu); + buf[15] = (uint8_t)((SHIM_DATA_LEN >> 24) & 0xFFu); + /* OrdinalOrHint = 0 (16..17). */ + /* TypeFlags. */ + buf[18] = (uint8_t)(COFF_SHIMP_TYPEFLAGS & 0xFF); + buf[19] = (uint8_t)((COFF_SHIMP_TYPEFLAGS >> 8) & 0xFF); + /* Body: "ExitProcess\0" + "KERNEL32.dll\0". 
*/ + memcpy(buf + SHIM_HEADER_SIZE, SHIM_SYM_CSTR, SHIM_SYM_NUL_LEN); + memcpy(buf + SHIM_HEADER_SIZE + SHIM_SYM_NUL_LEN, SHIM_DLL_CSTR, + SHIM_DLL_NUL_LEN); +} + +/* ---- program ObjBuilder builder ----------------------------------- */ + +static ObjBuilder* build_program(Compiler* c) { + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym text_name = pool_intern_cstr(p, ".text"); + Sym main_name = pool_intern_cstr(p, "mainCRTStartup"); + Sym exit_name = pool_intern_cstr(p, SHIM_SYM_CSTR); + ObjSecId text = obj_section(ob, text_name, SEC_TEXT, + SF_ALLOC | SF_EXEC, 16); + obj_write(ob, text, PROG_TEXT_X64, sizeof PROG_TEXT_X64); + /* mainCRTStartup at .text offset 0. */ + obj_symbol(ob, main_name, SB_GLOBAL, SK_FUNC, text, 0, + sizeof PROG_TEXT_X64); + /* ExitProcess as undef; reloc against the `e8` displacement (offset 1). */ + ObjSymId exit_sym = + obj_symbol(ob, exit_name, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); + obj_reloc(ob, text, 1, R_PC32, exit_sym, -4); + obj_finalize(ob); + return ob; +} + +/* ---- objdump probe ------------------------------------------------ */ + +static int have_mingw_objdump(void) { + FILE* fp = popen( + "command -v x86_64-w64-mingw32-objdump 2>/dev/null", "r"); + if (!fp) return 0; + char buf[256]; + size_t n = fread(buf, 1, sizeof buf - 1, fp); + pclose(fp); + return n > 0; +} + +/* Run a shell command and slurp its stdout into a fresh malloc'd + * NUL-terminated string. Returns NULL on failure. 
*/ +static char* slurp_cmd(const char* cmd) { + FILE* fp = popen(cmd, "r"); + if (!fp) return NULL; + size_t cap = 4096, len = 0; + char* buf = (char*)malloc(cap); + if (!buf) { + pclose(fp); + return NULL; + } + for (;;) { + if (len + 1024 + 1 > cap) { + cap *= 2; + char* nb = (char*)realloc(buf, cap); + if (!nb) { + free(buf); + pclose(fp); + return NULL; + } + buf = nb; + } + size_t got = fread(buf + len, 1, 1024, fp); + len += got; + if (got < 1024) break; + } + int rc = pclose(fp); + (void)rc; + buf[len] = '\0'; + return buf; +} + +/* ---- main ---------------------------------------------------------- */ + +int main(void) { + if (!have_mingw_objdump()) { + fprintf(stderr, "SKIP: x86_64-w64-mingw32-objdump not on PATH\n"); + return 0; + } + + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + fprintf(stderr, "FAIL: compiler_new\n"); + return 1; + } + if (setjmp(c->panic)) { + fprintf(stderr, "FAIL: panic during pe-import-smoke\n"); + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + return 1; + } + + /* 1. Program ObjBuilder. */ + ObjBuilder* prog = build_program(c); + + /* 2. Short-import shim bytes. */ + uint8_t shim[SHIM_TOTAL_LEN]; + build_short_import_amd64(shim); + + /* 3. Drive the linker. */ + Linker* l = link_new(c); + EXPECT(l != NULL, "link_new returned NULL"); + link_add_obj(l, prog); + LinkInputId dso_id = link_add_obj_bytes( + l, "ExitProcess.lib-member", shim, SHIM_TOTAL_LEN); + EXPECT(dso_id != LINK_INPUT_NONE, + "link_add_obj_bytes returned LINK_INPUT_NONE for short-import shim"); + link_set_entry(l, "mainCRTStartup"); + link_set_pie(l, 1); + link_set_emit_static_exe(l, 1); + + LinkImage* img = link_resolve(l); + EXPECT(img != NULL, "link_resolve returned NULL"); + if (!img) { + link_free(l); + cfree_compiler_free((CfreeCompiler*)c); + return 1; + } + + /* Sanity: walk LinkSyms and find ExitProcess. 
The globals hashmap only + * holds defined symbols, so link_symbol_lookup can't find an imported + * undef by name — iterate the dense LinkSyms array instead. */ + { + Sym exit_name = pool_intern_cstr(c->global, SHIM_SYM_CSTR); + const LinkSymbol* found = NULL; + /* link_symbol returns NULL once we walk off the end. */ + for (LinkSymId i = 1;; ++i) { + const LinkSymbol* s = link_symbol(img, i); + if (!s) break; + if (s->name == exit_name) { + found = s; + break; + } + } + EXPECT(found != NULL, + "ExitProcess LinkSymbol not present after link_resolve"); + if (found) { + EXPECT(found->imported, + "ExitProcess.imported=0 (expected 1 after DSO match)"); + EXPECT(found->dso_input_id == dso_id, + "ExitProcess.dso_input_id=%u (expected %u)", + (unsigned)found->dso_input_id, (unsigned)dso_id); + } + } + + /* 4. Emit the PE. */ + CfreeWriter* w = NULL; + if (cfree_writer_mem(&g_heap, &w) != CFREE_OK || !w) { + fprintf(stderr, "FAIL: cfree_writer_mem\n"); + link_image_free(img); + link_free(l); + cfree_compiler_free((CfreeCompiler*)c); + return 1; + } + link_emit_image_writer(img, w); + + size_t out_len = 0; + const uint8_t* out_bytes = cfree_writer_mem_bytes(w, &out_len); + EXPECT(out_len > 0, "link_emit_image_writer produced %zu bytes", out_len); + + /* 5. Write to /tmp and shell out to objdump. */ + const char* exe_path = "/tmp/pe-import-smoke.exe"; + (void)unlink(exe_path); + FILE* fp = fopen(exe_path, "wb"); + EXPECT(fp != NULL, "fopen(%s) for write", exe_path); + if (fp) { + size_t wr = fwrite(out_bytes, 1, out_len, fp); + EXPECT(wr == out_len, "fwrite wrote %zu / %zu", wr, out_len); + fclose(fp); + } + + cfree_writer_close(w); + link_image_free(img); + link_free(l); + + /* objdump -p shows headers + import directory. 
*/ + char* dump_p = slurp_cmd( + "x86_64-w64-mingw32-objdump -p /tmp/pe-import-smoke.exe 2>&1"); + EXPECT(dump_p != NULL, "slurp objdump -p"); + if (dump_p) { + EXPECT(strstr(dump_p, "Magic\t\t\t020b") != NULL || + strstr(dump_p, "Magic\t020b") != NULL || + strstr(dump_p, "020b\t(PE32+)") != NULL || + strstr(dump_p, "PE32+") != NULL, + "objdump -p: missing PE32+ magic 020b\n---\n%s\n---", dump_p); + EXPECT(strstr(dump_p, "SectionAlignment") != NULL && + strstr(dump_p, "00001000") != NULL, + "objdump -p: SectionAlignment 0x1000 missing"); + EXPECT(strstr(dump_p, "FileAlignment") != NULL && + strstr(dump_p, "00000200") != NULL, + "objdump -p: FileAlignment 0x200 missing"); + EXPECT(strstr(dump_p, "Subsystem") != NULL, + "objdump -p: Subsystem line missing"); + /* mingw objdump prints "Subsystem\t\t00000003\t(Windows CUI)" */ + EXPECT(strstr(dump_p, "Windows CUI") != NULL || + strstr(dump_p, "(Windows CUI)") != NULL, + "objdump -p: Subsystem != Windows CUI\n---\n%s\n---", dump_p); + /* Import directory: DLL Name: KERNEL32.dll. */ + EXPECT(strstr(dump_p, "DLL Name: " SHIM_DLL_CSTR) != NULL, + "objdump -p: 'DLL Name: %s' not found\n---\n%s\n---", + SHIM_DLL_CSTR, dump_p); + /* The hint/name array prints "<hint> <name>". Check ExitProcess + * appears in the import list. */ + EXPECT(strstr(dump_p, SHIM_SYM_CSTR) != NULL, + "objdump -p: '%s' not in import directory\n---\n%s\n---", + SHIM_SYM_CSTR, dump_p); + /* Data directories: IMPORT (idx 1) and IAT (idx 12) must be set. + * mingw objdump prints them as + * "Entry 1 NNNNNNNN NNNNNNNN Import Directory" + * "Entry c NNNNNNNN NNNNNNNN Import Address Table Directory" + * Reject "00000000 00000000" on those lines. */ + { + const char* imp_line = strstr(dump_p, "Import Directory"); + EXPECT(imp_line != NULL, + "objdump -p: 'Import Directory' line missing"); + if (imp_line) { + /* Walk back to start of line. 
*/ + const char* ls = imp_line; + while (ls > dump_p && ls[-1] != '\n') --ls; + EXPECT(strstr(ls, "00000000 00000000 [size]") == NULL && + strstr(ls, "\t00000000\t00000000\t") == NULL, + "Import Directory data-dir entry is zero\nline: %.*s", + (int)(imp_line - ls + (int)strlen("Import Directory")), ls); + } + const char* iat_line = + strstr(dump_p, "Import Address Table Directory"); + EXPECT(iat_line != NULL, + "objdump -p: 'Import Address Table Directory' line missing"); + if (iat_line) { + const char* ls = iat_line; + while (ls > dump_p && ls[-1] != '\n') --ls; + EXPECT(strstr(ls, "00000000 00000000 [size]") == NULL && + strstr(ls, "\t00000000\t00000000\t") == NULL, + "IAT data-dir entry is zero\nline: %.*s", + (int)(iat_line - ls + + (int)strlen("Import Address Table Directory")), + ls); + } + } + free(dump_p); + } + + /* objdump -d: confirm the .text disassembly has the call (from + * mainCRTStartup) plus the per-arch IAT stub `jmp *off(%rip)` that + * link_emit_coff appends. The PE has no symbol table — there's no + * <mainCRTStartup> label in the disassembly, just .text. */ + char* dump_d = slurp_cmd( + "x86_64-w64-mingw32-objdump -d /tmp/pe-import-smoke.exe 2>&1"); + EXPECT(dump_d != NULL, "slurp objdump -d"); + if (dump_d) { + EXPECT(strstr(dump_d, "<.text>") != NULL, + "objdump -d: <.text> section header missing\n---\n%s\n---", + dump_d); + /* The mainCRTStartup body is a `call <disp32>` at the entry. The + * disp32 must have been patched away from zero by the linker — + * objdump renders it as `call 0xNNNNNNNN`, never `call 0x0`. 
*/ + EXPECT(strstr(dump_d, "call ") != NULL || strstr(dump_d, "callq ") != NULL, + "objdump -d: no call instruction in disassembly\n---\n%s\n---", + dump_d); + EXPECT(strstr(dump_d, "call 0x0\n") == NULL && + strstr(dump_d, "callq 0x0\n") == NULL, + "objdump -d: call target left at 0x0 (unrelocated)\n---\n%s\n---", + dump_d); + /* The IAT stub is the `ff 25 disp32` indirect jmp the per-arch + * stub emitter appends to .text for the imported symbol. */ + EXPECT(strstr(dump_d, "jmp *") != NULL || + strstr(dump_d, "jmpq *") != NULL || + strstr(dump_d, "ff 25") != NULL, + "objdump -d: no IAT stub `jmp *off(%%rip)` in .text\n---\n%s\n---", + dump_d); + free(dump_d); + } + + cfree_compiler_free((CfreeCompiler*)c); + + if (g_failures) { + fprintf(stderr, "FAILED %d assertion(s)\n", g_failures); + return 1; + } + fprintf(stderr, "OK pe-import-smoke\n"); + return 0; +} diff --git a/test/coff/pe-mixed-archive.c b/test/coff/pe-mixed-archive.c @@ -0,0 +1,336 @@ +/* Mixed-member archive ingestion test. + * + * Verifies that a single archive containing BOTH a short-import member + * and a long-form COFF object with a real defined symbol satisfies + * references from both shapes in one pass — the same composition + * llvm-mingw's libucrt.a uses (api-ms-win-crt-*.dll short imports + * alongside lib64_libucrt_extra_a-*.o helpers). + * + * Composition: + * Member A: short-import record (Sig1=0/Sig2=0xFFFF) for `ImportedFn` + * living in `FOO.dll`. + * Member B: a normal COFF object (emit_coff'd from a tiny ObjBuilder) + * defining `g_helper_value` in `.data`. + * + * The program references both via R_PC32 from .text. After link_resolve + * we assert: + * - ImportedFn surfaces as imported (dso_input_id != 0). + * - g_helper_value resolves to a real defined LinkSym. + * No external tools required. 
*/ + +#include <cfree/archive.h> +#include <cfree/core.h> +#include <cfree/link.h> +#include <cfree/object.h> +#include <setjmp.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "core/core.h" +#include "core/pool.h" +#include "link/link.h" +#include "obj/obj.h" + +/* ---- short-import wire constants (mirror pe-import-smoke.c). ---- */ +#define SHIM_HEADER_SIZE 20u +#define SHIM_SYM_CSTR "ImportedFn" +#define SHIM_DLL_CSTR "FOO.dll" +#define SHIM_SYM_NUL_LEN 11u /* "ImportedFn\0" */ +#define SHIM_DLL_NUL_LEN 8u /* "FOO.dll\0" */ +#define SHIM_DATA_LEN (SHIM_SYM_NUL_LEN + SHIM_DLL_NUL_LEN) /* 19 */ +#define SHIM_TOTAL_LEN (SHIM_HEADER_SIZE + SHIM_DATA_LEN) /* 39 */ +#define COFF_MACHINE_AMD64 0x8664u +#define COFF_SHIMP_SIG2 0xFFFFu +#define COFF_SHIMP_TYPEFLAGS 0x0004u /* Type=CODE | NameType=NAME */ + +#define HELPER_SYM_CSTR "g_helper_value" + +/* Program text: two `call disp32` instructions plus `ret`. Each call's + * disp32 is patched by the linker via R_PC32 against an undef target. */ +static const uint8_t PROG_TEXT_X64[11] = { + 0xe8, 0, 0, 0, 0, /* call ImportedFn */ + 0xe8, 0, 0, 0, 0, /* call g_helper_value (target treated as PC-rel + reference; data symbols can be referenced the + same way for the purposes of this test — the + linker just resolves the symbol address) */ + 0xc3, /* ret */ +}; + +/* ---- env vtables --------------------------------------------------- */ + +static void* heap_alloc(CfreeHeap* h, size_t n, size_t a) { + (void)h; + (void)a; + return n ? 
malloc(n) : NULL; +} +static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) { + (void)h; + (void)o; + (void)a; + return realloc(p, n); +} +static void heap_free(CfreeHeap* h, void* p, size_t n) { + (void)h; + (void)n; + free(p); +} +static CfreeHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL}; + +static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, + const char* fmt, va_list ap) { + static const char* names[] = {"note", "warning", "error", "fatal"}; + (void)s; + (void)loc; + fprintf(stderr, "%s: ", names[k]); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); +} +static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; + +static int g_failures; +#define EXPECT(cond, ...) \ + do { \ + if (!(cond)) { \ + fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fputc('\n', stderr); \ + g_failures++; \ + } \ + } while (0) + +/* ---- compiler / target -------------------------------------------- */ + +static CfreeContext g_ctx; + +static void target_x64_windows(CfreeTarget* t) { + memset(t, 0, sizeof *t); + t->arch = CFREE_ARCH_X86_64; + t->os = CFREE_OS_WINDOWS; + t->obj = CFREE_OBJ_COFF; + t->ptr_size = 8; + t->ptr_align = 8; + t->big_endian = false; + t->pic = CFREE_PIC_PIE; + t->code_model = CFREE_CM_SMALL; +} + +static Compiler* make_compiler(const CfreeTarget* t) { + memset(&g_ctx, 0, sizeof g_ctx); + g_ctx.heap = &g_heap; + g_ctx.diag = &g_diag; + g_ctx.now = -1; + CfreeCompiler* cc = NULL; + if (cfree_compiler_new(*t, &g_ctx, &cc) != CFREE_OK || !cc) return NULL; + return (Compiler*)cc; +} + +/* ---- builders ----------------------------------------------------- */ + +static void build_short_import_amd64(uint8_t buf[SHIM_TOTAL_LEN]) { + memset(buf, 0, SHIM_TOTAL_LEN); + /* Sig1=0, Sig2=0xFFFF. */ + buf[2] = (uint8_t)(COFF_SHIMP_SIG2 & 0xFF); + buf[3] = (uint8_t)((COFF_SHIMP_SIG2 >> 8) & 0xFF); + /* Machine. 
*/ + buf[6] = (uint8_t)(COFF_MACHINE_AMD64 & 0xFF); + buf[7] = (uint8_t)((COFF_MACHINE_AMD64 >> 8) & 0xFF); + /* SizeOfData. */ + buf[12] = (uint8_t)(SHIM_DATA_LEN & 0xFFu); + buf[13] = (uint8_t)((SHIM_DATA_LEN >> 8) & 0xFFu); + /* TypeFlags = CODE | NAME. */ + buf[18] = (uint8_t)(COFF_SHIMP_TYPEFLAGS & 0xFF); + buf[19] = (uint8_t)((COFF_SHIMP_TYPEFLAGS >> 8) & 0xFF); + memcpy(buf + SHIM_HEADER_SIZE, SHIM_SYM_CSTR, SHIM_SYM_NUL_LEN); + memcpy(buf + SHIM_HEADER_SIZE + SHIM_SYM_NUL_LEN, SHIM_DLL_CSTR, + SHIM_DLL_NUL_LEN); +} + +/* Build a long-form COFF object that defines `g_helper_value` in .data. */ +static uint8_t* build_helper_object(Compiler* c, size_t* len_out) { + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym data_name = pool_intern_cstr(p, ".data"); + Sym helper_name = pool_intern_cstr(p, HELPER_SYM_CSTR); + ObjSecId data = obj_section(ob, data_name, SEC_DATA, + SF_ALLOC | SF_WRITE, 4); + static const uint8_t kHelperBytes[4] = {0x2A, 0x00, 0x00, 0x00}; + obj_write(ob, data, kHelperBytes, sizeof kHelperBytes); + obj_symbol(ob, helper_name, SB_GLOBAL, SK_OBJ, data, 0, + sizeof kHelperBytes); + obj_finalize(ob); + + CfreeWriter* w = NULL; + if (cfree_writer_mem(&g_heap, &w) != CFREE_OK || !w) return NULL; + emit_coff(c, ob, w); + size_t n = 0; + const uint8_t* src = cfree_writer_mem_bytes(w, &n); + uint8_t* copy = (uint8_t*)malloc(n); + if (copy && n) memcpy(copy, src, n); + cfree_writer_close(w); + *len_out = n; + return copy; +} + +/* Program: .text references both ImportedFn (function) and + * g_helper_value (data) via R_PC32 relocs. 
*/ +static ObjBuilder* build_program(Compiler* c) { + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + Sym text_name = pool_intern_cstr(p, ".text"); + Sym main_name = pool_intern_cstr(p, "mainCRTStartup"); + Sym import_name = pool_intern_cstr(p, SHIM_SYM_CSTR); + Sym helper_name = pool_intern_cstr(p, HELPER_SYM_CSTR); + ObjSecId text = obj_section(ob, text_name, SEC_TEXT, + SF_ALLOC | SF_EXEC, 16); + obj_write(ob, text, PROG_TEXT_X64, sizeof PROG_TEXT_X64); + obj_symbol(ob, main_name, SB_GLOBAL, SK_FUNC, text, 0, + sizeof PROG_TEXT_X64); + ObjSymId import_sym = + obj_symbol(ob, import_name, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); + ObjSymId helper_sym = + obj_symbol(ob, helper_name, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); + obj_reloc(ob, text, 1, R_PC32, import_sym, -4); + obj_reloc(ob, text, 6, R_PC32, helper_sym, -4); + obj_finalize(ob); + return ob; +} + +/* ---- main --------------------------------------------------------- */ + +int main(void) { + CfreeTarget t; + target_x64_windows(&t); + Compiler* c = make_compiler(&t); + if (!c) { + fprintf(stderr, "FAIL: compiler_new\n"); + return 1; + } + if (setjmp(c->panic)) { + fprintf(stderr, "FAIL: panic during pe-mixed-archive\n"); + compiler_run_cleanups(c); + cfree_compiler_free((CfreeCompiler*)c); + return 1; + } + + ObjBuilder* prog = build_program(c); + + /* Member A: short-import shim. */ + uint8_t shim[SHIM_TOTAL_LEN]; + build_short_import_amd64(shim); + + /* Member B: long-form COFF object defining g_helper_value. */ + size_t helper_len = 0; + uint8_t* helper_bytes = build_helper_object(c, &helper_len); + EXPECT(helper_bytes != NULL && helper_len > 0, + "build_helper_object produced %zu bytes", helper_len); + if (!helper_bytes) { + cfree_compiler_free((CfreeCompiler*)c); + return 1; + } + + /* Assemble both into an archive. 
cfree's archive ingestion walks + * every member regardless of the symbol index, so symbol_index=0 is + * sufficient — the linker rediscovers each member's exports during + * scan_presence_before. */ + CfreeBytes members[2]; + members[0].name = "importfn.o"; + members[0].data = shim; + members[0].len = SHIM_TOTAL_LEN; + members[1].name = "helper.o"; + members[1].data = helper_bytes; + members[1].len = helper_len; + + CfreeWriter* aw = NULL; + if (cfree_writer_mem(&g_heap, &aw) != CFREE_OK || !aw) { + fprintf(stderr, "FAIL: writer_mem for archive\n"); + free(helper_bytes); + cfree_compiler_free((CfreeCompiler*)c); + return 1; + } + CfreeArWriteOptions opts; + memset(&opts, 0, sizeof opts); + CfreeStatus arst = cfree_ar_write(aw, members, 2, &opts); + EXPECT(arst == CFREE_OK, "cfree_ar_write rc=%d", (int)arst); + + size_t ar_len = 0; + const uint8_t* ar_view = cfree_writer_mem_bytes(aw, &ar_len); + uint8_t* ar_bytes = (uint8_t*)malloc(ar_len); + if (ar_bytes && ar_len) memcpy(ar_bytes, ar_view, ar_len); + cfree_writer_close(aw); + EXPECT(ar_bytes != NULL && ar_len > 0, + "archive empty after cfree_ar_write (len=%zu)", ar_len); + if (!ar_bytes) { + free(helper_bytes); + cfree_compiler_free((CfreeCompiler*)c); + return 1; + } + + /* Drive the linker. Name the archive `libmixed.a` so + * derive_dll_name_from_archive_path treats it as a potential import + * archive (archive_dll_name="mixed.dll") — the short-import member + * carries its own DLL name (FOO.dll) and overrides this fallback, and + * the long-form COFF object is classified COFF_AR_KEEP and read as a + * normal object regardless. 
*/ + Linker* l = link_new(c); + EXPECT(l != NULL, "link_new returned NULL"); + link_add_obj(l, prog); + LinkInputId ar_id = link_add_archive_bytes(l, "libmixed.a", ar_bytes, + ar_len, /*whole_archive=*/0, + /*link_mode=*/0, + /*group_id=*/0); + EXPECT(ar_id != LINK_INPUT_NONE, + "link_add_archive_bytes returned LINK_INPUT_NONE"); + link_set_entry(l, "mainCRTStartup"); + link_set_pie(l, 1); + link_set_emit_static_exe(l, 1); + + LinkImage* img = link_resolve(l); + EXPECT(img != NULL, "link_resolve returned NULL"); + if (img) { + Sym import_name = pool_intern_cstr(c->global, SHIM_SYM_CSTR); + Sym helper_name = pool_intern_cstr(c->global, HELPER_SYM_CSTR); + const LinkSymbol* importf = NULL; + const LinkSymbol* helper = NULL; + for (LinkSymId i = 1;; ++i) { + const LinkSymbol* s = link_symbol(img, i); + if (!s) break; + if (s->name == import_name) importf = s; + else if (s->name == helper_name) helper = s; + } + EXPECT(importf != NULL, + "ImportedFn LinkSymbol not present after link_resolve"); + if (importf) { + EXPECT(importf->imported, + "ImportedFn.imported=0 (expected 1; short-import member " + "should be pulled in and routed as DSO)"); + EXPECT(importf->dso_input_id != 0, + "ImportedFn.dso_input_id=0 (expected nonzero — short-import " + "member should be present as a DSO input)"); + } + EXPECT(helper != NULL, + "g_helper_value LinkSymbol not present after link_resolve"); + if (helper) { + EXPECT(!helper->imported, + "g_helper_value.imported=1 (expected 0; helper.o is a real " + "COFF object, not an import shim)"); + EXPECT(helper->section_id != LINK_SEC_NONE, + "g_helper_value.section_id=NONE (expected a real .data " + "section after long-form COFF ingestion)"); + } + link_image_free(img); + } + link_free(l); + + free(ar_bytes); + free(helper_bytes); + cfree_compiler_free((CfreeCompiler*)c); + + if (g_failures) { + fprintf(stderr, "FAILED %d assertion(s)\n", g_failures); + return 1; + } + fprintf(stderr, "OK pe-mixed-archive\n"); + return 0; +} diff --git 
a/test/coff/windows-system-dlls-smoke.sh b/test/coff/windows-system-dlls-smoke.sh @@ -0,0 +1,444 @@ +#!/usr/bin/env bash +# Windows system-DLL coverage smoke. +# +# Companion to windows-ucrt-hosted-smoke.sh: that script proves the UCRT +# console + GUI link round-trip for one program per surface (Sleep, +# runtime, stdio, TLS, GUI WinMain). This script broadens the surface +# across the typical large system DLLs an application links against: +# +# user32 + gdi32 (GUI window + drawing) +# advapi32 (registry) +# ws2_32 (Winsock lifecycle) +# ole32 (COM init) +# shell32 (CommandLineToArgvW) +# comctl32 (InitCommonControls) +# libucrt.a (mixed short-import + long-form members) +# +# Each program is built with `cfree cc` for both x86_64-windows and +# aarch64-windows; the link-level check inspects the PE import +# directory via `cfree objdump -p`. The Wine runtime check is run +# conditionally — same pattern as windows-ucrt-hosted-smoke.sh, and +# skipped with a `SKIP ...` message when podman or the Wine image is absent. +# +# Skip semantics: prints `SKIP ...` and exits 0 when no llvm-mingw +# UCRT sysroot is discoverable. + +set -euo pipefail + +ROOT=${CFREE_TEST_ROOT:-$(cd "$(dirname "$0")/../.." 
&& pwd)} +CFREE=${CFREE:-"$ROOT/build/cfree"} +SDK=${CFREE_MINGW_SYSROOT:-} + +find_sdk() { + local arch=$1 + local d + for d in \ + /tmp/llvm-mingw*/llvm-mingw-*-ucrt-*/"$arch"-w64-mingw32 \ + /tmp/llvm-mingw*/"$arch"-w64-mingw32 \ + /private/tmp/llvm-mingw*/llvm-mingw-*-ucrt-*/"$arch"-w64-mingw32 \ + /private/tmp/llvm-mingw*/"$arch"-w64-mingw32; do + if [ -d "$d/lib" ] && [ -r "$d/include/windows.h" ]; then + printf '%s\n' "$d" + return 0 + fi + done + return 1 +} + +sdk_for_arch() { + local arch=$1 + local base + if [ -n "$SDK" ]; then + if [ "$(basename "$SDK")" = "$arch-w64-mingw32" ]; then + printf '%s\n' "$SDK" + return 0 + fi + base=$(dirname "$SDK") + if [ -d "$base/$arch-w64-mingw32/lib" ] && + [ -r "$base/$arch-w64-mingw32/include/windows.h" ]; then + printf '%s\n' "$base/$arch-w64-mingw32" + return 0 + fi + fi + find_sdk "$arch" +} + +if [ ! -x "$CFREE" ]; then + echo "FAIL windows-system-dlls-smoke: cfree binary not found: $CFREE" >&2 + exit 1 +fi + +TMP=${TMPDIR:-/tmp} +WORK=$(mktemp -d "$TMP/cfree-windows-system-dlls-smoke.XXXXXX") +WORK_REAL=$(cd "$WORK" && pwd -P) +trap 'rm -rf "$WORK"' EXIT + +GUI_C=$WORK/gui_hello_window.c +GDI_C=$WORK/gdi_drawing.c +REG_C=$WORK/advapi32_registry.c +WS_C=$WORK/ws2_32_socket.c +OLE_C=$WORK/ole32_coinit.c +SHELL_C=$WORK/shell32_argv.c +COMCTL_C=$WORK/comctl32_init.c +MIXED_C=$WORK/mixed_ucrt.c + +# A hidden window + minimal message-pump program. Wine in a headless +# container may legitimately refuse to create a real window; the +# program tolerates that and returns 0 so the link-level imports check +# is what gates the test. 
+cat >"$GUI_C" <<'SRC' +#define UNICODE +#define _UNICODE +#include <windows.h> + +static LRESULT CALLBACK wp(HWND h, UINT m, WPARAM w, LPARAM l) { + if (m == WM_DESTROY) PostQuitMessage(0); + return DefWindowProcW(h, m, w, l); +} + +int WINAPI WinMain(HINSTANCE i, HINSTANCE p, LPSTR c, int s) { + (void)p; (void)c; (void)s; + WNDCLASSEXW wc; + ZeroMemory(&wc, sizeof(wc)); + wc.cbSize = sizeof(wc); + wc.lpfnWndProc = wp; + wc.hInstance = i; + wc.lpszClassName = L"cfree_hello"; + wc.hCursor = LoadCursorW(NULL, IDC_ARROW); + (void)RegisterClassExW(&wc); + HWND h = CreateWindowExW(0, L"cfree_hello", L"cfree", WS_OVERLAPPEDWINDOW, + 0, 0, 16, 16, NULL, NULL, i, NULL); + if (h) { + PostMessageW(h, WM_QUIT, 0, 0); + MSG msg; + while (PeekMessageW(&msg, NULL, 0, 0, PM_REMOVE)) { + if (msg.message == WM_QUIT) break; + TranslateMessage(&msg); + DispatchMessageW(&msg); + } + DestroyWindow(h); + } + return 0; +} +SRC + +# gdi32 surface: create a memory DC, select a stock font and brush, +# release. Stock objects do not require an active display, so this +# runs cleanly under Wine in headless containers. +cat >"$GDI_C" <<'SRC' +#include <windows.h> + +int main(void) { + HDC screen = GetDC(NULL); + HDC mem = CreateCompatibleDC(screen); + HGDIOBJ old_font = SelectObject(mem, GetStockObject(SYSTEM_FONT)); + HGDIOBJ old_brush = SelectObject(mem, GetStockObject(WHITE_BRUSH)); + TEXTMETRICW tm; + GetTextMetricsW(mem, &tm); + SelectObject(mem, old_font); + SelectObject(mem, old_brush); + DeleteDC(mem); + ReleaseDC(NULL, screen); + return tm.tmHeight > 0 ? 0 : 1; +} +SRC + +# advapi32 surface: open a well-known read-only registry key and close +# it. HKEY_CURRENT_USER\Environment exists by default under Wine. 
+cat >"$REG_C" <<'SRC' +#include <windows.h> + +int main(void) { + HKEY h = NULL; + LONG rc = RegOpenKeyExW(HKEY_CURRENT_USER, L"Environment", 0, KEY_READ, &h); + if (rc == ERROR_SUCCESS) { + DWORD subkeys = 0, values = 0; + RegQueryInfoKeyW(h, NULL, NULL, NULL, &subkeys, NULL, NULL, &values, + NULL, NULL, NULL, NULL); + RegCloseKey(h); + return 0; + } + /* Some Wine configurations may not pre-create the Environment key. + * The link-level test (imports satisfied) is what we care about. */ + return rc == ERROR_FILE_NOT_FOUND ? 0 : 2; +} +SRC + +# ws2_32 surface: full WSAStartup/socket/closesocket/WSACleanup +# lifecycle with no network traffic. +cat >"$WS_C" <<'SRC' +#include <winsock2.h> +#include <windows.h> + +int main(void) { + WSADATA wsa; + if (WSAStartup(MAKEWORD(2, 2), &wsa) != 0) return 1; + SOCKET s = socket(AF_INET, SOCK_DGRAM, 0); + if (s == INVALID_SOCKET) { WSACleanup(); return 2; } + closesocket(s); + WSACleanup(); + return 0; +} +SRC + +# ole32 surface: COM apartment init/teardown. +cat >"$OLE_C" <<'SRC' +#include <windows.h> +#include <objbase.h> + +int main(void) { + HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED); + if (FAILED(hr)) return 1; + CoUninitialize(); + return 0; +} +SRC + +# shell32 surface: CommandLineToArgvW round-trip. +cat >"$SHELL_C" <<'SRC' +#include <windows.h> +#include <shellapi.h> + +int main(void) { + int argc = 0; + LPWSTR cmd = GetCommandLineW(); + LPWSTR* argv = CommandLineToArgvW(cmd, &argc); + int ok = (argv != NULL && argc >= 1); + if (argv) LocalFree(argv); + return ok ? 0 : 1; +} +SRC + +# comctl32 surface: legacy InitCommonControls. Pulls in comctl32.dll +# imports without needing a real display. +cat >"$COMCTL_C" <<'SRC' +#include <windows.h> +#include <commctrl.h> + +int main(void) { + INITCOMMONCONTROLSEX icc; + ZeroMemory(&icc, sizeof(icc)); + icc.dwSize = sizeof(icc); + icc.dwICC = ICC_STANDARD_CLASSES; + /* Both spellings exist in libcomctl32.a; we use the simpler one. 
*/ + InitCommonControls(); + return InitCommonControlsEx(&icc) ? 0 : 0; +} +SRC + +# Mixed libucrt.a members: pulls in both short-import members +# (api-ms-win-crt-stdio for puts/fflush) and a long-form COFF helper +# (fabsf lives in lib64_libmingwex_a-*.o as a real .o member). +cat >"$MIXED_C" <<'SRC' +#include <math.h> +#include <stdio.h> +#include <windows.h> + +int main(void) { + float x = fabsf(-1.5f); + HANDLE out = GetStdHandle(STD_OUTPUT_HANDLE); + DWORD wrote = 0; + WriteFile(out, "mixed-ok\n", 9, &wrote, 0); + fflush(stdout); + return x == 1.5f ? 0 : 1; +} +SRC + +check_no_legacy_crt_imports() { + local dump=$1 + local what=$2 + if grep -Eiq 'DLL Name: (msvcrt|ucrt)\.dll' "$dump"; then + echo "FAIL windows-system-dlls-smoke: $what imports legacy CRT DLL" >&2 + grep -Ei 'DLL Name: (msvcrt|ucrt)\.dll' "$dump" >&2 + exit 1 + fi +} + +run_wine_if_available() { + local label=$1 + local image=$2 + local pod_arch=$3 + local exe=$4 + shift 4 + + if ! command -v podman >/dev/null 2>&1; then + echo "SKIP windows-system-dlls-smoke: podman unavailable for $label Wine run" + return 0 + fi + if ! podman image exists "$image" >/dev/null 2>&1; then + echo "SKIP windows-system-dlls-smoke: $image unavailable for $label Wine run" + return 0 + fi + + podman run --rm --arch "$pod_arch" -v "$WORK_REAL:/probe:ro" "$image" \ + bash -lc " + export WINEDEBUG=-all WINEPREFIX=/tmp/wineprefix + timeout 120s /usr/lib/wine/wine64 /probe/$(basename "$exe") $* + rc=\$? + echo \"$label exit=\$rc\" + test \"\$rc\" -eq 0 + " +} + +# build_and_check <label> <c-source> <exe> <dump> <link-mode> <libs> +# <expected-dll-1> [<expected-dll-2> ...] -- [<expected-sym-1> ...] +# +# link-mode is "console" or "windows" (drives -mconsole vs -mwindows). +# libs is a space-separated list of `-l<name>` archives to add (e.g. +# "gdi32 ws2_32") beyond the driver-auto-linked set +# (kernel32/user32/advapi32/shell32/msvcrt/mingwex/mingw32/moldname). 
+build_and_check() { + local label=$1 + local csrc=$2 + local exe=$3 + local dump=$4 + local mode=$5 + local libs=$6 + shift 6 + + local dlls=() + local syms=() + local in_syms=0 + while [ $# -gt 0 ]; do + if [ "$1" = "--" ]; then in_syms=1; shift; continue; fi + if [ "$in_syms" -eq 0 ]; then dlls+=("$1"); else syms+=("$1"); fi + shift + done + + local mode_flag=-mconsole + if [ "$mode" = "windows" ]; then mode_flag=-mwindows; fi + + local extra_lflags=() + local lib + for lib in $libs; do extra_lflags+=("-l$lib"); done + + "$CFREE" cc -target "$TARGET" --sysroot "$ARCH_SDK" "$mode_flag" \ + "$csrc" "${extra_lflags[@]}" -o "$exe" + "$CFREE" objdump -p "$exe" >"$dump" + check_no_legacy_crt_imports "$dump" "$label PE" + + local d + for d in "${dlls[@]}"; do + if ! grep -Fq "DLL Name: $d" "$dump"; then + echo "FAIL windows-system-dlls-smoke: $label: expected import of $d" >&2 + grep -F 'DLL Name:' "$dump" >&2 || true + exit 1 + fi + done + + local s + for s in "${syms[@]}"; do + if ! grep -Fq "Name: $s" "$dump"; then + echo "FAIL windows-system-dlls-smoke: $label: expected import symbol $s" >&2 + exit 1 + fi + done + + if [ "$mode" = "windows" ] && + ! grep -Fq "Subsystem: 2 (WINDOWS_GUI)" "$dump"; then + echo "FAIL windows-system-dlls-smoke: $label: subsystem != WINDOWS_GUI" >&2 + exit 1 + fi +} + +ran=0 +for arch in x86_64 aarch64; do + case "$arch" in + x86_64) + TARGET=x86_64-windows + LABEL=x64 + IMAGE=localhost/cfree-wine-amd64 + POD_ARCH=amd64 + ;; + aarch64) + TARGET=aarch64-windows + LABEL=aarch64 + IMAGE=localhost/cfree-wine-arm64 + POD_ARCH=arm64 + ;; + esac + + if ! ARCH_SDK=$(sdk_for_arch "$arch"); then + echo "SKIP windows-system-dlls-smoke: no $arch llvm-mingw UCRT sysroot" + continue + fi + if [ ! -r "$ARCH_SDK/include/windows.h" ] || + [ ! 
-r "$ARCH_SDK/lib/libucrt.a" ]; then + echo "SKIP windows-system-dlls-smoke: invalid UCRT llvm-mingw sysroot: $ARCH_SDK" + continue + fi + + ran=1 + + # ---- GUI hello window (user32 + gdi32 + kernel32) ---- + GUI_EXE=$WORK/gui_hello_window-$LABEL.exe + GUI_DUMP=$WORK/gui_hello_window-$LABEL.dump + build_and_check "$LABEL gui_hello_window" "$GUI_C" "$GUI_EXE" "$GUI_DUMP" \ + windows "" USER32.dll KERNEL32.dll -- RegisterClassExW CreateWindowExW \ + DefWindowProcW PeekMessageW DispatchMessageW + run_wine_if_available "$LABEL gui_hello_window" "$IMAGE" "$POD_ARCH" \ + "$GUI_EXE" + + # ---- gdi32 surface ---- + GDI_EXE=$WORK/gdi_drawing-$LABEL.exe + GDI_DUMP=$WORK/gdi_drawing-$LABEL.dump + build_and_check "$LABEL gdi_drawing" "$GDI_C" "$GDI_EXE" "$GDI_DUMP" \ + console gdi32 GDI32.dll USER32.dll -- CreateCompatibleDC GetStockObject \ + SelectObject DeleteDC + run_wine_if_available "$LABEL gdi_drawing" "$IMAGE" "$POD_ARCH" "$GDI_EXE" + + # ---- advapi32 surface ---- + REG_EXE=$WORK/advapi32_registry-$LABEL.exe + REG_DUMP=$WORK/advapi32_registry-$LABEL.dump + build_and_check "$LABEL advapi32_registry" "$REG_C" "$REG_EXE" "$REG_DUMP" \ + console "" ADVAPI32.dll KERNEL32.dll -- RegOpenKeyExW RegCloseKey \ + RegQueryInfoKeyW + run_wine_if_available "$LABEL advapi32_registry" "$IMAGE" "$POD_ARCH" \ + "$REG_EXE" + + # ---- ws2_32 surface ---- + WS_EXE=$WORK/ws2_32_socket-$LABEL.exe + WS_DUMP=$WORK/ws2_32_socket-$LABEL.dump + build_and_check "$LABEL ws2_32_socket" "$WS_C" "$WS_EXE" "$WS_DUMP" \ + console ws2_32 WS2_32.dll KERNEL32.dll -- WSAStartup WSACleanup socket \ + closesocket + run_wine_if_available "$LABEL ws2_32_socket" "$IMAGE" "$POD_ARCH" "$WS_EXE" + + # ---- ole32 surface ---- + OLE_EXE=$WORK/ole32_coinit-$LABEL.exe + OLE_DUMP=$WORK/ole32_coinit-$LABEL.dump + build_and_check "$LABEL ole32_coinit" "$OLE_C" "$OLE_EXE" "$OLE_DUMP" \ + console ole32 ole32.dll KERNEL32.dll -- CoInitializeEx CoUninitialize + run_wine_if_available "$LABEL ole32_coinit" "$IMAGE" 
"$POD_ARCH" "$OLE_EXE" + + # ---- shell32 surface ---- + SHELL_EXE=$WORK/shell32_argv-$LABEL.exe + SHELL_DUMP=$WORK/shell32_argv-$LABEL.dump + build_and_check "$LABEL shell32_argv" "$SHELL_C" "$SHELL_EXE" "$SHELL_DUMP" \ + console "" SHELL32.dll KERNEL32.dll -- CommandLineToArgvW + run_wine_if_available "$LABEL shell32_argv" "$IMAGE" "$POD_ARCH" \ + "$SHELL_EXE" + + # ---- comctl32 surface ---- + COMCTL_EXE=$WORK/comctl32_init-$LABEL.exe + COMCTL_DUMP=$WORK/comctl32_init-$LABEL.dump + build_and_check "$LABEL comctl32_init" "$COMCTL_C" "$COMCTL_EXE" \ + "$COMCTL_DUMP" console comctl32 COMCTL32.dll KERNEL32.dll -- \ + InitCommonControls InitCommonControlsEx + run_wine_if_available "$LABEL comctl32_init" "$IMAGE" "$POD_ARCH" \ + "$COMCTL_EXE" + + # ---- mixed libucrt.a (short-import + long-form helper) ---- + MIXED_EXE=$WORK/mixed_ucrt-$LABEL.exe + MIXED_DUMP=$WORK/mixed_ucrt-$LABEL.dump + build_and_check "$LABEL mixed_ucrt" "$MIXED_C" "$MIXED_EXE" "$MIXED_DUMP" \ + console "" KERNEL32.dll api-ms-win-crt-stdio-l1-1-0.dll -- fflush + run_wine_if_available "$LABEL mixed_ucrt" "$IMAGE" "$POD_ARCH" "$MIXED_EXE" +done + +if [ "$ran" -eq 0 ]; then + echo "SKIP windows-system-dlls-smoke: set CFREE_MINGW_SYSROOT or install llvm-mingw UCRT under /tmp/llvm-mingw*" + exit 0 +fi + +echo "PASS windows-system-dlls-smoke: user32/gdi32, advapi32, ws2_32, ole32, shell32, comctl32, mixed UCRT for x64/aarch64" diff --git a/test/coff/windows-ucrt-hosted-smoke.sh b/test/coff/windows-ucrt-hosted-smoke.sh @@ -0,0 +1,450 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT=${CFREE_TEST_ROOT:-$(cd "$(dirname "$0")/../.." 
&& pwd)} +CFREE=${CFREE:-"$ROOT/build/cfree"} +SDK=${CFREE_MINGW_SYSROOT:-} + +find_sdk() { + local arch=$1 + local d + for d in \ + /tmp/llvm-mingw*/llvm-mingw-*-ucrt-*/"$arch"-w64-mingw32 \ + /tmp/llvm-mingw*/"$arch"-w64-mingw32 \ + /private/tmp/llvm-mingw*/llvm-mingw-*-ucrt-*/"$arch"-w64-mingw32 \ + /private/tmp/llvm-mingw*/"$arch"-w64-mingw32; do + if [ -d "$d/lib" ] && [ -r "$d/include/windows.h" ]; then + printf '%s\n' "$d" + return 0 + fi + done + return 1 +} + +sdk_for_arch() { + local arch=$1 + local base + if [ -n "$SDK" ]; then + if [ "$(basename "$SDK")" = "$arch-w64-mingw32" ]; then + printf '%s\n' "$SDK" + return 0 + fi + base=$(dirname "$SDK") + if [ -d "$base/$arch-w64-mingw32/lib" ] && + [ -r "$base/$arch-w64-mingw32/include/windows.h" ]; then + printf '%s\n' "$base/$arch-w64-mingw32" + return 0 + fi + fi + find_sdk "$arch" +} + +if [ ! -x "$CFREE" ]; then + echo "FAIL windows-ucrt-hosted-smoke: cfree binary not found: $CFREE" >&2 + exit 1 +fi + +TMP=${TMPDIR:-/tmp} +WORK=$(mktemp -d "$TMP/cfree-windows-ucrt-smoke.XXXXXX") +WORK_REAL=$(cd "$WORK" && pwd -P) +trap 'rm -rf "$WORK"' EXIT + +CONSOLE_C=$WORK/windows-h.c +HEADER_C=$WORK/windows-h-coverage.c +RUNTIME_C=$WORK/runtime.c +STDIO_C=$WORK/stdio.c +IMPORTDATA_C=$WORK/import-data.c +GUI_C=$WORK/gui.c +TLS_C=$WORK/tls.c + +cat >"$CONSOLE_C" <<'SRC' +#include <windows.h> +int main(void) { Sleep(1); return 0; } +SRC + +cat >"$HEADER_C" <<'SRC' +#include <windows.h> +#include <fileapi.h> +#include <processthreadsapi.h> +#include <synchapi.h> +#include <errhandlingapi.h> +#include <winuser.h> + +_Static_assert(sizeof(long) == 4, "windows long is LLP64"); +_Static_assert(sizeof(WCHAR) == 2, "WCHAR is UTF-16"); +_Static_assert(sizeof(void*) == 8, "PE32+ pointer size"); + +static DWORD WINAPI thread_proc(LPVOID ctx) { + return (DWORD)(ULONG_PTR)ctx; +} + +static BOOL CALLBACK enum_windows_proc(HWND hwnd, LPARAM lparam) { + RECT r; + POINT p; + WINDOWPLACEMENT wp; + ZeroMemory(&wp, sizeof(wp)); + 
wp.length = sizeof(wp); + GetClientRect(hwnd, &r); + p.x = r.left; + p.y = r.top; + ClientToScreen(hwnd, &p); + SetLastError((DWORD)lparam); + return TRUE; +} + +int main(void) { + HANDLE out = GetStdHandle(STD_OUTPUT_HANDLE); + DWORD wrote = 0; + const char msg[] = "windows coverage\n"; + WriteFile(out, msg, sizeof(msg) - 1, &wrote, NULL); + + SECURITY_ATTRIBUTES sa; + ZeroMemory(&sa, sizeof(sa)); + sa.nLength = sizeof(sa); + sa.bInheritHandle = FALSE; + + WCHAR tmp_path[MAX_PATH]; + WCHAR file_path[MAX_PATH]; + GetTempPathW(MAX_PATH, tmp_path); + GetTempFileNameW(tmp_path, L"cfr", 0, file_path); + HANDLE h = CreateFileW(file_path, GENERIC_READ | GENERIC_WRITE, 0, &sa, + CREATE_ALWAYS, FILE_ATTRIBUTE_TEMPORARY, NULL); + if (h != INVALID_HANDLE_VALUE) { + LARGE_INTEGER pos; + pos.QuadPart = 0; + SetFilePointerEx(h, pos, NULL, FILE_BEGIN); + CloseHandle(h); + DeleteFileW(file_path); + } + + CRITICAL_SECTION cs; + InitializeCriticalSection(&cs); + EnterCriticalSection(&cs); + LeaveCriticalSection(&cs); + DeleteCriticalSection(&cs); + + DWORD tid = 0; + HANDLE th = CreateThread(NULL, 0, thread_proc, (LPVOID)(ULONG_PTR)3, 0, &tid); + if (th) { + WaitForSingleObject(th, INFINITE); + CloseHandle(th); + } + + EnumWindows(enum_windows_proc, 0); + MessageBoxW(NULL, L"", L"", MB_OK | MB_SETFOREGROUND); + return 0; +} +SRC + +cat >"$RUNTIME_C" <<'SRC' +#include <windows.h> +#include <stdlib.h> +#include <string.h> + +static int cmp_ints(const void *a, const void *b) { + int ia = *(const int *)a; + int ib = *(const int *)b; + return (ia > ib) - (ia < ib); +} + +static int has_env(char **envp, const char *prefix) { + size_t n = strlen(prefix); + if (!envp) return 0; + for (; *envp; ++envp) { + if (strncmp(*envp, prefix, n) == 0) return 1; + } + return 0; +} + +int main(int argc, char **argv, char **envp) { + if (argc < 3) return 10; + if (strcmp(argv[1], "alpha") != 0 || strcmp(argv[2], "beta") != 0) return 11; + if (!has_env(envp, "CFREE_WIN_PROBE=present")) return 12; + + 
HANDLE out = GetStdHandle(STD_OUTPUT_HANDLE); + HANDLE err = GetStdHandle(STD_ERROR_HANDLE); + DWORD wrote = 0; + WriteFile(out, "stdout-ok\n", 10, &wrote, 0); + WriteFile(err, "stderr-ok\n", 10, &wrote, 0); + + HANDLE heap = GetProcessHeap(); + char *mem = (char *)HeapAlloc(heap, 0, 32); + if (!mem) return 13; + strcpy(mem, "heap-ok"); + if (strcmp(mem, "heap-ok") != 0) return 14; + HeapFree(heap, 0, mem); + + int vals[4] = {4, 1, 3, 2}; + qsort(vals, 4, sizeof(vals[0]), cmp_ints); + if (vals[0] != 1 || vals[3] != 4) return 15; + + SetLastError(1234); + if (GetLastError() != 1234) return 16; + + char dir[MAX_PATH]; + char path[MAX_PATH]; + if (!GetTempPathA(MAX_PATH, dir)) return 17; + wsprintfA(path, "%scfree-runtime-%lu.tmp", dir, + (unsigned long)GetCurrentProcessId()); + HANDLE f = CreateFileA(path, GENERIC_READ | GENERIC_WRITE, 0, 0, + CREATE_ALWAYS, FILE_ATTRIBUTE_TEMPORARY, 0); + if (f == INVALID_HANDLE_VALUE) return 18; + if (!WriteFile(f, "file-ok", 7, &wrote, 0) || wrote != 7) return 19; + SetFilePointer(f, 0, 0, FILE_BEGIN); + char buf[8]; + DWORD got = 0; + memset(buf, 0, sizeof(buf)); + if (!ReadFile(f, buf, 7, &got, 0)) return 20; + CloseHandle(f); + DeleteFileA(path); + if (got != 7 || strcmp(buf, "file-ok") != 0) return 21; + return 0; +} +SRC + +cat >"$STDIO_C" <<'SRC' +#define _INC_STDIO_S +#include <stdio.h> + +int main(void) { + puts("puts-ok"); + fputs("fputs-ok\n", stdout); + printf("printf-ok\n"); + fflush(stdout); + return 0; +} +SRC + +cat >"$IMPORTDATA_C" <<'SRC' +#include <windows.h> + +extern char **__dcrt_initial_narrow_environment; + +int main(void) { + HANDLE out = GetStdHandle(STD_OUTPUT_HANDLE); + DWORD wrote = 0; + WriteFile(out, "importdata\n", 11, &wrote, 0); + if (&__dcrt_initial_narrow_environment == 0) return 10; + if (__dcrt_initial_narrow_environment == 0) return 11; + return 0; +} +SRC + +cat >"$GUI_C" <<'SRC' +#include <windows.h> +int WINAPI WinMain(HINSTANCE hinst, HINSTANCE prev, LPSTR cmd, int show) { + (void)hinst; + 
(void)prev; + (void)cmd; + (void)show; + return 0; +} +SRC + +cat >"$TLS_C" <<'SRC' +struct tlsdir { + unsigned long long start; + unsigned long long end; + unsigned long long index; + unsigned long long callbacks; + unsigned int zero_fill; + unsigned int characteristics; +}; + +extern unsigned char __ImageBase; +extern const struct tlsdir _tls_used; + +_Thread_local int tls_init = 7; +_Thread_local int tls_zero; + +static unsigned int rd32(const unsigned char *p) { + return (unsigned int)p[0] | + ((unsigned int)p[1] << 8) | + ((unsigned int)p[2] << 16) | + ((unsigned int)p[3] << 24); +} + +static int check_tls_directory(void) { + const unsigned char *image = &__ImageBase; + unsigned int pe = rd32(image + 0x3c); + const unsigned char *opt = image + pe + 4 + 20; + const unsigned char *dir = opt + 112 + 9 * 8; + unsigned int tls_rva = rd32(dir); + unsigned int tls_size = rd32(dir + 4); + if (tls_size != 40) return 34; + if ((const unsigned char *)&_tls_used != image + tls_rva) return 35; + if (_tls_used.start == 0 || _tls_used.end <= _tls_used.start) return 36; + if (_tls_used.index == 0) return 37; + if (_tls_used.zero_fill != 0 || _tls_used.characteristics != 0) return 38; + return 0; +} + +static int bump(void) { + tls_zero += 3; + tls_init += tls_zero; + return tls_init; +} + +int main(void) { + int dir = check_tls_directory(); + if (dir) return dir; + int a = bump(); + int b = bump(); + if (a != 10) return 31; + if (b != 16) return 32; + if (tls_zero != 6) return 33; + return 0; +} +SRC + +check_no_legacy_crt_imports() { + local dump=$1 + local what=$2 + if grep -Eiq 'DLL Name: (msvcrt|ucrt)\.dll' "$dump"; then + echo "FAIL windows-ucrt-hosted-smoke: $what imports legacy CRT DLL directly" >&2 + grep -Ei 'DLL Name: (msvcrt|ucrt)\.dll' "$dump" >&2 + exit 1 + fi +} + +run_wine_if_available() { + local label=$1 + local image=$2 + local pod_arch=$3 + local exe=$4 + shift 4 + + if ! 
command -v podman >/dev/null 2>&1; then + echo "SKIP windows-ucrt-hosted-smoke: podman unavailable for $label Wine run" + return 0 + fi + if ! podman image exists "$image" >/dev/null 2>&1; then + echo "SKIP windows-ucrt-hosted-smoke: $image unavailable for $label Wine run" + return 0 + fi + + podman run --rm --arch "$pod_arch" -v "$WORK_REAL:/probe:ro" "$image" \ + bash -lc " + export WINEDEBUG=-all WINEPREFIX=/tmp/wineprefix CFREE_WIN_PROBE=present + timeout 120s /usr/lib/wine/wine64 /probe/$(basename "$exe") $* + rc=\$? + echo \"$label exit=\$rc\" + test \"\$rc\" -eq 0 + " +} + +ran=0 +for arch in x86_64 aarch64; do + case "$arch" in + x86_64) + target=x86_64-windows + label=x64 + image=localhost/cfree-wine-amd64 + pod_arch=amd64 + ;; + aarch64) + target=aarch64-windows + label=aarch64 + image=localhost/cfree-wine-arm64 + pod_arch=arm64 + ;; + esac + + if ! ARCH_SDK=$(sdk_for_arch "$arch"); then + echo "SKIP windows-ucrt-hosted-smoke: no $arch llvm-mingw UCRT sysroot" + continue + fi + if [ ! -r "$ARCH_SDK/include/windows.h" ] || + [ ! 
-r "$ARCH_SDK/lib/libmsvcrt.a" ]; then + echo "FAIL windows-ucrt-hosted-smoke: invalid UCRT llvm-mingw sysroot: $ARCH_SDK" >&2 + exit 1 + fi + + ran=1 + CONSOLE_EXE=$WORK/windows-h-$arch.exe + CONSOLE_DUMP=$WORK/windows-h-$arch.dump + HEADER_EXE=$WORK/windows-h-coverage-$arch.exe + HEADER_DUMP=$WORK/windows-h-coverage-$arch.dump + RUNTIME_EXE=$WORK/runtime-$arch.exe + RUNTIME_DUMP=$WORK/runtime-$arch.dump + STDIO_EXE=$WORK/stdio-$arch.exe + STDIO_DUMP=$WORK/stdio-$arch.dump + IMPORTDATA_EXE=$WORK/import-data-$arch.exe + IMPORTDATA_DUMP=$WORK/import-data-$arch.dump + TLS_EXE=$WORK/tls-$arch.exe + TLS_DUMP=$WORK/tls-$arch.dump + GUI_EXE=$WORK/gui-$arch.exe + GUI_DUMP=$WORK/gui-$arch.dump + + "$CFREE" cc -target "$target" --sysroot "$ARCH_SDK" -mconsole \ + "$CONSOLE_C" -o "$CONSOLE_EXE" + "$CFREE" objdump -p "$CONSOLE_EXE" >"$CONSOLE_DUMP" + check_no_legacy_crt_imports "$CONSOLE_DUMP" "$label console PE" + if grep -Fq "Name: __set_app_type" "$CONSOLE_DUMP"; then + echo "FAIL windows-ucrt-hosted-smoke: weak alias leaked as __set_app_type import" >&2 + exit 1 + fi + grep -Fq "DLL Name: KERNEL32.dll" "$CONSOLE_DUMP" + grep -Fq "Name: Sleep" "$CONSOLE_DUMP" + grep -Fq "DLL Name: api-ms-win-crt-runtime-l1-1-0.dll" "$CONSOLE_DUMP" + grep -Fq "Name: _set_app_type" "$CONSOLE_DUMP" + run_wine_if_available "$label Sleep" "$image" "$pod_arch" "$CONSOLE_EXE" + + "$CFREE" cc -target "$target" --sysroot "$ARCH_SDK" -mconsole \ + "$HEADER_C" -o "$HEADER_EXE" + "$CFREE" objdump -p "$HEADER_EXE" >"$HEADER_DUMP" + check_no_legacy_crt_imports "$HEADER_DUMP" "$label windows.h coverage PE" + grep -Fq "Name: CreateFileW" "$HEADER_DUMP" + grep -Fq "Name: CreateThread" "$HEADER_DUMP" + grep -Fq "Name: WaitForSingleObject" "$HEADER_DUMP" + grep -Fq "Name: MessageBoxW" "$HEADER_DUMP" + + "$CFREE" cc -target "$target" --sysroot "$ARCH_SDK" \ + "$RUNTIME_C" -o "$RUNTIME_EXE" + "$CFREE" objdump -p "$RUNTIME_EXE" >"$RUNTIME_DUMP" + check_no_legacy_crt_imports "$RUNTIME_DUMP" "$label runtime PE" + 
grep -Fq "Name: HeapAlloc" "$RUNTIME_DUMP" + grep -Fq "Name: CreateFileA" "$RUNTIME_DUMP" + grep -Fq "Name: qsort" "$RUNTIME_DUMP" + run_wine_if_available "$label runtime" "$image" "$pod_arch" "$RUNTIME_EXE" \ + alpha beta + + "$CFREE" cc -target "$target" --sysroot "$ARCH_SDK" \ + "$STDIO_C" -o "$STDIO_EXE" + "$CFREE" objdump -p "$STDIO_EXE" >"$STDIO_DUMP" + check_no_legacy_crt_imports "$STDIO_DUMP" "$label UCRT stdio PE" + grep -Fq "DLL Name: api-ms-win-crt-stdio-l1-1-0.dll" "$STDIO_DUMP" + grep -Fq "Name: fflush" "$STDIO_DUMP" + run_wine_if_available "$label UCRT stdio" "$image" "$pod_arch" "$STDIO_EXE" + + "$CFREE" cc -target "$target" --sysroot "$ARCH_SDK" \ + "$IMPORTDATA_C" -o "$IMPORTDATA_EXE" + "$CFREE" objdump -p "$IMPORTDATA_EXE" >"$IMPORTDATA_DUMP" + check_no_legacy_crt_imports "$IMPORTDATA_DUMP" "$label imported-data PE" + grep -Fq "DLL Name: api-ms-win-crt-private-l1-1-0.dll" "$IMPORTDATA_DUMP" + grep -Fq "Name: __dcrt_initial_narrow_environment" "$IMPORTDATA_DUMP" + run_wine_if_available "$label imported-data" "$image" "$pod_arch" \ + "$IMPORTDATA_EXE" + + "$CFREE" cc -target "$target" --sysroot "$ARCH_SDK" \ + "$TLS_C" -o "$TLS_EXE" + "$CFREE" objdump -p "$TLS_EXE" >"$TLS_DUMP" + check_no_legacy_crt_imports "$TLS_DUMP" "$label TLS PE" + grep -Eq '^[[:space:]]*9[[:space:]]+TLS[[:space:]]+0x[0-9a-fA-F]+[[:space:]]+0x00000028' \ + "$TLS_DUMP" + + "$CFREE" cc -target "$target" --sysroot "$ARCH_SDK" -mwindows \ + "$GUI_C" -o "$GUI_EXE" + "$CFREE" objdump -p "$GUI_EXE" >"$GUI_DUMP" + grep -Fq "Subsystem: 2 (WINDOWS_GUI)" "$GUI_DUMP" + check_no_legacy_crt_imports "$GUI_DUMP" "$label GUI PE" + + run_wine_if_available "$label TLS" "$image" "$pod_arch" "$TLS_EXE" +done + +if [ "$ran" -eq 0 ]; then + echo "SKIP windows-ucrt-hosted-smoke: set CFREE_MINGW_SYSROOT or install llvm-mingw UCRT under /tmp/llvm-mingw*" + exit 0 +fi + +echo "PASS windows-ucrt-hosted-smoke: Sleep, windows.h, runtime, UCRT stdio/imports/imported-data, GUI, and TLS for x64/aarch64" diff 
--git a/test/objdump/aarch64-windows/cases/01-coff-file-header.expected b/test/objdump/aarch64-windows/cases/01-coff-file-header.expected @@ -0,0 +1,5 @@ +t.obj: file format coff64-arm64 +architecture: arm64, flags 0x00000011: +HAS_RELOC, HAS_SYMS +start address 0x0000000000000000 +format: coff64 diff --git a/test/objdump/aarch64-windows/cases/01-coff-file-header.sh b/test/objdump/aarch64-windows/cases/01-coff-file-header.sh @@ -0,0 +1,14 @@ +# Golden: -f file header summary for an aarch64 Windows .obj. +# Mirrors the x86_64-windows case so the arch dispatch in -f and +# the COFF code path both stay covered. + +cat > t.c <<'EOF' +int main(void) { return 0; } +EOF +"$CFREE" cc -target aarch64-windows -c t.c -o t.obj +"$CFREE" objdump -f t.obj | awk ' +/file format/ {print; next} +/^architecture:/ {print; next} +/^HAS_/ {print; next} +/^start address/ {print; next} +/^format: coff64/ {print "format: coff64"}' diff --git a/test/objdump/x86_64-windows/cases/01-coff-characteristics.expected b/test/objdump/x86_64-windows/cases/01-coff-characteristics.expected @@ -0,0 +1,4 @@ +t.obj: file format coff64-x86_64 +Idx Name Size Align Flags +section: .text CONTENTS,ALLOC,LOAD,READONLY,CODE +raw: 0x60300020 diff --git a/test/objdump/x86_64-windows/cases/01-coff-characteristics.sh b/test/objdump/x86_64-windows/cases/01-coff-characteristics.sh @@ -0,0 +1,13 @@ +# Golden: COFF section characteristics decode for an x86_64 Windows +# .obj. Asserts that the Characteristics field is surfaced under -h +# and that the canonical IMAGE_SCN_* bits land in the tag column. 
+ +cat > t.c <<'EOF' +int main(void) { return 0; } +EOF +"$CFREE" cc -target x86_64-windows -c t.c -o t.obj +"$CFREE" objdump -h t.obj | awk ' +/file format/ {print; next} +/^Idx Name/ {print; next} +/^ *[0-9]+ \.text/ {print "section:", $2, $5} +/Characteristics:.*0x[0-9a-f]+/ {print "raw:", $NF; exit}' diff --git a/test/objdump/x86_64-windows/cases/02-coff-file-header.expected b/test/objdump/x86_64-windows/cases/02-coff-file-header.expected @@ -0,0 +1,5 @@ +t.obj: file format coff64-x86_64 +architecture: x86_64, flags 0x00000011: +HAS_RELOC, HAS_SYMS +start address 0x0000000000000000 +format: coff64 diff --git a/test/objdump/x86_64-windows/cases/02-coff-file-header.sh b/test/objdump/x86_64-windows/cases/02-coff-file-header.sh @@ -0,0 +1,14 @@ +# Golden: -f file header summary for an x86_64 Windows .obj. Asserts +# architecture/HAS_SYMS/HAS_RELOC reporting after wiring -f from +# silent-noop to a real summary. + +cat > t.c <<'EOF' +int main(void) { return 0; } +EOF +"$CFREE" cc -target x86_64-windows -c t.c -o t.obj +"$CFREE" objdump -f t.obj | awk ' +/file format/ {print; next} +/^architecture:/ {print; next} +/^HAS_/ {print; next} +/^start address/ {print; next} +/^format: coff64/ {print "format: coff64"}' diff --git a/test/objdump/x86_64-windows/cases/03-pe-private-headers.expected b/test/objdump/x86_64-windows/cases/03-pe-private-headers.expected @@ -0,0 +1,8 @@ +found: PE32+ header line +Magic: 0x20b +subsystem: WINDOWS_CUI +DllCharacteristics: +Data Directories: + Idx Name RVA Size +The Import Tables: +import: KERNEL32.dll diff --git a/test/objdump/x86_64-windows/cases/03-pe-private-headers.sh b/test/objdump/x86_64-windows/cases/03-pe-private-headers.sh @@ -0,0 +1,38 @@ +# Golden: objdump -p on a linked PE32+ executable. Asserts the +# private-header walker prints the DOS/PE signature, optional-header +# highlights, and at least one DLL import section. Gated on llvm-mingw +# UCRT availability — most CI hosts won't have it. 
+ +find_sdk() { + local d + for d in \ + /tmp/llvm-mingw*/llvm-mingw-*-ucrt-*/x86_64-w64-mingw32 \ + /tmp/llvm-mingw*/x86_64-w64-mingw32 \ + /private/tmp/llvm-mingw*/llvm-mingw-*-ucrt-*/x86_64-w64-mingw32 \ + /private/tmp/llvm-mingw*/x86_64-w64-mingw32; do + if [ -d "$d/lib" ] && [ -r "$d/include/windows.h" ]; then + printf '%s\n' "$d" + return 0 + fi + done + return 1 +} + +SDK=$(find_sdk) || { + echo "SKIP: no llvm-mingw UCRT sysroot" + exit 0 +} + +cat > t.c <<'EOF' +int main(void) { return 0; } +EOF +"$CFREE" cc -target x86_64-windows --sysroot "$SDK" t.c -o t.exe +"$CFREE" objdump -p t.exe | awk ' +/PE32\+ private headers/ {print "found: PE32+ header line"; next} +/^ Magic:/ {print $1, $2; next} +/^ Subsystem:.*WINDOWS_CUI/ {print "subsystem: WINDOWS_CUI"; next} +/^ DllCharacteristics:/ {print $1; next} +/^Data Directories:/ {print; next} +/^ Idx Name/ {print; next} +/^The Import Tables:/ {print; next} +/^ DLL Name: KERNEL32\.dll/ {print "import: KERNEL32.dll"; next}' diff --git a/test/objdump/x86_64-windows/cases/04-pe-sections.expected b/test/objdump/x86_64-windows/cases/04-pe-sections.expected @@ -0,0 +1,4 @@ +found: PE sections +found: PE header row +section: .text +section: .idata diff --git a/test/objdump/x86_64-windows/cases/04-pe-sections.sh b/test/objdump/x86_64-windows/cases/04-pe-sections.sh @@ -0,0 +1,34 @@ +# Golden: objdump -h on a linked PE32+ executable. Asserts the PE +# section walker fires (since cfree_obj_open does not parse PE +# images) and produces a section table with the canonical headers. +# Gated on llvm-mingw UCRT availability. 
+ +find_sdk() { + local d + for d in \ + /tmp/llvm-mingw*/llvm-mingw-*-ucrt-*/x86_64-w64-mingw32 \ + /tmp/llvm-mingw*/x86_64-w64-mingw32 \ + /private/tmp/llvm-mingw*/llvm-mingw-*-ucrt-*/x86_64-w64-mingw32 \ + /private/tmp/llvm-mingw*/x86_64-w64-mingw32; do + if [ -d "$d/lib" ] && [ -r "$d/include/windows.h" ]; then + printf '%s\n' "$d" + return 0 + fi + done + return 1 +} + +SDK=$(find_sdk) || { + echo "SKIP: no llvm-mingw UCRT sysroot" + exit 0 +} + +cat > t.c <<'EOF' +int main(void) { return 0; } +EOF +"$CFREE" cc -target x86_64-windows --sysroot "$SDK" t.c -o t.exe +"$CFREE" objdump -h t.exe | awk ' +/Sections \(PE image\)/ {print "found: PE sections"; next} +/^Idx Name.*VMA.*Size/ {print "found: PE header row"; next} +/^ *[0-9]+ \.text/ {print "section: .text"; next} +/^ *[0-9]+ \.idata/ {print "section: .idata"; next}' diff --git a/test/test.mk b/test/test.mk @@ -27,9 +27,9 @@ # asm_parse / cfree_disasm_iter_* are still stubs; the harness builds # and runs end-to-end so the wiring stays exercised. See doc/ASM.md. 
-.PHONY: test test-driver test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-strip-driver test-objcopy-driver test-objdump-driver test-link test-cg-api test-toy test-opt test-dwarf test-debug test-parse test-parse-err test-asm test-wasm-front test-isa test-aa64-inline test-rv64-inline test-rv64-jit test-emu test-x64-inline test-x64-dbg test-rt-headers test-rt-runtime test-musl test-musl-rv64 test-glibc test-glibc-rv64 test-lib-deps test-smoke-x64 test-smoke-rv64 test-cbackend rv64-doctor +.PHONY: test test-driver test-lex test-pp test-pp-err test-elf test-coff test-coff-mingw-import test-coff-windows-ucrt test-ar test-ar-driver test-strip-driver test-objcopy-driver test-objdump-driver test-link test-cg-api test-toy test-opt test-dwarf test-debug test-parse test-parse-err test-asm test-wasm-front test-isa test-aa64-inline test-rv64-inline test-rv64-jit test-emu test-x64-inline test-x64-dbg test-rt-headers test-rt-runtime test-musl test-musl-rv64 test-glibc test-glibc-rv64 test-lib-deps test-smoke-x64 test-smoke-rv64 test-cbackend rv64-doctor -test: test-driver test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-strip-driver test-objcopy-driver test-objdump-driver test-link test-toy test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-rv64-inline test-rv64-jit test-emu test-x64-inline test-x64-dbg test-rt-headers test-lib-deps +test: test-driver test-lex test-pp test-pp-err test-elf test-coff test-ar test-ar-driver test-strip-driver test-objcopy-driver test-objdump-driver test-link test-toy test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-rv64-inline test-rv64-jit test-emu test-x64-inline test-x64-dbg test-rt-headers test-lib-deps # `test-cbackend` is intentionally not in the default `test` target: the # Phase 1 C backend skips most fixtures pending later phases, which would # add noise to the default summary. Run it explicitly to gate progress. 
@@ -260,8 +260,13 @@ test-rt-runtime: bin rt $(LINK_EXE_RUNNER) # void* to a function pointer, which pedantic rejects under C11. HARNESS_CFLAGS = -std=c11 -Wall -Wextra -Werror -isysroot $(SYSROOT) -Iinclude -Itest -ROUNDTRIP_BIN = build/test/cfree-roundtrip -ROUNDTRIP_BIN_MACHO = build/test/cfree-roundtrip-macho +ROUNDTRIP_BIN = build/test/cfree-roundtrip +ROUNDTRIP_BIN_MACHO = build/test/cfree-roundtrip-macho +ROUNDTRIP_BIN_COFF = build/test/cfree-roundtrip-coff +COFF_IMPORT_SMOKE_BIN = build/test/pe-import-smoke +COFF_IMPORT_MINGW_BIN = build/test/pe-import-mingw +COFF_DSO_FORWARDER_BIN = build/test/pe-dso-forwarder +COFF_MIXED_ARCHIVE_BIN = build/test/pe-mixed-archive LINK_EXE_RUNNER = build/test/link-exe-runner JIT_RUNNER = build/test/jit-runner PARSE_RUNNER = build/test/parse-runner @@ -278,6 +283,45 @@ $(ROUNDTRIP_BIN_MACHO): test/macho/cfree-roundtrip-macho.c $(LIB_AR) @mkdir -p $(dir $@) $(CC) $(HARNESS_CFLAGS) -Isrc test/macho/cfree-roundtrip-macho.c $(LIB_AR) -o $@ +# PE/COFF round-trip harness (test/coff/). All-in-one binary: builds +# hand-crafted ObjBuilders and asserts emit_coff/read_coff round-trip +# stability for both x86_64-windows and aarch64-windows. +$(ROUNDTRIP_BIN_COFF): test/coff/cfree-roundtrip-coff.c $(LIB_AR) + @mkdir -p $(dir $@) + $(CC) $(HARNESS_CFLAGS) -Isrc test/coff/cfree-roundtrip-coff.c $(LIB_AR) -o $@ + +# PE import-directory smoke test (test/coff/pe-import-smoke.c). +# Exercises the full chain: short-import shim bytes -> link_add_obj_bytes +# (reclassified as DSO) -> link_resolve -> link_emit_coff. Verifies the +# produced PE32+ via x86_64-w64-mingw32-objdump; skips cleanly if absent. +$(COFF_IMPORT_SMOKE_BIN): test/coff/pe-import-smoke.c $(LIB_AR) + @mkdir -p $(dir $@) + $(CC) $(HARNESS_CFLAGS) -Isrc test/coff/pe-import-smoke.c $(LIB_AR) -o $@ + +# PE import test against a real mingw archive (test/coff/pe-import-mingw.c). 
+# Exercises the long-form import-archive absorption path +# (link_add_archive_bytes -> classify_coff_archive_member). Skips cleanly +# when the mingw toolchain isn't installed. +$(COFF_IMPORT_MINGW_BIN): test/coff/pe-import-mingw.c $(LIB_AR) + @mkdir -p $(dir $@) + $(CC) $(HARNESS_CFLAGS) -Isrc test/coff/pe-import-mingw.c $(LIB_AR) -o $@ + +# read_coff_dso forwarder-export contract (test/coff/pe-dso-forwarder.c). +# Synthesizes a tiny PE32+ DLL with one direct and one forwarder export +# and asserts both surface as OBJ_SEC_NONE globals on the ObjBuilder. +$(COFF_DSO_FORWARDER_BIN): test/coff/pe-dso-forwarder.c $(LIB_AR) + @mkdir -p $(dir $@) + $(CC) $(HARNESS_CFLAGS) -Isrc test/coff/pe-dso-forwarder.c $(LIB_AR) -o $@ + +# Mixed-member archive (test/coff/pe-mixed-archive.c). Verifies that +# one archive containing both a short-import member and a long-form +# COFF object with a defined data symbol satisfies references through +# both shapes — the same composition libucrt.a uses (API-set imports +# alongside lib64_libucrt_extra_a-*.o helpers). +$(COFF_MIXED_ARCHIVE_BIN): test/coff/pe-mixed-archive.c $(LIB_AR) + @mkdir -p $(dir $@) + $(CC) $(HARNESS_CFLAGS) -Isrc test/coff/pe-mixed-archive.c $(LIB_AR) -o $@ + $(LINK_EXE_RUNNER): test/link/harness/link_exe_runner.c $(LIB_AR) @mkdir -p $(dir $@) $(CC) $(HARNESS_CFLAGS) test/link/harness/link_exe_runner.c $(LIB_AR) -o $@ @@ -297,6 +341,25 @@ $(WASM_TOOL): test/wasm/harness/wasm_tool.c $(LIB_AR) test-elf: lib bin-soft $(ROUNDTRIP_BIN) bash test/elf/run.sh +# PE/COFF round-trip harness plus optional hosted Windows smoke. The +# UCRT smoke self-skips when llvm-mingw is not installed. 
+test-coff: lib bin rt-aarch64-windows $(ROUNDTRIP_BIN_COFF) $(COFF_IMPORT_SMOKE_BIN) $(COFF_DSO_FORWARDER_BIN) $(COFF_MIXED_ARCHIVE_BIN) + $(ROUNDTRIP_BIN_COFF) + $(COFF_IMPORT_SMOKE_BIN) + $(COFF_DSO_FORWARDER_BIN) + $(COFF_MIXED_ARCHIVE_BIN) + bash test/coff/windows-ucrt-hosted-smoke.sh + bash test/coff/windows-system-dlls-smoke.sh + +# Separate target so it can be skipped gracefully if mingw isn't +# installed. The test itself self-skips on missing tooling, but the +# build target only fires when explicitly requested. +test-coff-mingw-import: lib $(COFF_IMPORT_MINGW_BIN) + $(COFF_IMPORT_MINGW_BIN) + +test-coff-windows-ucrt: bin rt-aarch64-windows + bash test/coff/windows-ucrt-hosted-smoke.sh + test-link: lib $(ROUNDTRIP_BIN) $(ROUNDTRIP_BIN_MACHO) $(LINK_EXE_RUNNER) $(JIT_RUNNER) bash test/link/run.sh