kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit b56c3ece7c3472325f2596146c84be3b61cf2079
parent 2f677dcf2322c6276eb432fbadbfe5d3a8fdbad1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 25 May 2026 03:10:25 -0700

Add object format registry

Diffstat:
Mdoc/REGISTRY.md | 64+++++++++++++++++++++++++++++++++++++---------------------------
Msrc/api/object_builder.c | 21+++++----------------
Msrc/api/object_file.c | 15++++-----------
Msrc/link/link.c | 506++++++++++---------------------------------------------------------------------
Asrc/obj/coff_archive.c | 283+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/format.h | 67+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/registry.c | 143+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtest/coff/pe-import-mingw.c | 2+-
Mtest/coff/pe-import-smoke.c | 2+-
Mtest/coff/pe-mixed-archive.c | 2+-
10 files changed, 603 insertions(+), 502 deletions(-)

diff --git a/doc/REGISTRY.md b/doc/REGISTRY.md @@ -78,62 +78,72 @@ No new vtable or registry code; this axis is the cheapest of the four. ## Axis 2: Object/image formats -**Status: no vtable exists today; switches in multiple call sites. -Largest piece of new work.** +**Status: vtable and registry exist; source gating remains.** -Today `emit_elf` / `emit_macho` / `emit_coff` are bare functions -(`src/obj/{elf,macho,coff}_emit.c`), the read paths are likewise bare -(`*_read.c`), and link-image emission dispatches via a switch in -`src/link/link.c:958` calling `link_emit_elf` / `link_emit_macho` / -`link_emit_coff`. DSO header readers (`*_read_dso.c`) follow the same -shape. +`emit_elf` / `emit_macho` / `emit_coff` are still implemented as +format-specific functions (`src/obj/{elf,macho,coff}_emit.c`), as are +the read paths (`*_read.c`), DSO readers (`*_read_dso.c`), and +link-image emitters (`src/link/link_{elf,macho,coff}.c`). Generic call +sites now reach them through `ObjFormatImpl` in `src/obj/format.h` and +`src/obj/registry.c`. -**New vtable** (`src/obj/format.h`): +**Vtable** (`src/obj/format.h`): ```c typedef struct ObjFormatImpl { - CfreeObjFormat kind; + ObjFmt kind; + CfreeBinFmt bin_fmt; const char* name; + const char* read_name; + const char* read_dso_name; /* Relocatable object emit + read. */ void (*emit)(Compiler*, ObjBuilder*, Writer*); - CfreeStatus (*read)(Compiler*, const u8* data, size_t len, - ObjBuilder* out); + ObjBuilder* (*read)(Compiler*, const char* name, const u8* data, size_t len); + + /* DSO header reader for `-lfoo` resolution against .so/.dylib/.dll + * plus Mach-O .tbd handling via obj_format_dso_reader_for_bytes(). */ + ObjBuilder* (*read_dso)(Compiler*, const char* name, const u8*, size_t, + Sym* soname_out); /* Link-image emit (executable / shared object). */ void (*link_emit)(LinkImage*, Writer*); - /* DSO header reader for `-lfoo` resolution against .so/.dylib/.dll. 
*/ - CfreeStatus (*read_dso)(Compiler*, const u8*, size_t, /* ... */); + /* Optional format-specific linker input policy. */ + int (*classify_obj_input)(Compiler*, ObjBuilder*, Sym* soname_out); + Sym (*archive_hint)(Compiler*, const char* archive_name); + ObjFormatArchiveAction (*archive_member)(Compiler*, + const ObjFormatArchiveMember*, + ObjBuilder** out); } ObjFormatImpl; -const ObjFormatImpl* obj_format_lookup(CfreeObjFormat); +const ObjFormatImpl* obj_format_lookup(ObjFmt); +const ObjFormatImpl* obj_format_lookup_bin(CfreeBinFmt); ``` -**New registry** (`src/obj/registry.c`), gated the same way as the arch +**Registry** (`src/obj/registry.c`), gated the same way as the arch registry: ```c -extern const ObjFormatImpl obj_format_impl_elf; -extern const ObjFormatImpl obj_format_impl_macho; -extern const ObjFormatImpl obj_format_impl_coff; - static const ObjFormatImpl* const obj_format_impls[] = { -#if CFREE_OBJ_ELF +#if CFREE_OBJ_ELF_ENABLED &obj_format_impl_elf, #endif -#if CFREE_OBJ_MACHO - &obj_format_impl_macho, -#endif -#if CFREE_OBJ_COFF +#if CFREE_OBJ_COFF_ENABLED &obj_format_impl_coff, #endif +#if CFREE_OBJ_MACHO_ENABLED + &obj_format_impl_macho, +#endif + &obj_format_impl_wasm, }; ``` **Call-site changes**: the switch in `src/link/link.c` and the obj -emit/read entry points collapse to -`obj_format_lookup(target.obj_format)->fn(...)`. +emit/read entry points have collapsed to +`obj_format_lookup(target.obj_format)->fn(...)`. COFF short-import and +long-form import-archive handling now live behind object-format hooks, +so linker input ingestion stays generic over the object format. 
The arch-side format reloc tables (`ArchElfOps`, `ArchMachoOps`, `ArchCoffOps` on `ArchImpl`) stay where they are — they're the diff --git a/src/api/object_builder.c b/src/api/object_builder.c @@ -4,6 +4,7 @@ #include <string.h> #include "core/core.h" +#include "obj/format.h" #include "obj/obj.h" static ObjSecId pub_to_intern_sec(CfreeObjSection s) { @@ -268,24 +269,12 @@ CfreeStatus cfree_obj_builder_emit(CfreeObjBuilder* b, CfreeWriter* w) { CfreeStatus cfree_obj_builder_emit_as(CfreeObjBuilder* b, CfreeObjFmt fmt, CfreeWriter* w) { Compiler* c; + const ObjFormatImpl* impl; if (!b || !w) return CFREE_INVALID; c = obj_compiler(b); if (!c) return CFREE_INVALID; - switch (fmt) { - case CFREE_OBJ_ELF: - emit_elf(c, b, w); - break; - case CFREE_OBJ_COFF: - emit_coff(c, b, w); - break; - case CFREE_OBJ_MACHO: - emit_macho(c, b, w); - break; - case CFREE_OBJ_WASM: - emit_wasm(c, b, w); - break; - default: - return CFREE_UNSUPPORTED; - } + impl = obj_format_lookup(fmt); + if (!impl || !impl->emit) return CFREE_UNSUPPORTED; + impl->emit(c, b, w); return cfree_writer_status(w); } diff --git a/src/api/object_file.c b/src/api/object_file.c @@ -10,6 +10,7 @@ #include "core/heap.h" #include "core/pool.h" #include "core/slice.h" +#include "obj/format.h" #include "obj/obj.h" struct CfreeObjFile { @@ -25,17 +26,9 @@ struct CfreeObjFile { static ObjBuilder* obj_read_bytes(Compiler* c, const char* name, const u8* data, size_t len, ObjFmt fmt) { - switch (fmt) { - case CFREE_OBJ_ELF: - return read_elf(c, name, data, len); - case CFREE_OBJ_COFF: - return read_coff(c, name, data, len); - case CFREE_OBJ_MACHO: - return read_macho(c, name, data, len); - case CFREE_OBJ_WASM: - return read_wasm(c, name, data, len); - } - return NULL; + const ObjFormatImpl* impl = obj_format_lookup(fmt); + if (!impl || !impl->read) return NULL; + return impl->read(c, name, data, len); } CfreeStatus cfree_obj_open(const CfreeContext* ctx, CfreeSlice name, diff --git a/src/link/link.c b/src/link/link.c @@ 
-22,6 +22,7 @@ #include "core/slice.h" #include "core/vec.h" #include "link/link_internal.h" +#include "obj/format.h" /* ---- SrcLoc helper ---- */ @@ -121,37 +122,24 @@ LinkInputId link_add_obj(Linker* l, ObjBuilder* ob) { LinkInputId link_add_obj_bytes(Linker* l, const char* name, const u8* data, size_t len) { - /* Detect format from magic and dispatch to the matching reader. - * The returned ObjBuilder is owned by the linker and freed via the - * input cleanup. ELF and Mach-O are supported. */ ObjBuilder* ob; LinkInput* in; LinkInputId id; CfreeBinFmt fmt; + const ObjFormatImpl* impl; const char* reader_name; if (!l || !data || !len) return LINK_INPUT_NONE; fmt = cfree_detect_fmt(data, len); - switch (fmt) { - case CFREE_BIN_ELF: - ob = read_elf(l->c, name, data, len); - reader_name = "read_elf"; - break; - case CFREE_BIN_MACHO: - ob = read_macho(l->c, name, data, len); - reader_name = "read_macho"; - break; - case CFREE_BIN_COFF: - ob = read_coff(l->c, name, data, len); - reader_name = "read_coff"; - break; - default: - compiler_panic(l->c, no_loc(), - "link_add_obj_bytes: unsupported object format " - "(fmt=%u) for '%.*s'", - (u32)fmt, - SLICE_ARG(name ? slice_from_cstr(name) - : SLICE_LIT("(unnamed)"))); - } + impl = obj_format_lookup_bin(fmt); + if (!impl || !impl->read) + compiler_panic(l->c, no_loc(), + "link_add_obj_bytes: unsupported object format " + "(fmt=%u) for '%.*s'", + (u32)fmt, + SLICE_ARG(name ? slice_from_cstr(name) + : SLICE_LIT("(unnamed)"))); + reader_name = impl->read_name; + ob = impl->read(l->c, name, data, len); if (!ob) compiler_panic(l->c, no_loc(), "link_add_obj_bytes: %.*s returned NULL for '%.*s'", @@ -162,16 +150,12 @@ LinkInputId link_add_obj_bytes(Linker* l, const char* name, const u8* data, in->order = l->next_input_order++; in->obj = ob; /* re-uses the ObjBuilder slot for ownership */ in->name = name ? 
pool_intern_slice(l->c->global, slice_from_cstr(name)) : 0; - /* PE/COFF short-import: read_coff_short_import stashes the providing - * DLL name on the builder. Reclassify the input as a DSO so the - * resolver treats its symbols as exports (matching the .lib archive - * member path in include_archive_member). */ { - Sym coff_dll = 0; - if (fmt == CFREE_BIN_COFF && obj_get_coff_import_dll(ob, &coff_dll) && - coff_dll) { + Sym soname = 0; + if (impl->classify_obj_input && + impl->classify_obj_input(l->c, ob, &soname)) { in->kind = LINK_INPUT_DSO_BYTES; - in->soname = coff_dll; + in->soname = soname; } else { in->kind = LINK_INPUT_OBJ_BYTES; } @@ -186,47 +170,18 @@ LinkInputId link_add_dso_bytes(Linker* l, const char* name, const u8* data, LinkInputId id; Sym soname = 0; CfreeBinFmt fmt; + ObjFormatDsoReader reader; const char* reader_name; if (!l || !data || !len) return LINK_INPUT_NONE; - /* Three DSO surfaces are supported on input: - * - ELF ET_DYN .so (read_elf_dso) - * - Mach-O MH_DYLIB / MH_BUNDLE (read_macho_dso) - * - Apple .tbd text-based stubs (read_tbd; magic is "---") - * The first two are detected via cfree_detect_fmt; .tbd is a textual - * format with no binary magic, so we sniff the leading "---". 
*/ - if (len >= 3 && data[0] == '-' && data[1] == '-' && data[2] == '-') { - ob = read_tbd(l->c, name, data, len, &soname); - reader_name = "read_tbd"; - } else { - fmt = cfree_detect_fmt(data, len); - switch (fmt) { - case CFREE_BIN_ELF: - ob = read_elf_dso(l->c, name, data, len, &soname); - reader_name = "read_elf_dso"; - break; - case CFREE_BIN_MACHO: - ob = read_macho_dso(l->c, name, data, len, &soname); - reader_name = "read_macho_dso"; - break; - case CFREE_BIN_COFF: - case CFREE_BIN_PE: - /* Both spellings route through read_coff_dso: CFREE_BIN_PE is - * the MZ/PE-signed form (a real .dll), CFREE_BIN_COFF can land - * here when the caller hands us a single short-import record - * directly (rare; .lib archives are the usual conveyance and - * are handled by link_add_archive_bytes). */ - ob = read_coff_dso(l->c, name, data, len, &soname); - reader_name = "read_coff_dso"; - break; - default: - compiler_panic(l->c, no_loc(), - "link_add_dso_bytes: unsupported DSO format " - "(fmt=%u) for '%.*s'", - (u32)fmt, - SLICE_ARG(name ? slice_from_cstr(name) - : SLICE_LIT("(unnamed)"))); - } - } + if (!obj_format_dso_reader_for_bytes(data, len, &fmt, &reader)) + compiler_panic(l->c, no_loc(), + "link_add_dso_bytes: unsupported DSO format " + "(fmt=%u) for '%.*s'", + (u32)fmt, + SLICE_ARG(name ? slice_from_cstr(name) + : SLICE_LIT("(unnamed)"))); + reader_name = reader.name; + ob = reader.read(l->c, name, data, len, &soname); if (!ob) compiler_panic(l->c, no_loc(), "link_add_dso_bytes: %.*s returned NULL for '%.*s'", @@ -255,313 +210,6 @@ LinkInputId link_add_dso_bytes(Linker* l, const char* name, const u8* data, return id; } -/* ---- COFF long-form import-archive support ---- - * - * mingw `.a` archives (e.g. libkernel32.a) don't use the Microsoft - * short-import record format (Sig1=0/Sig2=0xFFFF — handled in - * read_coff_short_import). Instead every archive member is a regular - * long-form COFF .o file containing `.idata$N` sections. 
Three flavors - * appear: - * - * - Head member (e.g. libkernel32h.o): defines `_head_lib64_<lib>_a`, - * has `.idata$2` with one IMAGE_IMPORT_DESCRIPTOR template plus - * sentinel `.idata$4` / `.idata$5` slots. - * - Trailer member (e.g. libkernel32t.o): defines - * `__lib64_lib<lib>_a_iname` (the DLL-name string), with tiny - * `.idata$4` / `.idata$5` / `.idata$7` terminators. - * - Per-function stub (e.g. libkernel32s00001.o for ExitProcess): - * defines `__imp_<name>` (the IAT slot in `.idata$5`) and the bare - * `<name>` in `.text` (a 6-byte `ff 25 disp32` indirect jump - * against `__imp_<name>`). Carries `.idata$4` (ILT), `.idata$5` - * (IAT), `.idata$6` (hint+name), `.idata$7` (DLL-name back-ptr). - * - * cfree's link-emit path synthesizes the entire .idata from - * LinkSymbol.imported entries (link_emit_coff). The long-form members' - * `.idata$N` byte content is therefore redundant — only the symbol - * naming the export matters. We absorb the per-function stubs at - * archive-ingest time by rewriting them into short-import-shaped - * DSO shims (matching what read_coff_short_import produces), and we - * drop the head/trailer members entirely. - * - * DLL-name source: mingw/llvm-mingw long import members are named after - * the providing DLL (`KERNEL32.dll`, `api-ms-win-crt-runtime-l1-1-0.dll`, - * ...). Fall back to the archive filename for older import archives. - * - * The conversion is gated on Compiler.target.obj == CFREE_OBJ_COFF so - * non-Windows targets are unaffected. 
*/ - -typedef enum CoffArMemberClass { - COFF_AR_KEEP = 0, /* regular .obj — leave as LINK_INPUT_OBJ_BYTES */ - COFF_AR_SHIM = 1, /* per-function stub — replaced with DSO shim */ - COFF_AR_SKIP = 2, /* head/trailer — drop entirely */ -} CoffArMemberClass; - -static const char kCoffImpPrefix_[] = "__imp_"; -static const u32 kCoffImpPrefixLen_ = (u32)(sizeof kCoffImpPrefix_ - 1u); -static const char kCoffHeadPrefix_[] = "_head_"; -static const u32 kCoffHeadPrefixLen_ = (u32)(sizeof kCoffHeadPrefix_ - 1u); -static const char kCoffInameSuffix_[] = "_iname"; -static const u32 kCoffInameSuffixLen_ = (u32)(sizeof kCoffInameSuffix_ - 1u); - -/* Derive a DLL name from the archive path. Handles: - * path/to/libkernel32.a -> "kernel32.dll" - * path/to/libkernel32.dll.a -> "kernel32.dll" - * path/to/kernel32.lib -> "kernel32.dll" - * path/to/libfoo -> "foo.dll" - * If nothing recognizable, returns the interned basename verbatim - * (callers can still match by name; case-insensitive at runtime). */ -static Sym derive_dll_name_from_archive_path(Compiler* c, const char* path) { - const char* base; - const char* p; - size_t n; - size_t out_len; - char* out; - Sym sym; - if (!path || !*path) return 0; - base = path; - for (p = path; *p; ++p) - if (*p == '/' || *p == '\\') base = p + 1; - n = slice_from_cstr(base).len; - /* Strip trailing ".dll.a" / ".a" / ".lib" (case-sensitive — mingw - * uses lowercase, MSVC uses .lib). */ - if (n >= 6 && memcmp(base + n - 6, ".dll.a", 6) == 0) n -= 6; - else if (n >= 2 && memcmp(base + n - 2, ".a", 2) == 0) n -= 2; - else if (n >= 4 && memcmp(base + n - 4, ".lib", 4) == 0) n -= 4; - /* Strip leading "lib" prefix. */ - if (n >= 3 && memcmp(base, "lib", 3) == 0) { - base += 3; - n -= 3; - } - if (n == 0) return 0; - /* Append ".dll". 
*/ - out_len = n + 4u; - out = (char*)arena_array(c->scratch, char, out_len); - memcpy(out, base, n); - memcpy(out + n, ".dll", 4); - sym = pool_intern_slice(c->global, (Slice){ .s = out, .len = (u32)out_len }); - return sym; -} - -static Sym derive_dll_name_from_archive_member(Compiler* c, - const char* member_name, - Sym fallback) { - const char* base; - const char* p; - size_t n; - if (!member_name || !*member_name) return fallback; - base = member_name; - for (p = member_name; *p; ++p) - if (*p == '/' || *p == '\\') base = p + 1; - n = slice_from_cstr(base).len; - if (n >= 4 && memcmp(base + n - 4, ".dll", 4) == 0) - return pool_intern_slice(c->global, (Slice){ .s = base, .len = (u32)n }); - if (n >= 4 && memcmp(base + n - 4, ".DLL", 4) == 0) - return pool_intern_slice(c->global, (Slice){ .s = base, .len = (u32)n }); - return fallback; -} - -/* Resolve a COFF symbol-record's name to (ptr, len) without copying. - * Mirrors the resolve_sym_name helper in coff_read.c: 8-byte short - * name in-record, or (Zeroes==0, Offset) into the string table. */ -static void coff_resolve_sym_name_(const u8* rec, const u8* strtab, - u32 strtab_size, const char** name_out, - u32* len_out) { - u32 z = (u32)rec[0] | ((u32)rec[1] << 8) | ((u32)rec[2] << 16) | - ((u32)rec[3] << 24); - if (z == 0) { - u32 off = (u32)rec[4] | ((u32)rec[5] << 8) | ((u32)rec[6] << 16) | - ((u32)rec[7] << 24); - if (off >= strtab_size) { - *name_out = ""; - *len_out = 0; - return; - } - const char* s = (const char*)(strtab + off); - u32 max = strtab_size - off; - u32 n = 0; - while (n < max && s[n] != '\0') ++n; - *name_out = s; - *len_out = n; - return; - } - u32 n = 0; - while (n < 8 && rec[n] != '\0') ++n; - *name_out = (const char*)rec; - *len_out = n; -} - -/* Byte-level classifier that walks a long-form COFF member's symbol - * table directly, without running read_coff. 
We use this instead of - * the post-read_coff approach because mingw's archives contain reloc - * types read_coff doesn't grok (e.g. IMAGE_REL_AMD64_SECTION/SECREL - * in `.idata$N` sections), and we want to drop those members entirely - * rather than fail at read time. - * - * Returns SHIM / SKIP / KEEP. On SHIM, *out_name is the interned bare - * `<name>` (the export's real symbol, decoded from `__imp_<name>`). - * - * The COFF byte layout we rely on: header is fixed 20 bytes; symbol - * table starts at PointerToSymbolTable; each symbol record is - * COFF_SYMBOL_SIZE (18) bytes including aux slots. String table - * follows symtab: u32 size header + bytes. */ -#define COFF_SYM_REC_SIZE_ 18u -#define COFF_FILE_HDR_SIZE_ 20u -#define COFF_SYM_CLASS_EXTERNAL_ 2u - -static CoffArMemberClass classify_coff_archive_member_bytes( - Compiler* c, const u8* data, size_t len, Sym* out_name) { - u32 ptr_to_symtab; - u32 nsymbols; - u16 nsections; - const u8* sym_base; - const u8* strtab; - u32 strtab_size; - int has_imp = 0; - int has_idata = 0; - int has_head_def = 0; - int has_iname_def = 0; - Sym imp_bare_name = 0; - u32 i; - *out_name = 0; - if (len < COFF_FILE_HDR_SIZE_) return COFF_AR_KEEP; - nsections = (u16)((u32)data[2] | ((u32)data[3] << 8)); - ptr_to_symtab = (u32)data[8] | ((u32)data[9] << 8) | - ((u32)data[10] << 16) | ((u32)data[11] << 24); - nsymbols = (u32)data[12] | ((u32)data[13] << 8) | - ((u32)data[14] << 16) | ((u32)data[15] << 24); - if (nsymbols == 0 || ptr_to_symtab == 0) return COFF_AR_KEEP; - if ((u64)COFF_FILE_HDR_SIZE_ + (u64)nsections * 40u <= (u64)len) { - u32 si; - for (si = 0; si < (u32)nsections; ++si) { - const u8* sh = data + COFF_FILE_HDR_SIZE_ + (u64)si * 40u; - if (memcmp(sh, ".idata$", 7) == 0) { - has_idata = 1; - break; - } - } - } - if ((u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYM_REC_SIZE_ > (u64)len) - return COFF_AR_KEEP; - sym_base = data + ptr_to_symtab; - /* String table follows symtab. Leading u32 = total size (incl. 
self). - * Absent if there's no room after symtab. */ - { - u64 symtab_end = - (u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYM_REC_SIZE_; - if (symtab_end + 4u <= (u64)len) { - u32 declared = (u32)data[symtab_end] | - ((u32)data[symtab_end + 1] << 8) | - ((u32)data[symtab_end + 2] << 16) | - ((u32)data[symtab_end + 3] << 24); - if (declared < 4u || symtab_end + (u64)declared > (u64)len) { - strtab = NULL; - strtab_size = 0; - } else { - strtab = data + symtab_end; - strtab_size = declared; - } - } else { - strtab = NULL; - strtab_size = 0; - } - } - i = 0; - while (i < nsymbols) { - const u8* p = sym_base + (u64)i * COFF_SYM_REC_SIZE_; - u16 sec_num = (u16)((u32)p[12] | ((u32)p[13] << 8)); - u8 sclass = p[16]; - u8 naux = p[17]; - const char* nm = NULL; - u32 nlen = 0; - /* Only consider defined external symbols. UNDEF (sec_num==0) - * gives no information about what this object *provides*. */ - if (sclass == COFF_SYM_CLASS_EXTERNAL_ && sec_num != 0) { - coff_resolve_sym_name_(p, strtab, strtab_size, &nm, &nlen); - if (nlen > kCoffImpPrefixLen_ && - memcmp(nm, kCoffImpPrefix_, kCoffImpPrefixLen_) == 0) { - has_imp = 1; - if (imp_bare_name == 0) { - const char* tail = nm + kCoffImpPrefixLen_; - u32 tail_len = nlen - kCoffImpPrefixLen_; - imp_bare_name = pool_intern_slice(c->global, (Slice){ .s = tail, .len = tail_len }); - } - } else if (nlen > kCoffHeadPrefixLen_ && - memcmp(nm, kCoffHeadPrefix_, kCoffHeadPrefixLen_) == 0) { - has_head_def = 1; - } else if (nlen > kCoffInameSuffixLen_ && - memcmp(nm + nlen - kCoffInameSuffixLen_, kCoffInameSuffix_, - kCoffInameSuffixLen_) == 0) { - has_iname_def = 1; - } - } - /* Skip primary + aux records. 
*/ - i += 1u + (u32)naux; - } - if (has_imp && has_idata) { - *out_name = imp_bare_name; - return COFF_AR_SHIM; - } - if (has_head_def || has_iname_def) return COFF_AR_SKIP; - return COFF_AR_KEEP; -} - -/* Build a fresh ObjBuilder containing just `<name>` and `__imp_<name>` - * as defined-at-OBJ_SEC_NONE globals (the shape read_coff_dso / - * read_coff_short_import produce for a DLL export), and annotate it - * with the providing DLL name. Mirrors read_coff_short_import. */ -static ObjBuilder* build_coff_long_import_shim(Compiler* c, Sym bare_name, - Sym dll_name) { - ObjBuilder* ob; - const char* bare; - size_t bare_len = 0; - u32 imp_len; - char* imp_buf; - Sym imp_sn; - ObjSymId id; - ObjSymId imp_id; - if (bare_name == 0 || dll_name == 0) return NULL; - { - Slice bare_s = pool_slice(c->global, bare_name); - bare = bare_s.s; - bare_len = bare_s.len; - } - if (!bare || bare_len == 0) return NULL; - ob = obj_new(c); - if (!ob) return NULL; - id = obj_symbol_ex(ob, bare_name, SB_GLOBAL, SV_DEFAULT, SK_FUNC, - OBJ_SEC_NONE, 0, 0, 0); - obj_sym_mark_referenced(ob, id); - imp_len = kCoffImpPrefixLen_ + (u32)bare_len; - imp_buf = (char*)arena_array(c->scratch, char, imp_len); - memcpy(imp_buf, kCoffImpPrefix_, kCoffImpPrefixLen_); - memcpy(imp_buf + kCoffImpPrefixLen_, bare, bare_len); - imp_sn = pool_intern_slice(c->global, (Slice){ .s = imp_buf, .len = imp_len }); - imp_id = obj_symbol_ex(ob, imp_sn, SB_GLOBAL, SV_DEFAULT, SK_OBJ, - OBJ_SEC_NONE, 0, 0, 0); - obj_sym_mark_referenced(ob, imp_id); - obj_set_coff_import_dll(ob, dll_name); - obj_finalize(ob); - return ob; -} - -static int coff_skip_long_import_shim_bare(Compiler* c, Sym bare_name) { - const char* s; - size_t n = 0; - if (!bare_name) return 0; - { - Slice s_s = pool_slice(c->global, bare_name); - s = s_s.s; - n = s_s.len; - } - if (!s) return 0; - /* llvm-mingw's UCRT libmsvcrt.a intentionally provides these legacy - * CRT entry helpers as regular archive members later in the same - * archive. 
Prefer those wrappers over the older direct msvcrt.dll - * import shims. */ - return (n == 13 && memcmp(s, "__getmainargs", 13) == 0) || - (n == 13 && memcmp(s, "__p___initenv", 13) == 0); -} - LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data, size_t len, u8 whole_archive, u8 link_mode, u8 group_id) { @@ -570,12 +218,13 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data, CfreeArMember mem; LinkArchive* ar; u32 n; - Sym archive_dll_name = 0; - int is_coff_target = (l && l->c->target.obj == CFREE_OBJ_COFF); + Sym archive_hint = 0; + const ObjFormatImpl* target_impl; if (!l || !data || !len) return LINK_INPUT_NONE; - if (is_coff_target) - archive_dll_name = derive_dll_name_from_archive_path(l->c, name); + target_impl = obj_format_lookup(l->c->target.obj); + if (target_impl && target_impl->archive_hint) + archive_hint = target_impl->archive_hint(l->c, name); in_arc.data = data; in_arc.len = len; @@ -625,28 +274,19 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data, while (cfree_ar_iter_next(it, &mem) == CFREE_ITER_ITEM && n < ar->nmembers) { ObjBuilder* ob = NULL; CfreeBinFmt mfmt = cfree_detect_fmt(mem.data, mem.size); - /* COFF long-form import-archive absorption (mingw `.a`). Classify - * the member from raw bytes *before* read_coff so we can drop - * members carrying `.idata$N` reloc types read_coff doesn't model - * (e.g. IMAGE_REL_AMD64_SECTION) without ever invoking the reader - * on them. KEEP members fall through to the standard read path. 
*/ - if (mfmt == CFREE_BIN_COFF && is_coff_target && archive_dll_name != 0) { - Sym bare = 0; - CoffArMemberClass cls = classify_coff_archive_member_bytes( - l->c, mem.data, mem.size, &bare); - if (cls == COFF_AR_SHIM) { - if (coff_skip_long_import_shim_bare(l->c, bare)) { - ob = NULL; - } else { - Sym member_dll = - derive_dll_name_from_archive_member(l->c, mem.name.s, - archive_dll_name); - ob = build_coff_long_import_shim(l->c, bare, member_dll); - } - } else if (cls == COFF_AR_SKIP) { - ob = NULL; - } - if (cls != COFF_AR_KEEP) { + const ObjFormatImpl* member_impl = obj_format_lookup_bin(mfmt); + if (target_impl && target_impl->archive_member) { + ObjFormatArchiveMember desc; + ObjFormatArchiveAction action; + memset(&desc, 0, sizeof(desc)); + desc.archive_name = name; + desc.member_name = mem.name.s; + desc.data = mem.data; + desc.len = mem.size; + desc.bin_fmt = mfmt; + desc.archive_hint = archive_hint; + action = target_impl->archive_member(l->c, &desc, &ob); + if (action != OBJ_FORMAT_ARCHIVE_KEEP) { ar->members[n].name = mem.name.len ? pool_intern_slice(l->c->global, mem.name) : 0; ar->members[n].obj = ob; @@ -654,26 +294,16 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data, continue; } } - switch (mfmt) { - case CFREE_BIN_ELF: - ob = read_elf(l->c, mem.name.s, mem.data, mem.size); - break; - case CFREE_BIN_MACHO: - ob = read_macho(l->c, mem.name.s, mem.data, mem.size); - break; - case CFREE_BIN_COFF: - ob = read_coff(l->c, mem.name.s, mem.data, mem.size); - break; - default: - compiler_panic(l->c, no_loc(), - "link_add_archive_bytes: unsupported member " - "format (fmt=%u) for '%.*s' in archive '%.*s'", - (u32)mfmt, - SLICE_ARG(mem.name.len ? mem.name - : SLICE_LIT("(unnamed)")), - SLICE_ARG(name ? 
slice_from_cstr(name) - : SLICE_LIT("(unnamed)"))); - } + if (!member_impl || !member_impl->read) + compiler_panic(l->c, no_loc(), + "link_add_archive_bytes: unsupported member " + "format (fmt=%u) for '%.*s' in archive '%.*s'", + (u32)mfmt, + SLICE_ARG(mem.name.len ? mem.name + : SLICE_LIT("(unnamed)")), + SLICE_ARG(name ? slice_from_cstr(name) + : SLICE_LIT("(unnamed)"))); + ob = member_impl->read(l->c, mem.name.s, mem.data, mem.size); if (!ob) compiler_panic(l->c, no_loc(), "link_add_archive_bytes: object read failed for " @@ -970,31 +600,17 @@ void link_resolve_extend(Linker* l, LinkImage* img) { "yet implemented"); } -/* ---- public emit dispatcher ---- - * - * Per-format peers of link_emit_elf: link_emit_macho and link_emit_coff - * (both deferred) slot in here. Until those land, the unimplemented - * cases panic with a format-specific diagnostic rather than the - * catch-all. */ +/* ---- public emit dispatcher ---- */ void link_emit_image_writer(LinkImage* img, Writer* w) { + const ObjFormatImpl* fmt; if (!img || !w) return; - switch (img->c->target.obj) { - case CFREE_OBJ_ELF: - link_emit_elf(img, w); - return; - case CFREE_OBJ_MACHO: - link_emit_macho(img, w); - return; - case CFREE_OBJ_COFF: - link_emit_coff(img, w); - return; - case CFREE_OBJ_WASM: - compiler_panic(img->c, no_loc(), - "link_emit_image_writer: Wasm linker emit not yet " - "implemented"); + fmt = obj_format_lookup(img->c->target.obj); + if (fmt && fmt->link_emit) { + fmt->link_emit(img, w); + return; } compiler_panic(img->c, no_loc(), - "link_emit_image_writer: unknown obj format %u", + "link_emit_image_writer: unsupported obj format %u", (u32)img->c->target.obj); } diff --git a/src/obj/coff_archive.c b/src/obj/coff_archive.c @@ -0,0 +1,283 @@ +#include "obj/format.h" + +#include <string.h> + +#include "core/arena.h" +#include "core/core.h" +#include "core/pool.h" +#include "core/slice.h" +#include "obj/obj.h" + +/* mingw import archives store import thunks as ordinary COFF members with + 
* .idata$N sections. cfree's PE emitter synthesizes .idata itself, so the + * format hook below rewrites per-symbol members into DSO-shaped shims and + * drops archive head/trailer members before the generic linker reads them. */ + +typedef enum CoffArMemberClass { + COFF_AR_KEEP = 0, + COFF_AR_SHIM = 1, + COFF_AR_SKIP = 2, +} CoffArMemberClass; + +static const char kCoffImpPrefix_[] = "__imp_"; +static const u32 kCoffImpPrefixLen_ = (u32)(sizeof kCoffImpPrefix_ - 1u); +static const char kCoffHeadPrefix_[] = "_head_"; +static const u32 kCoffHeadPrefixLen_ = (u32)(sizeof kCoffHeadPrefix_ - 1u); +static const char kCoffInameSuffix_[] = "_iname"; +static const u32 kCoffInameSuffixLen_ = (u32)(sizeof kCoffInameSuffix_ - 1u); + +int coff_classify_obj_input(Compiler* c, ObjBuilder* ob, Sym* soname_out) { + Sym dll = 0; + (void)c; + if (!obj_get_coff_import_dll(ob, &dll) || !dll) return 0; + if (soname_out) *soname_out = dll; + return 1; +} + +Sym coff_archive_hint(Compiler* c, const char* path) { + const char* base; + const char* p; + size_t n; + size_t out_len; + char* out; + if (!c || !path || !*path) return 0; + base = path; + for (p = path; *p; ++p) + if (*p == '/' || *p == '\\') base = p + 1; + n = slice_from_cstr(base).len; + if (n >= 6 && memcmp(base + n - 6, ".dll.a", 6) == 0) + n -= 6; + else if (n >= 2 && memcmp(base + n - 2, ".a", 2) == 0) + n -= 2; + else if (n >= 4 && memcmp(base + n - 4, ".lib", 4) == 0) + n -= 4; + if (n >= 3 && memcmp(base, "lib", 3) == 0) { + base += 3; + n -= 3; + } + if (n == 0) return 0; + out_len = n + 4u; + out = (char*)arena_array(c->scratch, char, out_len); + memcpy(out, base, n); + memcpy(out + n, ".dll", 4); + return pool_intern_slice(c->global, (Slice){ .s = out, .len = (u32)out_len }); +} + +static Sym derive_dll_name_from_archive_member(Compiler* c, + const char* member_name, + Sym fallback) { + const char* base; + const char* p; + size_t n; + if (!member_name || !*member_name) return fallback; + base = member_name; + for (p 
= member_name; *p; ++p) + if (*p == '/' || *p == '\\') base = p + 1; + n = slice_from_cstr(base).len; + if (n >= 4 && memcmp(base + n - 4, ".dll", 4) == 0) + return pool_intern_slice(c->global, (Slice){ .s = base, .len = (u32)n }); + if (n >= 4 && memcmp(base + n - 4, ".DLL", 4) == 0) + return pool_intern_slice(c->global, (Slice){ .s = base, .len = (u32)n }); + return fallback; +} +/* Resolve the name held in one COFF symbol record `rec`. When bytes 0..3 are all zero, bytes 4..7 are read as a little-endian offset into `strtab` and the name runs from there to the first NUL, never past `strtab_size`; a NULL table or an offset at or beyond `strtab_size` yields the empty name. Otherwise the name is taken in place from the first 8 bytes of the record, ending at the first NUL if any. The result is returned as pointer plus length through name_out and len_out and is not necessarily NUL-terminated. */ +static void coff_resolve_sym_name_(const u8* rec, const u8* strtab, + u32 strtab_size, const char** name_out, + u32* len_out) { + u32 z = (u32)rec[0] | ((u32)rec[1] << 8) | ((u32)rec[2] << 16) | + ((u32)rec[3] << 24); +/* All-zero leading word: the name is in the string table. */ if (z == 0) { + u32 off = (u32)rec[4] | ((u32)rec[5] << 8) | ((u32)rec[6] << 16) | + ((u32)rec[7] << 24); + const char* s; + u32 max; + u32 n = 0; + if (!strtab || off >= strtab_size) { + *name_out = ""; + *len_out = 0; + return; + } + s = (const char*)(strtab + off); + max = strtab_size - off; +/* Bounded scan: stop at NUL or at the end of the table. */ while (n < max && s[n] != '\0') ++n; + *name_out = s; + *len_out = n; + return; + } +/* Inline name stored directly in the record. */ { + u32 n = 0; + while (n < 8 && rec[n] != '\0') ++n; + *name_out = (const char*)rec; + *len_out = n; + } +} +/* Byte sizes of a symbol record and of the file header as used by the parser below, and the storage-class value it treats as external. */ +#define COFF_SYM_REC_SIZE_ 18u +#define COFF_FILE_HDR_SIZE_ 20u +#define COFF_SYM_CLASS_EXTERNAL_ 2u +/* Classify one archive member from its raw bytes. Returns COFF_AR_SHIM, with *out_name set to the interned remainder of the first matching symbol name after kCoffImpPrefix_, when the member has an external symbol with a non-zero section number whose name starts with kCoffImpPrefix_ and also has a section whose name starts with ".idata$". Failing that, returns COFF_AR_SKIP when such a symbol starts with kCoffHeadPrefix_ or ends with kCoffInameSuffix_. Returns COFF_AR_KEEP in every other case, including a buffer shorter than the file header, a zero symbol count or symbol-table offset, or a symbol table that does not fit in `len`. *out_name is reset to 0 on entry. */ +static CoffArMemberClass classify_coff_archive_member_bytes( + Compiler* c, const u8* data, size_t len, Sym* out_name) { + u32 ptr_to_symtab; + u32 nsymbols; + u16 nsections; + const u8* sym_base; + const u8* strtab; + u32 strtab_size; + int has_imp = 0; + int has_idata = 0; + int has_head_def = 0; + int has_iname_def = 0; + Sym imp_bare_name = 0; + u32 i; + *out_name = 0; + if (len < COFF_FILE_HDR_SIZE_) return COFF_AR_KEEP; +/* Header fields are assembled byte by byte, least significant byte first. */ nsections = (u16)((u32)data[2] | ((u32)data[3] << 8)); + ptr_to_symtab = (u32)data[8] | ((u32)data[9] << 8) | + ((u32)data[10] << 16) | ((u32)data[11] << 24); + nsymbols = (u32)data[12] | ((u32)data[13] << 8) | + ((u32)data[14] << 16) | ((u32)data[15] << 24); + if (nsymbols == 0 || ptr_to_symtab == 0) return COFF_AR_KEEP; +/* Look through the section headers (40 bytes each, right after the file header) only if all of them fit in the buffer. */ if ((u64)COFF_FILE_HDR_SIZE_ + 
(u64)nsections * 40u <= (u64)len) { + u32 si; + for (si = 0; si < (u32)nsections; ++si) { + const u8* sh = data + COFF_FILE_HDR_SIZE_ + (u64)si * 40u; + if (memcmp(sh, ".idata$", 7) == 0) { + has_idata = 1; + break; + } + } + } +/* Bounds check done in 64-bit arithmetic. */ if ((u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYM_REC_SIZE_ > (u64)len) + return COFF_AR_KEEP; + sym_base = data + ptr_to_symtab; +/* Locate the string table right after the symbol table. Its first 4 bytes give its declared size; a size below 4 or one that overruns the buffer is treated as no string table. */ { + u64 symtab_end = + (u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYM_REC_SIZE_; + if (symtab_end + 4u <= (u64)len) { + u32 declared = (u32)data[symtab_end] | + ((u32)data[symtab_end + 1] << 8) | + ((u32)data[symtab_end + 2] << 16) | + ((u32)data[symtab_end + 3] << 24); + if (declared < 4u || symtab_end + (u64)declared > (u64)len) { + strtab = NULL; + strtab_size = 0; + } else { + strtab = data + symtab_end; + strtab_size = declared; + } + } else { + strtab = NULL; + strtab_size = 0; + } + } +/* Walk the symbol records, stepping over auxiliary records. */ i = 0; + while (i < nsymbols) { + const u8* p = sym_base + (u64)i * COFF_SYM_REC_SIZE_; + u16 sec_num = (u16)((u32)p[12] | ((u32)p[13] << 8)); + u8 sclass = p[16]; + u8 naux = p[17]; + const char* nm = NULL; + u32 nlen = 0; +/* Only external symbols with a non-zero section number are inspected. */ if (sclass == COFF_SYM_CLASS_EXTERNAL_ && sec_num != 0) { + coff_resolve_sym_name_(p, strtab, strtab_size, &nm, &nlen); + if (nlen > kCoffImpPrefixLen_ && + memcmp(nm, kCoffImpPrefix_, kCoffImpPrefixLen_) == 0) { + has_imp = 1; +/* Keep the bare name of the first match only. */ if (imp_bare_name == 0) { + const char* tail = nm + kCoffImpPrefixLen_; + u32 tail_len = nlen - kCoffImpPrefixLen_; + imp_bare_name = + pool_intern_slice(c->global, (Slice){ .s = tail, .len = tail_len }); + } + } else if (nlen > kCoffHeadPrefixLen_ && + memcmp(nm, kCoffHeadPrefix_, kCoffHeadPrefixLen_) == 0) { + has_head_def = 1; + } else if (nlen > kCoffInameSuffixLen_ && + memcmp(nm + nlen - kCoffInameSuffixLen_, kCoffInameSuffix_, + kCoffInameSuffixLen_) == 0) { + has_iname_def = 1; + } + } +/* Byte 17 of a record is its auxiliary-record count. */ i += 1u + (u32)naux; + } +/* The shim verdict takes priority over the skip verdict. */ if (has_imp && has_idata) { + *out_name = imp_bare_name; + return COFF_AR_SHIM; + } + if (has_head_def || has_iname_def) return COFF_AR_SKIP; + 
return COFF_AR_KEEP; +} +/* Build a synthetic object for a long-form import. Registers `bare_name` via obj_symbol_ex() with SB_GLOBAL and SK_FUNC, and the name kCoffImpPrefix_ followed by the bare name with SB_GLOBAL and SK_OBJ, both with OBJ_SEC_NONE; marks both referenced, records `dll_name` with obj_set_coff_import_dll(), and finalizes the builder. Returns NULL when either Sym is 0, the bare name is empty or unavailable from the pool, or obj_new() fails. */ +static ObjBuilder* build_coff_long_import_shim(Compiler* c, Sym bare_name, + Sym dll_name) { + ObjBuilder* ob; + const char* bare; + size_t bare_len = 0; + u32 imp_len; + char* imp_buf; + Sym imp_sn; + ObjSymId id; + ObjSymId imp_id; + if (bare_name == 0 || dll_name == 0) return NULL; + { + Slice bare_s = pool_slice(c->global, bare_name); + bare = bare_s.s; + bare_len = bare_s.len; + } + if (!bare || bare_len == 0) return NULL; + ob = obj_new(c); + if (!ob) return NULL; + id = obj_symbol_ex(ob, bare_name, SB_GLOBAL, SV_DEFAULT, SK_FUNC, + OBJ_SEC_NONE, 0, 0, 0); + obj_sym_mark_referenced(ob, id); +/* Concatenate prefix and bare name in scratch memory, then intern by explicit length (no NUL is appended). */ imp_len = kCoffImpPrefixLen_ + (u32)bare_len; + imp_buf = (char*)arena_array(c->scratch, char, imp_len); + memcpy(imp_buf, kCoffImpPrefix_, kCoffImpPrefixLen_); + memcpy(imp_buf + kCoffImpPrefixLen_, bare, bare_len); + imp_sn = pool_intern_slice(c->global, (Slice){ .s = imp_buf, .len = imp_len }); + imp_id = obj_symbol_ex(ob, imp_sn, SB_GLOBAL, SV_DEFAULT, SK_OBJ, + OBJ_SEC_NONE, 0, 0, 0); + obj_sym_mark_referenced(ob, imp_id); + obj_set_coff_import_dll(ob, dll_name); + obj_finalize(ob); + return ob; +} +/* Returns non-zero only when `bare_name` is exactly "__getmainargs" or "__p___initenv"; returns 0 for a 0 Sym or when the pool has no string for it. */ +static int coff_skip_long_import_shim_bare(Compiler* c, Sym bare_name) { + const char* s; + size_t n = 0; + if (!bare_name) return 0; + { + Slice s_s = pool_slice(c->global, bare_name); + s = s_s.s; + n = s_s.len; + } + if (!s) return 0; + return (n == 13 && memcmp(s, "__getmainargs", 13) == 0) || + (n == 13 && memcmp(s, "__p___initenv", 13) == 0); +} +/* COFF archive-member hook (wired into obj_format_impl_coff in registry.c). Sets *out to NULL when `out` is given. Returns KEEP unless `c` and `mem` are non-NULL, mem->bin_fmt is CFREE_BIN_COFF and mem->archive_hint is non-zero. For a member classified as SHIM: returns SKIP if its bare name is on the skip list, otherwise builds the import shim into *out and returns REPLACE, using the DLL name derived from mem->member_name with mem->archive_hint as fallback. A SKIP classification returns SKIP; anything else returns KEEP. NOTE(review): REPLACE is returned even if `out` is NULL or build_coff_long_import_shim() returns NULL -- confirm callers tolerate a NULL builder. */ +ObjFormatArchiveAction coff_archive_member(Compiler* c, + const ObjFormatArchiveMember* mem, + ObjBuilder** out) { + CoffArMemberClass cls; + Sym bare = 0; + if (out) *out = NULL; + if (!c || !mem || mem->bin_fmt != CFREE_BIN_COFF || mem->archive_hint == 0) + return OBJ_FORMAT_ARCHIVE_KEEP; + cls = classify_coff_archive_member_bytes(c, mem->data, mem->len, &bare); + if (cls == COFF_AR_SHIM) { + if (coff_skip_long_import_shim_bare(c, bare)) { + return OBJ_FORMAT_ARCHIVE_SKIP; + } else { + Sym 
member_dll = + derive_dll_name_from_archive_member(c, mem->member_name, + mem->archive_hint); + if (out) *out = build_coff_long_import_shim(c, bare, member_dll); + return OBJ_FORMAT_ARCHIVE_REPLACE; + } + } + if (cls == COFF_AR_SKIP) return OBJ_FORMAT_ARCHIVE_SKIP; + return OBJ_FORMAT_ARCHIVE_KEEP; +} diff --git a/src/obj/format.h b/src/obj/format.h @@ -0,0 +1,67 @@ +#ifndef CFREE_OBJ_FORMAT_H +#define CFREE_OBJ_FORMAT_H + +#include <stddef.h> + +#include <cfree/object.h> + +#include "core/core.h" +/* Only pointers to LinkImage are used here, so a forward declaration suffices. */ +typedef struct LinkImage LinkImage; +/* Signatures of the per-format entry points stored in ObjFormatImpl. */ +typedef ObjBuilder* (*ObjFormatReadFn)(Compiler*, const char* name, + const u8* data, size_t len); +typedef ObjBuilder* (*ObjFormatReadDsoFn)(Compiler*, const char* name, + const u8* data, size_t len, + Sym* soname_out); +typedef void (*ObjFormatEmitFn)(Compiler*, ObjBuilder*, Writer*); +typedef void (*ObjFormatLinkEmitFn)(LinkImage*, Writer*); +/* Verdict returned by an archive_member hook for one archive member. */ +typedef enum ObjFormatArchiveAction { + OBJ_FORMAT_ARCHIVE_KEEP = 0, + OBJ_FORMAT_ARCHIVE_REPLACE = 1, + OBJ_FORMAT_ARCHIVE_SKIP = 2, +} ObjFormatArchiveAction; +/* Description of one archive member passed to an archive_member hook. NOTE(review): archive_hint is presumably the value produced by the archive_hint hook for the containing archive -- confirm against the caller. */ +typedef struct ObjFormatArchiveMember { + const char* archive_name; + const char* member_name; + const u8* data; + size_t len; + CfreeBinFmt bin_fmt; + Sym archive_hint; +} ObjFormatArchiveMember; +/* DSO reader selection filled in by obj_format_dso_reader_for_bytes(). */ +typedef struct ObjFormatDsoReader { + const struct ObjFormatImpl* format; + ObjFormatReadDsoFn read; + const char* name; +} ObjFormatDsoReader; +/* Per-format table of entry points. Function pointers may be NULL: the wasm entry in registry.c has no read_dso or link_emit, and only the COFF entry sets the three ingestion hooks. */ +typedef struct ObjFormatImpl { + ObjFmt kind; + CfreeBinFmt bin_fmt; + const char* name; + const char* read_name; + const char* read_dso_name; + + ObjFormatEmitFn emit; + ObjFormatReadFn read; + ObjFormatReadDsoFn read_dso; + ObjFormatLinkEmitFn link_emit; + + /* Optional format-specific linker ingestion policy. 
*/ + int (*classify_obj_input)(Compiler*, ObjBuilder*, Sym* soname_out); + Sym (*archive_hint)(Compiler*, const char* archive_name); + ObjFormatArchiveAction (*archive_member)(Compiler*, + const ObjFormatArchiveMember*, + ObjBuilder** out); +} ObjFormatImpl; +/* Registry queries implemented in registry.c. Both lookups return NULL when nothing matches; obj_format_dso_reader_for_bytes() returns 1 on success and 0 otherwise. */ +const ObjFormatImpl* obj_format_lookup(ObjFmt fmt); +const ObjFormatImpl* obj_format_lookup_bin(CfreeBinFmt fmt); +int obj_format_dso_reader_for_bytes(const u8* data, size_t len, + CfreeBinFmt* bin_out, + ObjFormatDsoReader* out); + +#endif diff --git a/src/obj/registry.c b/src/obj/registry.c @@ -0,0 +1,143 @@ +#include "obj/format.h" + +#include <cfree/config.h> +#include <string.h> + +#include "core/slice.h" +#include "obj/obj.h" +/* Link-image emitters, declared locally rather than through a header. */ +void link_emit_elf(LinkImage*, Writer*); +void link_emit_macho(LinkImage*, Writer*); +void link_emit_coff(LinkImage*, Writer*); +/* COFF ingestion hooks, declared only when COFF support is enabled. */ +#if CFREE_OBJ_COFF_ENABLED +int coff_classify_obj_input(Compiler*, ObjBuilder*, Sym* soname_out); +Sym coff_archive_hint(Compiler*, const char* archive_name); +ObjFormatArchiveAction coff_archive_member(Compiler*, + const ObjFormatArchiveMember*, + ObjBuilder** out); +#endif +/* wasm entry: unconditional, with no DSO reader and no link-image emitter. */ +static const ObjFormatImpl obj_format_impl_wasm = { + .kind = CFREE_OBJ_WASM, + .bin_fmt = CFREE_BIN_WASM, + .name = "wasm", + .read_name = "read_wasm", + .read_dso_name = NULL, + .emit = emit_wasm, + .read = read_wasm, + .read_dso = NULL, + .link_emit = NULL, +}; +/* ELF entry. */ +#if CFREE_OBJ_ELF_ENABLED +static const ObjFormatImpl obj_format_impl_elf = { + .kind = CFREE_OBJ_ELF, + .bin_fmt = CFREE_BIN_ELF, + .name = "elf", + .read_name = "read_elf", + .read_dso_name = "read_elf_dso", + .emit = emit_elf, + .read = read_elf, + .read_dso = read_elf_dso, + .link_emit = link_emit_elf, +}; +#endif +/* Mach-O entry. */ +#if CFREE_OBJ_MACHO_ENABLED +static const ObjFormatImpl obj_format_impl_macho = { + .kind = CFREE_OBJ_MACHO, + .bin_fmt = CFREE_BIN_MACHO, + .name = "macho", + .read_name = "read_macho", + .read_dso_name = "read_macho_dso", + .emit = emit_macho, + .read = read_macho, + .read_dso = read_macho_dso, 
+ .link_emit = link_emit_macho, +}; +#endif +/* COFF entry: the only one that installs the optional ingestion hooks. */ +#if CFREE_OBJ_COFF_ENABLED +static const ObjFormatImpl obj_format_impl_coff = { + .kind = CFREE_OBJ_COFF, + .bin_fmt = CFREE_BIN_COFF, + .name = "coff", + .read_name = "read_coff", + .read_dso_name = "read_coff_dso", + .emit = emit_coff, + .read = read_coff, + .read_dso = read_coff_dso, + .link_emit = link_emit_coff, + .classify_obj_input = coff_classify_obj_input, + .archive_hint = coff_archive_hint, + .archive_member = coff_archive_member, +}; +#endif +/* Every implementation compiled into this build; obj_format_lookup() scans it in order. */ +static const ObjFormatImpl* const obj_format_impls[] = { +#if CFREE_OBJ_ELF_ENABLED + &obj_format_impl_elf, +#endif +#if CFREE_OBJ_COFF_ENABLED + &obj_format_impl_coff, +#endif +#if CFREE_OBJ_MACHO_ENABLED + &obj_format_impl_macho, +#endif + &obj_format_impl_wasm, +}; +/* Returns the implementation whose kind equals `fmt`, or NULL if none is registered. */ +const ObjFormatImpl* obj_format_lookup(ObjFmt fmt) { + u32 i; + for (i = 0; i < (u32)(sizeof obj_format_impls / sizeof obj_format_impls[0]); + ++i) { + if (obj_format_impls[i]->kind == fmt) return obj_format_impls[i]; + } + return NULL; +} +/* Translates a CfreeBinFmt to the matching ObjFmt and looks it up; values not listed in the switch yield NULL. NOTE(review): CFREE_BIN_PE has no case here, although obj_format_dso_reader_for_bytes() routes it to COFF -- confirm the asymmetry is intended. */ +const ObjFormatImpl* obj_format_lookup_bin(CfreeBinFmt fmt) { + switch (fmt) { + case CFREE_BIN_ELF: + return obj_format_lookup(CFREE_OBJ_ELF); + case CFREE_BIN_COFF: + return obj_format_lookup(CFREE_OBJ_COFF); + case CFREE_BIN_MACHO: + return obj_format_lookup(CFREE_OBJ_MACHO); + case CFREE_BIN_WASM: + return obj_format_lookup(CFREE_OBJ_WASM); + default: + return NULL; + } +} +/* Selects a DSO reader for `data`. Returns 0 at once if `out` is NULL; otherwise zeroes *out, presets *bin_out (when given) to CFREE_BIN_UNKNOWN, and returns 0 if `data` is NULL. With Mach-O enabled, a buffer that begins with "---" selects read_tbd and returns 1 while *bin_out stays CFREE_BIN_UNKNOWN. Otherwise the format comes from cfree_detect_fmt() and is stored in *bin_out; the function returns 1 with *out filled in only if a registered implementation with a read_dso function is found. */ +int obj_format_dso_reader_for_bytes(const u8* data, size_t len, + CfreeBinFmt* bin_out, + ObjFormatDsoReader* out) { + const ObjFormatImpl* fmt; + CfreeBinFmt bin; + if (!out) return 0; + memset(out, 0, sizeof(*out)); + if (bin_out) *bin_out = CFREE_BIN_UNKNOWN; + if (!data) return 0; +/* Text stub check: three leading dashes select the .tbd reader. */ +#if CFREE_OBJ_MACHO_ENABLED + if (len >= 3 && data[0] == '-' && data[1] == '-' && data[2] == '-') { + out->format = &obj_format_impl_macho; + out->read = read_tbd; + out->name = "read_tbd"; + return 1; + } +#endif +/* Binary formats: CFREE_BIN_PE shares the COFF implementation. */ + bin = cfree_detect_fmt(data, len); + if (bin_out) *bin_out = bin; + fmt = (bin == 
CFREE_BIN_PE) ? obj_format_lookup(CFREE_OBJ_COFF) + : obj_format_lookup_bin(bin); +/* Fail when no implementation is registered or it cannot read DSOs. */ if (!fmt || !fmt->read_dso) return 0; + out->format = fmt; + out->read = fmt->read_dso; + out->name = fmt->read_dso_name; + return 1; +} diff --git a/test/coff/pe-import-mingw.c b/test/coff/pe-import-mingw.c @@ -267,7 +267,7 @@ int main(void) { /*group_id=*/0); EXPECT(ar_id != LINK_INPUT_NONE, "link_add_archive_bytes returned LINK_INPUT_NONE"); - link_set_entry(l, "mainCRTStartup"); + link_set_entry(l, CFREE_SLICE_LIT("mainCRTStartup")); link_set_pie(l, 1); link_set_emit_static_exe(l, 1); diff --git a/test/coff/pe-import-smoke.c b/test/coff/pe-import-smoke.c @@ -258,7 +258,7 @@ int main(void) { l, "ExitProcess.lib-member", shim, SHIM_TOTAL_LEN); EXPECT(dso_id != LINK_INPUT_NONE, "link_add_obj_bytes returned LINK_INPUT_NONE for short-import shim"); - link_set_entry(l, "mainCRTStartup"); + link_set_entry(l, CFREE_SLICE_LIT("mainCRTStartup")); link_set_pie(l, 1); link_set_emit_static_exe(l, 1); diff --git a/test/coff/pe-mixed-archive.c b/test/coff/pe-mixed-archive.c @@ -282,7 +282,7 @@ int main(void) { /*group_id=*/0); EXPECT(ar_id != LINK_INPUT_NONE, "link_add_archive_bytes returned LINK_INPUT_NONE"); - link_set_entry(l, "mainCRTStartup"); + link_set_entry(l, CFREE_SLICE_LIT("mainCRTStartup")); link_set_pie(l, 1); link_set_emit_static_exe(l, 1);