commit b56c3ece7c3472325f2596146c84be3b61cf2079
parent 2f677dcf2322c6276eb432fbadbfe5d3a8fdbad1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 25 May 2026 03:10:25 -0700
Add object format registry
Diffstat:
10 files changed, 603 insertions(+), 502 deletions(-)
diff --git a/doc/REGISTRY.md b/doc/REGISTRY.md
@@ -78,62 +78,72 @@ No new vtable or registry code; this axis is the cheapest of the four.
## Axis 2: Object/image formats
-**Status: no vtable exists today; switches in multiple call sites.
-Largest piece of new work.**
+**Status: vtable and registry exist; source gating remains.**
-Today `emit_elf` / `emit_macho` / `emit_coff` are bare functions
-(`src/obj/{elf,macho,coff}_emit.c`), the read paths are likewise bare
-(`*_read.c`), and link-image emission dispatches via a switch in
-`src/link/link.c:958` calling `link_emit_elf` / `link_emit_macho` /
-`link_emit_coff`. DSO header readers (`*_read_dso.c`) follow the same
-shape.
+`emit_elf` / `emit_macho` / `emit_coff` are still implemented as
+format-specific functions (`src/obj/{elf,macho,coff}_emit.c`), as are
+the read paths (`*_read.c`), DSO readers (`*_read_dso.c`), and
+link-image emitters (`src/link/link_{elf,macho,coff}.c`). Generic call
+sites now reach them through `ObjFormatImpl` in `src/obj/format.h` and
+`src/obj/registry.c`.
-**New vtable** (`src/obj/format.h`):
+**Vtable** (`src/obj/format.h`):
```c
typedef struct ObjFormatImpl {
- CfreeObjFormat kind;
+ ObjFmt kind;
+ CfreeBinFmt bin_fmt;
const char* name;
+ const char* read_name;
+ const char* read_dso_name;
/* Relocatable object emit + read. */
void (*emit)(Compiler*, ObjBuilder*, Writer*);
- CfreeStatus (*read)(Compiler*, const u8* data, size_t len,
- ObjBuilder* out);
+ ObjBuilder* (*read)(Compiler*, const char* name, const u8* data, size_t len);
+
+ /* DSO header reader for `-lfoo` resolution against .so/.dylib/.dll
+ * plus Mach-O .tbd handling via obj_format_dso_reader_for_bytes(). */
+ ObjBuilder* (*read_dso)(Compiler*, const char* name, const u8*, size_t,
+ Sym* soname_out);
/* Link-image emit (executable / shared object). */
void (*link_emit)(LinkImage*, Writer*);
- /* DSO header reader for `-lfoo` resolution against .so/.dylib/.dll. */
- CfreeStatus (*read_dso)(Compiler*, const u8*, size_t, /* ... */);
+ /* Optional format-specific linker input policy. */
+ int (*classify_obj_input)(Compiler*, ObjBuilder*, Sym* soname_out);
+ Sym (*archive_hint)(Compiler*, const char* archive_name);
+ ObjFormatArchiveAction (*archive_member)(Compiler*,
+ const ObjFormatArchiveMember*,
+ ObjBuilder** out);
} ObjFormatImpl;
-const ObjFormatImpl* obj_format_lookup(CfreeObjFormat);
+const ObjFormatImpl* obj_format_lookup(ObjFmt);
+const ObjFormatImpl* obj_format_lookup_bin(CfreeBinFmt);
```
-**New registry** (`src/obj/registry.c`), gated the same way as the arch
+**Registry** (`src/obj/registry.c`). The Wasm entry is always present;
+the ELF, COFF, and Mach-O entries are gated the same way as the arch
registry:
```c
-extern const ObjFormatImpl obj_format_impl_elf;
-extern const ObjFormatImpl obj_format_impl_macho;
-extern const ObjFormatImpl obj_format_impl_coff;
-
static const ObjFormatImpl* const obj_format_impls[] = {
-#if CFREE_OBJ_ELF
+#if CFREE_OBJ_ELF_ENABLED
&obj_format_impl_elf,
#endif
-#if CFREE_OBJ_MACHO
- &obj_format_impl_macho,
-#endif
-#if CFREE_OBJ_COFF
+#if CFREE_OBJ_COFF_ENABLED
&obj_format_impl_coff,
#endif
+#if CFREE_OBJ_MACHO_ENABLED
+ &obj_format_impl_macho,
+#endif
+ &obj_format_impl_wasm,
};
```
**Call-site changes**: the switch in `src/link/link.c` and the obj
-emit/read entry points collapse to
-`obj_format_lookup(target.obj_format)->fn(...)`.
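+emit/read entry points have collapsed to
+`obj_format_lookup(target.obj)->fn(...)`; inputs whose format is sniffed
+from their bytes go through `obj_format_lookup_bin(fmt)` instead. COFF
+short-import and long-form import-archive handling now lives behind the
+object-format hooks (`classify_obj_input`, `archive_hint`,
+`archive_member`), so linker input ingestion stays generic over the
+object format.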
The arch-side format reloc tables (`ArchElfOps`, `ArchMachoOps`,
`ArchCoffOps` on `ArchImpl`) stay where they are — they're the
diff --git a/src/api/object_builder.c b/src/api/object_builder.c
@@ -4,6 +4,7 @@
#include <string.h>
#include "core/core.h"
+#include "obj/format.h"
#include "obj/obj.h"
static ObjSecId pub_to_intern_sec(CfreeObjSection s) {
@@ -268,24 +269,12 @@ CfreeStatus cfree_obj_builder_emit(CfreeObjBuilder* b, CfreeWriter* w) {
CfreeStatus cfree_obj_builder_emit_as(CfreeObjBuilder* b, CfreeObjFmt fmt,
CfreeWriter* w) {
Compiler* c;
+ const ObjFormatImpl* impl; /* registry entry for `fmt` */
if (!b || !w) return CFREE_INVALID;
c = obj_compiler(b);
if (!c) return CFREE_INVALID;
- switch (fmt) {
- case CFREE_OBJ_ELF:
- emit_elf(c, b, w);
- break;
- case CFREE_OBJ_COFF:
- emit_coff(c, b, w);
- break;
- case CFREE_OBJ_MACHO:
- emit_macho(c, b, w);
- break;
- case CFREE_OBJ_WASM:
- emit_wasm(c, b, w);
- break;
- default:
- return CFREE_UNSUPPORTED;
- }
+ impl = obj_format_lookup(fmt); /* replaces the per-format switch */
+ if (!impl || !impl->emit) return CFREE_UNSUPPORTED; /* no entry or no emitter */
+ impl->emit(c, b, w); /* returns void; status comes from the writer below */
return cfree_writer_status(w);
}
diff --git a/src/api/object_file.c b/src/api/object_file.c
@@ -10,6 +10,7 @@
#include "core/heap.h"
#include "core/pool.h"
#include "core/slice.h"
+#include "obj/format.h"
#include "obj/obj.h"
struct CfreeObjFile {
@@ -25,17 +26,9 @@ struct CfreeObjFile {
static ObjBuilder* obj_read_bytes(Compiler* c, const char* name, const u8* data,
size_t len, ObjFmt fmt) {
- switch (fmt) {
- case CFREE_OBJ_ELF:
- return read_elf(c, name, data, len);
- case CFREE_OBJ_COFF:
- return read_coff(c, name, data, len);
- case CFREE_OBJ_MACHO:
- return read_macho(c, name, data, len);
- case CFREE_OBJ_WASM:
- return read_wasm(c, name, data, len);
- }
- return NULL;
+ const ObjFormatImpl* impl = obj_format_lookup(fmt); /* reader chosen via the registry */
+ if (!impl || !impl->read) return NULL; /* same NULL the old switch fell through to */
+ return impl->read(c, name, data, len);
}
CfreeStatus cfree_obj_open(const CfreeContext* ctx, CfreeSlice name,
diff --git a/src/link/link.c b/src/link/link.c
@@ -22,6 +22,7 @@
#include "core/slice.h"
#include "core/vec.h"
#include "link/link_internal.h"
+#include "obj/format.h"
/* ---- SrcLoc helper ---- */
@@ -121,37 +122,24 @@ LinkInputId link_add_obj(Linker* l, ObjBuilder* ob) {
LinkInputId link_add_obj_bytes(Linker* l, const char* name, const u8* data,
size_t len) {
- /* Detect format from magic and dispatch to the matching reader.
- * The returned ObjBuilder is owned by the linker and freed via the
- * input cleanup. ELF and Mach-O are supported. */
ObjBuilder* ob;
LinkInput* in;
LinkInputId id;
CfreeBinFmt fmt;
+ const ObjFormatImpl* impl;
const char* reader_name;
if (!l || !data || !len) return LINK_INPUT_NONE;
fmt = cfree_detect_fmt(data, len);
- switch (fmt) {
- case CFREE_BIN_ELF:
- ob = read_elf(l->c, name, data, len);
- reader_name = "read_elf";
- break;
- case CFREE_BIN_MACHO:
- ob = read_macho(l->c, name, data, len);
- reader_name = "read_macho";
- break;
- case CFREE_BIN_COFF:
- ob = read_coff(l->c, name, data, len);
- reader_name = "read_coff";
- break;
- default:
- compiler_panic(l->c, no_loc(),
- "link_add_obj_bytes: unsupported object format "
- "(fmt=%u) for '%.*s'",
- (u32)fmt,
- SLICE_ARG(name ? slice_from_cstr(name)
- : SLICE_LIT("(unnamed)")));
- }
+ impl = obj_format_lookup_bin(fmt);
+ if (!impl || !impl->read)
+ compiler_panic(l->c, no_loc(),
+ "link_add_obj_bytes: unsupported object format "
+ "(fmt=%u) for '%.*s'",
+ (u32)fmt,
+ SLICE_ARG(name ? slice_from_cstr(name)
+ : SLICE_LIT("(unnamed)")));
+ reader_name = impl->read_name;
+ ob = impl->read(l->c, name, data, len);
if (!ob)
compiler_panic(l->c, no_loc(),
"link_add_obj_bytes: %.*s returned NULL for '%.*s'",
@@ -162,16 +150,12 @@ LinkInputId link_add_obj_bytes(Linker* l, const char* name, const u8* data,
in->order = l->next_input_order++;
in->obj = ob; /* re-uses the ObjBuilder slot for ownership */
in->name = name ? pool_intern_slice(l->c->global, slice_from_cstr(name)) : 0;
- /* PE/COFF short-import: read_coff_short_import stashes the providing
- * DLL name on the builder. Reclassify the input as a DSO so the
- * resolver treats its symbols as exports (matching the .lib archive
- * member path in include_archive_member). */
{
- Sym coff_dll = 0;
- if (fmt == CFREE_BIN_COFF && obj_get_coff_import_dll(ob, &coff_dll) &&
- coff_dll) {
+ Sym soname = 0;
+ if (impl->classify_obj_input &&
+ impl->classify_obj_input(l->c, ob, &soname)) {
in->kind = LINK_INPUT_DSO_BYTES;
- in->soname = coff_dll;
+ in->soname = soname;
} else {
in->kind = LINK_INPUT_OBJ_BYTES;
}
@@ -186,47 +170,18 @@ LinkInputId link_add_dso_bytes(Linker* l, const char* name, const u8* data,
LinkInputId id;
Sym soname = 0;
CfreeBinFmt fmt;
+ ObjFormatDsoReader reader;
const char* reader_name;
if (!l || !data || !len) return LINK_INPUT_NONE;
- /* Three DSO surfaces are supported on input:
- * - ELF ET_DYN .so (read_elf_dso)
- * - Mach-O MH_DYLIB / MH_BUNDLE (read_macho_dso)
- * - Apple .tbd text-based stubs (read_tbd; magic is "---")
- * The first two are detected via cfree_detect_fmt; .tbd is a textual
- * format with no binary magic, so we sniff the leading "---". */
- if (len >= 3 && data[0] == '-' && data[1] == '-' && data[2] == '-') {
- ob = read_tbd(l->c, name, data, len, &soname);
- reader_name = "read_tbd";
- } else {
- fmt = cfree_detect_fmt(data, len);
- switch (fmt) {
- case CFREE_BIN_ELF:
- ob = read_elf_dso(l->c, name, data, len, &soname);
- reader_name = "read_elf_dso";
- break;
- case CFREE_BIN_MACHO:
- ob = read_macho_dso(l->c, name, data, len, &soname);
- reader_name = "read_macho_dso";
- break;
- case CFREE_BIN_COFF:
- case CFREE_BIN_PE:
- /* Both spellings route through read_coff_dso: CFREE_BIN_PE is
- * the MZ/PE-signed form (a real .dll), CFREE_BIN_COFF can land
- * here when the caller hands us a single short-import record
- * directly (rare; .lib archives are the usual conveyance and
- * are handled by link_add_archive_bytes). */
- ob = read_coff_dso(l->c, name, data, len, &soname);
- reader_name = "read_coff_dso";
- break;
- default:
- compiler_panic(l->c, no_loc(),
- "link_add_dso_bytes: unsupported DSO format "
- "(fmt=%u) for '%.*s'",
- (u32)fmt,
- SLICE_ARG(name ? slice_from_cstr(name)
- : SLICE_LIT("(unnamed)")));
- }
- }
+ if (!obj_format_dso_reader_for_bytes(data, len, &fmt, &reader))
+ compiler_panic(l->c, no_loc(),
+ "link_add_dso_bytes: unsupported DSO format "
+ "(fmt=%u) for '%.*s'",
+ (u32)fmt,
+ SLICE_ARG(name ? slice_from_cstr(name)
+ : SLICE_LIT("(unnamed)")));
+ reader_name = reader.name;
+ ob = reader.read(l->c, name, data, len, &soname);
if (!ob)
compiler_panic(l->c, no_loc(),
"link_add_dso_bytes: %.*s returned NULL for '%.*s'",
@@ -255,313 +210,6 @@ LinkInputId link_add_dso_bytes(Linker* l, const char* name, const u8* data,
return id;
}
-/* ---- COFF long-form import-archive support ----
- *
- * mingw `.a` archives (e.g. libkernel32.a) don't use the Microsoft
- * short-import record format (Sig1=0/Sig2=0xFFFF — handled in
- * read_coff_short_import). Instead every archive member is a regular
- * long-form COFF .o file containing `.idata$N` sections. Three flavors
- * appear:
- *
- * - Head member (e.g. libkernel32h.o): defines `_head_lib64_<lib>_a`,
- * has `.idata$2` with one IMAGE_IMPORT_DESCRIPTOR template plus
- * sentinel `.idata$4` / `.idata$5` slots.
- * - Trailer member (e.g. libkernel32t.o): defines
- * `__lib64_lib<lib>_a_iname` (the DLL-name string), with tiny
- * `.idata$4` / `.idata$5` / `.idata$7` terminators.
- * - Per-function stub (e.g. libkernel32s00001.o for ExitProcess):
- * defines `__imp_<name>` (the IAT slot in `.idata$5`) and the bare
- * `<name>` in `.text` (a 6-byte `ff 25 disp32` indirect jump
- * against `__imp_<name>`). Carries `.idata$4` (ILT), `.idata$5`
- * (IAT), `.idata$6` (hint+name), `.idata$7` (DLL-name back-ptr).
- *
- * cfree's link-emit path synthesizes the entire .idata from
- * LinkSymbol.imported entries (link_emit_coff). The long-form members'
- * `.idata$N` byte content is therefore redundant — only the symbol
- * naming the export matters. We absorb the per-function stubs at
- * archive-ingest time by rewriting them into short-import-shaped
- * DSO shims (matching what read_coff_short_import produces), and we
- * drop the head/trailer members entirely.
- *
- * DLL-name source: mingw/llvm-mingw long import members are named after
- * the providing DLL (`KERNEL32.dll`, `api-ms-win-crt-runtime-l1-1-0.dll`,
- * ...). Fall back to the archive filename for older import archives.
- *
- * The conversion is gated on Compiler.target.obj == CFREE_OBJ_COFF so
- * non-Windows targets are unaffected. */
-
-typedef enum CoffArMemberClass {
- COFF_AR_KEEP = 0, /* regular .obj — leave as LINK_INPUT_OBJ_BYTES */
- COFF_AR_SHIM = 1, /* per-function stub — replaced with DSO shim */
- COFF_AR_SKIP = 2, /* head/trailer — drop entirely */
-} CoffArMemberClass;
-
-static const char kCoffImpPrefix_[] = "__imp_";
-static const u32 kCoffImpPrefixLen_ = (u32)(sizeof kCoffImpPrefix_ - 1u);
-static const char kCoffHeadPrefix_[] = "_head_";
-static const u32 kCoffHeadPrefixLen_ = (u32)(sizeof kCoffHeadPrefix_ - 1u);
-static const char kCoffInameSuffix_[] = "_iname";
-static const u32 kCoffInameSuffixLen_ = (u32)(sizeof kCoffInameSuffix_ - 1u);
-
-/* Derive a DLL name from the archive path. Handles:
- * path/to/libkernel32.a -> "kernel32.dll"
- * path/to/libkernel32.dll.a -> "kernel32.dll"
- * path/to/kernel32.lib -> "kernel32.dll"
- * path/to/libfoo -> "foo.dll"
- * If nothing recognizable, returns the interned basename verbatim
- * (callers can still match by name; case-insensitive at runtime). */
-static Sym derive_dll_name_from_archive_path(Compiler* c, const char* path) {
- const char* base;
- const char* p;
- size_t n;
- size_t out_len;
- char* out;
- Sym sym;
- if (!path || !*path) return 0;
- base = path;
- for (p = path; *p; ++p)
- if (*p == '/' || *p == '\\') base = p + 1;
- n = slice_from_cstr(base).len;
- /* Strip trailing ".dll.a" / ".a" / ".lib" (case-sensitive — mingw
- * uses lowercase, MSVC uses .lib). */
- if (n >= 6 && memcmp(base + n - 6, ".dll.a", 6) == 0) n -= 6;
- else if (n >= 2 && memcmp(base + n - 2, ".a", 2) == 0) n -= 2;
- else if (n >= 4 && memcmp(base + n - 4, ".lib", 4) == 0) n -= 4;
- /* Strip leading "lib" prefix. */
- if (n >= 3 && memcmp(base, "lib", 3) == 0) {
- base += 3;
- n -= 3;
- }
- if (n == 0) return 0;
- /* Append ".dll". */
- out_len = n + 4u;
- out = (char*)arena_array(c->scratch, char, out_len);
- memcpy(out, base, n);
- memcpy(out + n, ".dll", 4);
- sym = pool_intern_slice(c->global, (Slice){ .s = out, .len = (u32)out_len });
- return sym;
-}
-
-static Sym derive_dll_name_from_archive_member(Compiler* c,
- const char* member_name,
- Sym fallback) {
- const char* base;
- const char* p;
- size_t n;
- if (!member_name || !*member_name) return fallback;
- base = member_name;
- for (p = member_name; *p; ++p)
- if (*p == '/' || *p == '\\') base = p + 1;
- n = slice_from_cstr(base).len;
- if (n >= 4 && memcmp(base + n - 4, ".dll", 4) == 0)
- return pool_intern_slice(c->global, (Slice){ .s = base, .len = (u32)n });
- if (n >= 4 && memcmp(base + n - 4, ".DLL", 4) == 0)
- return pool_intern_slice(c->global, (Slice){ .s = base, .len = (u32)n });
- return fallback;
-}
-
-/* Resolve a COFF symbol-record's name to (ptr, len) without copying.
- * Mirrors the resolve_sym_name helper in coff_read.c: 8-byte short
- * name in-record, or (Zeroes==0, Offset) into the string table. */
-static void coff_resolve_sym_name_(const u8* rec, const u8* strtab,
- u32 strtab_size, const char** name_out,
- u32* len_out) {
- u32 z = (u32)rec[0] | ((u32)rec[1] << 8) | ((u32)rec[2] << 16) |
- ((u32)rec[3] << 24);
- if (z == 0) {
- u32 off = (u32)rec[4] | ((u32)rec[5] << 8) | ((u32)rec[6] << 16) |
- ((u32)rec[7] << 24);
- if (off >= strtab_size) {
- *name_out = "";
- *len_out = 0;
- return;
- }
- const char* s = (const char*)(strtab + off);
- u32 max = strtab_size - off;
- u32 n = 0;
- while (n < max && s[n] != '\0') ++n;
- *name_out = s;
- *len_out = n;
- return;
- }
- u32 n = 0;
- while (n < 8 && rec[n] != '\0') ++n;
- *name_out = (const char*)rec;
- *len_out = n;
-}
-
-/* Byte-level classifier that walks a long-form COFF member's symbol
- * table directly, without running read_coff. We use this instead of
- * the post-read_coff approach because mingw's archives contain reloc
- * types read_coff doesn't grok (e.g. IMAGE_REL_AMD64_SECTION/SECREL
- * in `.idata$N` sections), and we want to drop those members entirely
- * rather than fail at read time.
- *
- * Returns SHIM / SKIP / KEEP. On SHIM, *out_name is the interned bare
- * `<name>` (the export's real symbol, decoded from `__imp_<name>`).
- *
- * The COFF byte layout we rely on: header is fixed 20 bytes; symbol
- * table starts at PointerToSymbolTable; each symbol record is
- * COFF_SYMBOL_SIZE (18) bytes including aux slots. String table
- * follows symtab: u32 size header + bytes. */
-#define COFF_SYM_REC_SIZE_ 18u
-#define COFF_FILE_HDR_SIZE_ 20u
-#define COFF_SYM_CLASS_EXTERNAL_ 2u
-
-static CoffArMemberClass classify_coff_archive_member_bytes(
- Compiler* c, const u8* data, size_t len, Sym* out_name) {
- u32 ptr_to_symtab;
- u32 nsymbols;
- u16 nsections;
- const u8* sym_base;
- const u8* strtab;
- u32 strtab_size;
- int has_imp = 0;
- int has_idata = 0;
- int has_head_def = 0;
- int has_iname_def = 0;
- Sym imp_bare_name = 0;
- u32 i;
- *out_name = 0;
- if (len < COFF_FILE_HDR_SIZE_) return COFF_AR_KEEP;
- nsections = (u16)((u32)data[2] | ((u32)data[3] << 8));
- ptr_to_symtab = (u32)data[8] | ((u32)data[9] << 8) |
- ((u32)data[10] << 16) | ((u32)data[11] << 24);
- nsymbols = (u32)data[12] | ((u32)data[13] << 8) |
- ((u32)data[14] << 16) | ((u32)data[15] << 24);
- if (nsymbols == 0 || ptr_to_symtab == 0) return COFF_AR_KEEP;
- if ((u64)COFF_FILE_HDR_SIZE_ + (u64)nsections * 40u <= (u64)len) {
- u32 si;
- for (si = 0; si < (u32)nsections; ++si) {
- const u8* sh = data + COFF_FILE_HDR_SIZE_ + (u64)si * 40u;
- if (memcmp(sh, ".idata$", 7) == 0) {
- has_idata = 1;
- break;
- }
- }
- }
- if ((u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYM_REC_SIZE_ > (u64)len)
- return COFF_AR_KEEP;
- sym_base = data + ptr_to_symtab;
- /* String table follows symtab. Leading u32 = total size (incl. self).
- * Absent if there's no room after symtab. */
- {
- u64 symtab_end =
- (u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYM_REC_SIZE_;
- if (symtab_end + 4u <= (u64)len) {
- u32 declared = (u32)data[symtab_end] |
- ((u32)data[symtab_end + 1] << 8) |
- ((u32)data[symtab_end + 2] << 16) |
- ((u32)data[symtab_end + 3] << 24);
- if (declared < 4u || symtab_end + (u64)declared > (u64)len) {
- strtab = NULL;
- strtab_size = 0;
- } else {
- strtab = data + symtab_end;
- strtab_size = declared;
- }
- } else {
- strtab = NULL;
- strtab_size = 0;
- }
- }
- i = 0;
- while (i < nsymbols) {
- const u8* p = sym_base + (u64)i * COFF_SYM_REC_SIZE_;
- u16 sec_num = (u16)((u32)p[12] | ((u32)p[13] << 8));
- u8 sclass = p[16];
- u8 naux = p[17];
- const char* nm = NULL;
- u32 nlen = 0;
- /* Only consider defined external symbols. UNDEF (sec_num==0)
- * gives no information about what this object *provides*. */
- if (sclass == COFF_SYM_CLASS_EXTERNAL_ && sec_num != 0) {
- coff_resolve_sym_name_(p, strtab, strtab_size, &nm, &nlen);
- if (nlen > kCoffImpPrefixLen_ &&
- memcmp(nm, kCoffImpPrefix_, kCoffImpPrefixLen_) == 0) {
- has_imp = 1;
- if (imp_bare_name == 0) {
- const char* tail = nm + kCoffImpPrefixLen_;
- u32 tail_len = nlen - kCoffImpPrefixLen_;
- imp_bare_name = pool_intern_slice(c->global, (Slice){ .s = tail, .len = tail_len });
- }
- } else if (nlen > kCoffHeadPrefixLen_ &&
- memcmp(nm, kCoffHeadPrefix_, kCoffHeadPrefixLen_) == 0) {
- has_head_def = 1;
- } else if (nlen > kCoffInameSuffixLen_ &&
- memcmp(nm + nlen - kCoffInameSuffixLen_, kCoffInameSuffix_,
- kCoffInameSuffixLen_) == 0) {
- has_iname_def = 1;
- }
- }
- /* Skip primary + aux records. */
- i += 1u + (u32)naux;
- }
- if (has_imp && has_idata) {
- *out_name = imp_bare_name;
- return COFF_AR_SHIM;
- }
- if (has_head_def || has_iname_def) return COFF_AR_SKIP;
- return COFF_AR_KEEP;
-}
-
-/* Build a fresh ObjBuilder containing just `<name>` and `__imp_<name>`
- * as defined-at-OBJ_SEC_NONE globals (the shape read_coff_dso /
- * read_coff_short_import produce for a DLL export), and annotate it
- * with the providing DLL name. Mirrors read_coff_short_import. */
-static ObjBuilder* build_coff_long_import_shim(Compiler* c, Sym bare_name,
- Sym dll_name) {
- ObjBuilder* ob;
- const char* bare;
- size_t bare_len = 0;
- u32 imp_len;
- char* imp_buf;
- Sym imp_sn;
- ObjSymId id;
- ObjSymId imp_id;
- if (bare_name == 0 || dll_name == 0) return NULL;
- {
- Slice bare_s = pool_slice(c->global, bare_name);
- bare = bare_s.s;
- bare_len = bare_s.len;
- }
- if (!bare || bare_len == 0) return NULL;
- ob = obj_new(c);
- if (!ob) return NULL;
- id = obj_symbol_ex(ob, bare_name, SB_GLOBAL, SV_DEFAULT, SK_FUNC,
- OBJ_SEC_NONE, 0, 0, 0);
- obj_sym_mark_referenced(ob, id);
- imp_len = kCoffImpPrefixLen_ + (u32)bare_len;
- imp_buf = (char*)arena_array(c->scratch, char, imp_len);
- memcpy(imp_buf, kCoffImpPrefix_, kCoffImpPrefixLen_);
- memcpy(imp_buf + kCoffImpPrefixLen_, bare, bare_len);
- imp_sn = pool_intern_slice(c->global, (Slice){ .s = imp_buf, .len = imp_len });
- imp_id = obj_symbol_ex(ob, imp_sn, SB_GLOBAL, SV_DEFAULT, SK_OBJ,
- OBJ_SEC_NONE, 0, 0, 0);
- obj_sym_mark_referenced(ob, imp_id);
- obj_set_coff_import_dll(ob, dll_name);
- obj_finalize(ob);
- return ob;
-}
-
-static int coff_skip_long_import_shim_bare(Compiler* c, Sym bare_name) {
- const char* s;
- size_t n = 0;
- if (!bare_name) return 0;
- {
- Slice s_s = pool_slice(c->global, bare_name);
- s = s_s.s;
- n = s_s.len;
- }
- if (!s) return 0;
- /* llvm-mingw's UCRT libmsvcrt.a intentionally provides these legacy
- * CRT entry helpers as regular archive members later in the same
- * archive. Prefer those wrappers over the older direct msvcrt.dll
- * import shims. */
- return (n == 13 && memcmp(s, "__getmainargs", 13) == 0) ||
- (n == 13 && memcmp(s, "__p___initenv", 13) == 0);
-}
-
LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data,
size_t len, u8 whole_archive, u8 link_mode,
u8 group_id) {
@@ -570,12 +218,13 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data,
CfreeArMember mem;
LinkArchive* ar;
u32 n;
- Sym archive_dll_name = 0;
- int is_coff_target = (l && l->c->target.obj == CFREE_OBJ_COFF);
+ Sym archive_hint = 0;
+ const ObjFormatImpl* target_impl;
if (!l || !data || !len) return LINK_INPUT_NONE;
- if (is_coff_target)
- archive_dll_name = derive_dll_name_from_archive_path(l->c, name);
+ target_impl = obj_format_lookup(l->c->target.obj);
+ if (target_impl && target_impl->archive_hint)
+ archive_hint = target_impl->archive_hint(l->c, name);
in_arc.data = data;
in_arc.len = len;
@@ -625,28 +274,19 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data,
while (cfree_ar_iter_next(it, &mem) == CFREE_ITER_ITEM && n < ar->nmembers) {
ObjBuilder* ob = NULL;
CfreeBinFmt mfmt = cfree_detect_fmt(mem.data, mem.size);
- /* COFF long-form import-archive absorption (mingw `.a`). Classify
- * the member from raw bytes *before* read_coff so we can drop
- * members carrying `.idata$N` reloc types read_coff doesn't model
- * (e.g. IMAGE_REL_AMD64_SECTION) without ever invoking the reader
- * on them. KEEP members fall through to the standard read path. */
- if (mfmt == CFREE_BIN_COFF && is_coff_target && archive_dll_name != 0) {
- Sym bare = 0;
- CoffArMemberClass cls = classify_coff_archive_member_bytes(
- l->c, mem.data, mem.size, &bare);
- if (cls == COFF_AR_SHIM) {
- if (coff_skip_long_import_shim_bare(l->c, bare)) {
- ob = NULL;
- } else {
- Sym member_dll =
- derive_dll_name_from_archive_member(l->c, mem.name.s,
- archive_dll_name);
- ob = build_coff_long_import_shim(l->c, bare, member_dll);
- }
- } else if (cls == COFF_AR_SKIP) {
- ob = NULL;
- }
- if (cls != COFF_AR_KEEP) {
+ const ObjFormatImpl* member_impl = obj_format_lookup_bin(mfmt);
+ if (target_impl && target_impl->archive_member) {
+ ObjFormatArchiveMember desc;
+ ObjFormatArchiveAction action;
+ memset(&desc, 0, sizeof(desc));
+ desc.archive_name = name;
+ desc.member_name = mem.name.s;
+ desc.data = mem.data;
+ desc.len = mem.size;
+ desc.bin_fmt = mfmt;
+ desc.archive_hint = archive_hint;
+ action = target_impl->archive_member(l->c, &desc, &ob);
+ if (action != OBJ_FORMAT_ARCHIVE_KEEP) {
ar->members[n].name =
mem.name.len ? pool_intern_slice(l->c->global, mem.name) : 0;
ar->members[n].obj = ob;
@@ -654,26 +294,16 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data,
continue;
}
}
- switch (mfmt) {
- case CFREE_BIN_ELF:
- ob = read_elf(l->c, mem.name.s, mem.data, mem.size);
- break;
- case CFREE_BIN_MACHO:
- ob = read_macho(l->c, mem.name.s, mem.data, mem.size);
- break;
- case CFREE_BIN_COFF:
- ob = read_coff(l->c, mem.name.s, mem.data, mem.size);
- break;
- default:
- compiler_panic(l->c, no_loc(),
- "link_add_archive_bytes: unsupported member "
- "format (fmt=%u) for '%.*s' in archive '%.*s'",
- (u32)mfmt,
- SLICE_ARG(mem.name.len ? mem.name
- : SLICE_LIT("(unnamed)")),
- SLICE_ARG(name ? slice_from_cstr(name)
- : SLICE_LIT("(unnamed)")));
- }
+ if (!member_impl || !member_impl->read)
+ compiler_panic(l->c, no_loc(),
+ "link_add_archive_bytes: unsupported member "
+ "format (fmt=%u) for '%.*s' in archive '%.*s'",
+ (u32)mfmt,
+ SLICE_ARG(mem.name.len ? mem.name
+ : SLICE_LIT("(unnamed)")),
+ SLICE_ARG(name ? slice_from_cstr(name)
+ : SLICE_LIT("(unnamed)")));
+ ob = member_impl->read(l->c, mem.name.s, mem.data, mem.size);
if (!ob)
compiler_panic(l->c, no_loc(),
"link_add_archive_bytes: object read failed for "
@@ -970,31 +600,17 @@ void link_resolve_extend(Linker* l, LinkImage* img) {
"yet implemented");
}
-/* ---- public emit dispatcher ----
- *
- * Per-format peers of link_emit_elf: link_emit_macho and link_emit_coff
- * (both deferred) slot in here. Until those land, the unimplemented
- * cases panic with a format-specific diagnostic rather than the
- * catch-all. */
+/* ---- public emit dispatcher ---- */
void link_emit_image_writer(LinkImage* img, Writer* w) {
+ const ObjFormatImpl* fmt; /* registry entry for the target's object format */
if (!img || !w) return;
- switch (img->c->target.obj) {
- case CFREE_OBJ_ELF:
- link_emit_elf(img, w);
- return;
- case CFREE_OBJ_MACHO:
- link_emit_macho(img, w);
- return;
- case CFREE_OBJ_COFF:
- link_emit_coff(img, w);
- return;
- case CFREE_OBJ_WASM:
- compiler_panic(img->c, no_loc(),
- "link_emit_image_writer: Wasm linker emit not yet "
- "implemented");
+ fmt = obj_format_lookup(img->c->target.obj);
+ if (fmt && fmt->link_emit) { /* no entry or no link emitter: fall through to the panic */
+ fmt->link_emit(img, w);
+ return;
}
compiler_panic(img->c, no_loc(),
- "link_emit_image_writer: unknown obj format %u",
+ "link_emit_image_writer: unsupported obj format %u",
(u32)img->c->target.obj);
}
diff --git a/src/obj/coff_archive.c b/src/obj/coff_archive.c
@@ -0,0 +1,283 @@
+#include "obj/format.h"
+
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/core.h"
+#include "core/pool.h"
+#include "core/slice.h"
+#include "obj/obj.h"
+
+/* mingw import archives store import thunks as ordinary long-form COFF
+ * members with .idata$N sections rather than Microsoft short-import records.
+ * cfree's PE emitter synthesizes .idata itself, so the archive_member hook
+ * below rewrites per-symbol stub members into DSO-shaped shims and drops the
+ * archive head/trailer members before the generic linker reads them. */
+
+/* Result of classify_coff_archive_member_bytes() for one archive member. */
+typedef enum CoffArMemberClass {
+ COFF_AR_KEEP = 0, /* ordinary member: left to the regular COFF reader */
+ COFF_AR_SHIM = 1, /* defines an `__imp_` symbol and has an `.idata$` section: replaced by a shim */
+ COFF_AR_SKIP = 2, /* defines a `_head_`-prefixed or `_iname`-suffixed symbol: dropped */
+} CoffArMemberClass;
+
+/* Symbol-name markers matched by the member classifier. Each *Len_ value is
+ * the marker's length without its terminating NUL. */
+static const char kCoffImpPrefix_[] = "__imp_";
+static const u32 kCoffImpPrefixLen_ = (u32)(sizeof kCoffImpPrefix_ - 1u);
+static const char kCoffHeadPrefix_[] = "_head_";
+static const u32 kCoffHeadPrefixLen_ = (u32)(sizeof kCoffHeadPrefix_ - 1u);
+static const char kCoffInameSuffix_[] = "_iname";
+static const u32 kCoffInameSuffixLen_ = (u32)(sizeof kCoffInameSuffix_ - 1u);
+
+/* classify_obj_input hook for COFF.
+ *
+ * Returns 1 when `ob` carries a nonzero import-DLL annotation, storing that
+ * DLL name in *soname_out when the pointer is non-NULL; returns 0 otherwise
+ * and leaves *soname_out untouched. `c` is unused. */
+int coff_classify_obj_input(Compiler* c, ObjBuilder* ob, Sym* soname_out) {
+  Sym import_dll = 0;
+  int is_import;
+  (void)c;
+  is_import = obj_get_coff_import_dll(ob, &import_dll) && import_dll != 0;
+  if (is_import && soname_out) *soname_out = import_dll;
+  return is_import;
+}
+
+/* archive_hint hook for COFF: guess the providing DLL from the archive's
+ * file name.
+ *
+ * Takes the component after the last '/' or '\', removes one trailing
+ * ".dll.a", ".a", or ".lib" (first match in that order, case-sensitive),
+ * removes a leading "lib", and appends ".dll":
+ *   path/to/libkernel32.a      -> "kernel32.dll"
+ *   path/to/libkernel32.dll.a  -> "kernel32.dll"
+ *   path/to/kernel32.lib       -> "kernel32.dll"
+ * Returns the interned name, or 0 when `c` or `path` is NULL, `path` is
+ * empty, or nothing is left after stripping. */
+Sym coff_archive_hint(Compiler* c, const char* path) {
+  static const struct {
+    const char* text;
+    size_t len;
+  } kArchiveSuffixes[] = {
+    { ".dll.a", 6 },
+    { ".a", 2 },
+    { ".lib", 4 },
+  };
+  const char* stem;
+  const char* cur;
+  size_t stem_len;
+  size_t total;
+  size_t k;
+  char* buf;
+  if (c == NULL || path == NULL || path[0] == '\0') return 0;
+  /* Basename: everything after the last path separator of either flavor. */
+  stem = path;
+  for (cur = path; *cur != '\0'; ++cur) {
+    if (*cur == '/' || *cur == '\\') stem = cur + 1;
+  }
+  stem_len = slice_from_cstr(stem).len;
+  /* At most one suffix is removed; table order decides which one wins. */
+  for (k = 0; k < sizeof kArchiveSuffixes / sizeof kArchiveSuffixes[0]; ++k) {
+    size_t suffix_len = kArchiveSuffixes[k].len;
+    if (stem_len >= suffix_len &&
+        memcmp(stem + stem_len - suffix_len, kArchiveSuffixes[k].text,
+               suffix_len) == 0) {
+      stem_len -= suffix_len;
+      break;
+    }
+  }
+  if (stem_len >= 3 && memcmp(stem, "lib", 3) == 0) {
+    stem += 3;
+    stem_len -= 3;
+  }
+  if (stem_len == 0) return 0;
+  /* Assemble "<stem>.dll" in scratch memory, then intern it. */
+  total = stem_len + 4u;
+  buf = (char*)arena_array(c->scratch, char, total);
+  memcpy(buf, stem, stem_len);
+  memcpy(buf + stem_len, ".dll", 4);
+  return pool_intern_slice(c->global, (Slice){ .s = buf, .len = (u32)total });
+}
+
+/* Pick the DLL name for a long-form import member.
+ *
+ * When the member's basename (text after the last '/' or '\') ends in
+ * ".dll" or ".DLL", that basename is interned and returned. Otherwise,
+ * including a NULL or empty member name, `fallback` is returned as is. */
+static Sym derive_dll_name_from_archive_member(Compiler* c,
+                                               const char* member_name,
+                                               Sym fallback) {
+  const char* leaf;
+  const char* scan;
+  const char* ext;
+  size_t leaf_len;
+  if (member_name == NULL || member_name[0] == '\0') return fallback;
+  leaf = member_name;
+  for (scan = member_name; *scan != '\0'; ++scan) {
+    if (*scan == '/' || *scan == '\\') leaf = scan + 1;
+  }
+  leaf_len = slice_from_cstr(leaf).len;
+  if (leaf_len < 4) return fallback;
+  ext = leaf + leaf_len - 4;
+  /* Only the all-lowercase and all-uppercase spellings are recognized. */
+  if (memcmp(ext, ".dll", 4) != 0 && memcmp(ext, ".DLL", 4) != 0)
+    return fallback;
+  return pool_intern_slice(c->global,
+                           (Slice){ .s = leaf, .len = (u32)leaf_len });
+}
+
+/* Resolve the name of the symbol record at `rec` without copying.
+ *
+ * If the first four bytes of the record are nonzero, the name is the
+ * record's first eight bytes, ending at the first NUL if there is one.
+ * Otherwise bytes 4..7 hold a little-endian offset into `strtab`; the name
+ * runs from there to the first NUL or the end of the table. A NULL table
+ * or an offset at/after `strtab_size` yields the empty string.
+ *
+ * *name_out points into `rec`, into `strtab`, or at a string literal, and
+ * is not guaranteed to be NUL-terminated; *len_out is its length. */
+static void coff_resolve_sym_name_(const u8* rec, const u8* strtab,
+                                   u32 strtab_size, const char** name_out,
+                                   u32* len_out) {
+  const char* text;
+  u32 limit;
+  u32 count = 0;
+  u32 zeroes = 0;
+  u32 offset = 0;
+  u32 b;
+  for (b = 0; b < 4; ++b) {
+    zeroes |= (u32)rec[b] << (8u * b);
+    offset |= (u32)rec[4u + b] << (8u * b);
+  }
+  if (zeroes != 0) {
+    /* Short name stored in the record itself. */
+    text = (const char*)rec;
+    limit = 8;
+  } else if (strtab == NULL || offset >= strtab_size) {
+    text = "";
+    limit = 0;
+  } else {
+    text = (const char*)(strtab + offset);
+    limit = strtab_size - offset;
+  }
+  while (count < limit && text[count] != '\0') ++count;
+  *name_out = text;
+  *len_out = count;
+}
+
+/* Byte-layout constants used by the member classifier below. */
+#define COFF_SYM_REC_SIZE_ 18u /* bytes per symbol-table record (aux records included) */
+#define COFF_FILE_HDR_SIZE_ 20u /* bytes in the fixed COFF file header */
+#define COFF_SYM_CLASS_EXTERNAL_ 2u /* storage-class byte value the classifier treats as external */
+
+/* Classify one archive member directly from its raw COFF bytes; no
+ * ObjBuilder is created for it.
+ *
+ * Returns:
+ *  - COFF_AR_SHIM when a defined external symbol starts with `__imp_` and a
+ *    section name starts with `.idata$`; *out_name receives the interned
+ *    text that follows the first such `__imp_` prefix.
+ *  - COFF_AR_SKIP when, failing that, a defined external symbol starts with
+ *    `_head_` or ends with `_iname`.
+ *  - COFF_AR_KEEP otherwise, including when the buffer is shorter than the
+ *    file header, the symbol count or symbol-table offset is zero, or the
+ *    symbol table does not fit inside `len`.
+ * *out_name is 0 for every result other than COFF_AR_SHIM. */
+static CoffArMemberClass classify_coff_archive_member_bytes(
+ Compiler* c, const u8* data, size_t len, Sym* out_name) {
+ u32 ptr_to_symtab;
+ u32 nsymbols;
+ u16 nsections;
+ const u8* sym_base;
+ const u8* strtab;
+ u32 strtab_size;
+ int has_imp = 0;
+ int has_idata = 0;
+ int has_head_def = 0;
+ int has_iname_def = 0;
+ Sym imp_bare_name = 0;
+ u32 i;
+ *out_name = 0;
+ if (len < COFF_FILE_HDR_SIZE_) return COFF_AR_KEEP;
+ /* Little-endian header fields: section count at byte 2, symbol-table
+  * file offset at byte 8, symbol count at byte 12. */
+ nsections = (u16)((u32)data[2] | ((u32)data[3] << 8));
+ ptr_to_symtab = (u32)data[8] | ((u32)data[9] << 8) |
+ ((u32)data[10] << 16) | ((u32)data[11] << 24);
+ nsymbols = (u32)data[12] | ((u32)data[13] << 8) |
+ ((u32)data[14] << 16) | ((u32)data[15] << 24);
+ if (nsymbols == 0 || ptr_to_symtab == 0) return COFF_AR_KEEP;
+ /* Section headers are read as 40-byte records placed right after the
+  * file header; only the leading bytes of each name are compared. The scan
+  * is skipped entirely when the table would run past `len`. */
+ if ((u64)COFF_FILE_HDR_SIZE_ + (u64)nsections * 40u <= (u64)len) {
+ u32 si;
+ for (si = 0; si < (u32)nsections; ++si) {
+ const u8* sh = data + COFF_FILE_HDR_SIZE_ + (u64)si * 40u;
+ if (memcmp(sh, ".idata$", 7) == 0) {
+ has_idata = 1;
+ break;
+ }
+ }
+ }
+ /* Bounds-check the whole symbol table in 64-bit arithmetic before any
+  * record is read. */
+ if ((u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYM_REC_SIZE_ > (u64)len)
+ return COFF_AR_KEEP;
+ sym_base = data + ptr_to_symtab;
+ /* The string table is taken to start right after the symbol table, led
+  * by a 4-byte little-endian size that counts itself. A size below 4, or
+  * one that runs past `len`, is treated as "no string table". */
+ {
+ u64 symtab_end =
+ (u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYM_REC_SIZE_;
+ if (symtab_end + 4u <= (u64)len) {
+ u32 declared = (u32)data[symtab_end] |
+ ((u32)data[symtab_end + 1] << 8) |
+ ((u32)data[symtab_end + 2] << 16) |
+ ((u32)data[symtab_end + 3] << 24);
+ if (declared < 4u || symtab_end + (u64)declared > (u64)len) {
+ strtab = NULL;
+ strtab_size = 0;
+ } else {
+ strtab = data + symtab_end;
+ strtab_size = declared;
+ }
+ } else {
+ strtab = NULL;
+ strtab_size = 0;
+ }
+ }
+ i = 0;
+ while (i < nsymbols) {
+ const u8* p = sym_base + (u64)i * COFF_SYM_REC_SIZE_;
+ u16 sec_num = (u16)((u32)p[12] | ((u32)p[13] << 8));
+ u8 sclass = p[16];
+ u8 naux = p[17];
+ const char* nm = NULL;
+ u32 nlen = 0;
+ /* Only external symbols with a nonzero section number are inspected;
+  * undefined references say nothing about what this member provides. */
+ if (sclass == COFF_SYM_CLASS_EXTERNAL_ && sec_num != 0) {
+ coff_resolve_sym_name_(p, strtab, strtab_size, &nm, &nlen);
+ if (nlen > kCoffImpPrefixLen_ &&
+ memcmp(nm, kCoffImpPrefix_, kCoffImpPrefixLen_) == 0) {
+ has_imp = 1;
+ /* Remember only the first `__imp_` symbol's bare name. */
+ if (imp_bare_name == 0) {
+ const char* tail = nm + kCoffImpPrefixLen_;
+ u32 tail_len = nlen - kCoffImpPrefixLen_;
+ imp_bare_name =
+ pool_intern_slice(c->global, (Slice){ .s = tail, .len = tail_len });
+ }
+ } else if (nlen > kCoffHeadPrefixLen_ &&
+ memcmp(nm, kCoffHeadPrefix_, kCoffHeadPrefixLen_) == 0) {
+ has_head_def = 1;
+ } else if (nlen > kCoffInameSuffixLen_ &&
+ memcmp(nm + nlen - kCoffInameSuffixLen_, kCoffInameSuffix_,
+ kCoffInameSuffixLen_) == 0) {
+ has_iname_def = 1;
+ }
+ }
+ /* Step over this record and its auxiliary records. */
+ i += 1u + (u32)naux;
+ }
+ /* SHIM takes priority over SKIP when a member matches both. */
+ if (has_imp && has_idata) {
+ *out_name = imp_bare_name;
+ return COFF_AR_SHIM;
+ }
+ if (has_head_def || has_iname_def) return COFF_AR_SKIP;
+ return COFF_AR_KEEP;
+}
+
+/* Build a fresh ObjBuilder that stands in for one imported function.
+ *
+ * The builder gets two global symbols at OBJ_SEC_NONE, `<bare_name>` as
+ * SK_FUNC and `__imp_<bare_name>` as SK_OBJ, both marked referenced, and is
+ * annotated with `dll_name` as its import DLL before being finalized.
+ *
+ * Returns NULL when either name is 0, when the bare name resolves to an
+ * empty slice, or when obj_new() fails. */
+static ObjBuilder* build_coff_long_import_shim(Compiler* c, Sym bare_name,
+                                               Sym dll_name) {
+  Slice bare;
+  ObjBuilder* shim;
+  ObjSymId func_id;
+  ObjSymId slot_id;
+  Sym slot_name;
+  char* scratch;
+  u32 slot_len;
+  if (!bare_name || !dll_name) return NULL;
+  bare = pool_slice(c->global, bare_name);
+  if (bare.s == NULL || bare.len == 0) return NULL;
+  shim = obj_new(c);
+  if (shim == NULL) return NULL;
+  /* The function symbol itself. */
+  func_id = obj_symbol_ex(shim, bare_name, SB_GLOBAL, SV_DEFAULT, SK_FUNC,
+                          OBJ_SEC_NONE, 0, 0, 0);
+  obj_sym_mark_referenced(shim, func_id);
+  /* Its `__imp_` companion, spelled out in scratch memory and interned. */
+  slot_len = kCoffImpPrefixLen_ + (u32)bare.len;
+  scratch = (char*)arena_array(c->scratch, char, slot_len);
+  memcpy(scratch, kCoffImpPrefix_, kCoffImpPrefixLen_);
+  memcpy(scratch + kCoffImpPrefixLen_, bare.s, bare.len);
+  slot_name =
+      pool_intern_slice(c->global, (Slice){ .s = scratch, .len = slot_len });
+  slot_id = obj_symbol_ex(shim, slot_name, SB_GLOBAL, SV_DEFAULT, SK_OBJ,
+                          OBJ_SEC_NONE, 0, 0, 0);
+  obj_sym_mark_referenced(shim, slot_id);
+  obj_set_coff_import_dll(shim, dll_name);
+  obj_finalize(shim);
+  return shim;
+}
+
+/* Returns 1 when the import named by `bare_name` must not get a shim.
+ *
+ * llvm-mingw's UCRT libmsvcrt.a intentionally provides these legacy CRT
+ * entry helpers as regular archive members later in the same archive;
+ * those wrappers are preferred over direct msvcrt.dll import shims.
+ * Returns 0 for every other name, for a zero `bare_name`, and when the
+ * name has no backing text. */
+static int coff_skip_long_import_shim_bare(Compiler* c, Sym bare_name) {
+  static const char* const kWrapperProvided[] = {
+    "__getmainargs",
+    "__p___initenv",
+  };
+  Slice name;
+  size_t k;
+  if (bare_name == 0) return 0;
+  name = pool_slice(c->global, bare_name);
+  if (name.s == NULL) return 0;
+  for (k = 0; k < sizeof kWrapperProvided / sizeof kWrapperProvided[0]; ++k) {
+    /* Both listed names are exactly 13 characters long. */
+    if (name.len == 13 && memcmp(name.s, kWrapperProvided[k], 13) == 0)
+      return 1;
+  }
+  return 0;
+}
+
+/* archive_member hook for COFF: decide how the linker treats one member.
+ *
+ * *out (when `out` is non-NULL) is first reset to NULL. Members that are
+ * not COFF, or that arrive with no archive hint, are always KEEP, as are
+ * calls with a NULL `c` or `mem`. Otherwise the member is classified from
+ * its bytes:
+ *  - head/trailer members                        -> SKIP
+ *  - per-symbol stubs on the wrapper-provided
+ *    list                                        -> SKIP
+ *  - other per-symbol stubs                      -> REPLACE, with *out set
+ *    to a freshly built shim (which may be NULL if the build fails)
+ *  - everything else                             -> KEEP */
+ObjFormatArchiveAction coff_archive_member(Compiler* c,
+                                           const ObjFormatArchiveMember* mem,
+                                           ObjBuilder** out) {
+  Sym export_name = 0;
+  Sym dll;
+  if (out != NULL) *out = NULL;
+  if (c == NULL || mem == NULL) return OBJ_FORMAT_ARCHIVE_KEEP;
+  if (mem->bin_fmt != CFREE_BIN_COFF || mem->archive_hint == 0)
+    return OBJ_FORMAT_ARCHIVE_KEEP;
+  switch (classify_coff_archive_member_bytes(c, mem->data, mem->len,
+                                             &export_name)) {
+    case COFF_AR_SHIM:
+      break;
+    case COFF_AR_SKIP:
+      return OBJ_FORMAT_ARCHIVE_SKIP;
+    default:
+      return OBJ_FORMAT_ARCHIVE_KEEP;
+  }
+  if (coff_skip_long_import_shim_bare(c, export_name))
+    return OBJ_FORMAT_ARCHIVE_SKIP;
+  /* Prefer a DLL name carried by the member; fall back to the archive hint. */
+  dll = derive_dll_name_from_archive_member(c, mem->member_name,
+                                            mem->archive_hint);
+  if (out != NULL) *out = build_coff_long_import_shim(c, export_name, dll);
+  return OBJ_FORMAT_ARCHIVE_REPLACE;
+}
diff --git a/src/obj/format.h b/src/obj/format.h
@@ -0,0 +1,67 @@
+#ifndef CFREE_OBJ_FORMAT_H
+#define CFREE_OBJ_FORMAT_H
+
+#include <stddef.h>
+
+#include <cfree/object.h>
+
+#include "core/core.h"
+
+typedef struct LinkImage LinkImage;
+/* Callback signatures for the ObjFormatImpl slots declared below. */
+typedef ObjBuilder* (*ObjFormatReadFn)(Compiler*, const char* name,
+                                       const u8* data, size_t len);
+typedef ObjBuilder* (*ObjFormatReadDsoFn)(Compiler*, const char* name,
+                                          const u8* data, size_t len,
+                                          Sym* soname_out);
+typedef void (*ObjFormatEmitFn)(Compiler*, ObjBuilder*, Writer*);
+typedef void (*ObjFormatLinkEmitFn)(LinkImage*, Writer*);
+
+typedef enum ObjFormatArchiveAction {  /* archive_member() verdict */
+  OBJ_FORMAT_ARCHIVE_KEEP = 0,     /* no change requested; *out is NULL */
+  OBJ_FORMAT_ARCHIVE_REPLACE = 1,  /* substitute ObjBuilder stored in *out */
+  OBJ_FORMAT_ARCHIVE_SKIP = 2,     /* skip requested; *out is NULL */
+} ObjFormatArchiveAction;
+
+typedef struct ObjFormatArchiveMember {  /* input to archive_member() */
+  const char* archive_name;
+  const char* member_name;  /* used with archive_hint to derive a DLL name */
+  const u8* data;           /* member bytes; `len` gives their count */
+  size_t len;
+  CfreeBinFmt bin_fmt;      /* COFF hook ignores anything but CFREE_BIN_COFF */
+  Sym archive_hint;         /* 0 = no hint; COFF hook then keeps the member */
+} ObjFormatArchiveMember;
+
+typedef struct ObjFormatDsoReader {  /* out-param of the DSO reader probe */
+  const struct ObjFormatImpl* format;  /* format the reader belongs to */
+  ObjFormatReadDsoFn read;             /* its read_dso, or read_tbd */
+  const char* name;                    /* read_dso_name, or "read_tbd" */
+} ObjFormatDsoReader;
+
+typedef struct ObjFormatImpl {
+  ObjFmt kind;                /* key matched by obj_format_lookup() */
+  CfreeBinFmt bin_fmt;        /* CFREE_BIN_* counterpart of `kind` */
+  const char* name;           /* short format name, e.g. "elf" */
+  const char* read_name;      /* name of `read` as text, e.g. "read_elf" */
+  const char* read_dso_name;  /* name of `read_dso`; NULL for wasm */
+
+  ObjFormatEmitFn emit;
+  ObjFormatReadFn read;
+  ObjFormatReadDsoFn read_dso;    /* may be NULL (wasm registers none) */
+  ObjFormatLinkEmitFn link_emit;  /* may be NULL (wasm registers none) */
+
+  /* Optional linker ingestion policy hooks; only COFF sets them today. */
+  int (*classify_obj_input)(Compiler*, ObjBuilder*, Sym* soname_out);
+  Sym (*archive_hint)(Compiler*, const char* archive_name);
+  ObjFormatArchiveAction (*archive_member)(Compiler*,
+                                           const ObjFormatArchiveMember*,
+                                           ObjBuilder** out);
+} ObjFormatImpl;
+
+const ObjFormatImpl* obj_format_lookup(ObjFmt fmt);           /* or NULL */
+const ObjFormatImpl* obj_format_lookup_bin(CfreeBinFmt fmt);  /* or NULL */
+int obj_format_dso_reader_for_bytes(const u8* data, size_t len,
+                                    CfreeBinFmt* bin_out,  /* may be NULL */
+                                    ObjFormatDsoReader* out);  /* 1 = found */
+
+#endif  /* CFREE_OBJ_FORMAT_H */
diff --git a/src/obj/registry.c b/src/obj/registry.c
@@ -0,0 +1,143 @@
+#include "obj/format.h"
+
+#include <cfree/config.h>
+#include <string.h>
+
+#include "core/slice.h"
+#include "obj/obj.h"
+
+void link_emit_elf(LinkImage*, Writer*);    /* src/link/link_elf.c */
+void link_emit_macho(LinkImage*, Writer*);  /* src/link/link_macho.c */
+void link_emit_coff(LinkImage*, Writer*);   /* src/link/link_coff.c */
+
+#if CFREE_OBJ_COFF_ENABLED  /* hooks wired into obj_format_impl_coff */
+int coff_classify_obj_input(Compiler*, ObjBuilder*, Sym* soname_out);
+Sym coff_archive_hint(Compiler*, const char* archive_name);
+ObjFormatArchiveAction coff_archive_member(Compiler*,
+                                           const ObjFormatArchiveMember*,
+                                           ObjBuilder** out);
+#endif  /* CFREE_OBJ_COFF_ENABLED */
+
+static const ObjFormatImpl obj_format_impl_wasm = {  /* always compiled in */
+  .kind = CFREE_OBJ_WASM,
+  .bin_fmt = CFREE_BIN_WASM,
+  .name = "wasm",
+  .read_name = "read_wasm",
+  .read_dso_name = NULL,
+  .emit = emit_wasm,
+  .read = read_wasm,
+  .read_dso = NULL,   /* no DSO reader: the reader probe rejects wasm */
+  .link_emit = NULL,  /* no link-image emitter registered */
+};
+
+#if CFREE_OBJ_ELF_ENABLED
+static const ObjFormatImpl obj_format_impl_elf = {
+  .kind = CFREE_OBJ_ELF,
+  .bin_fmt = CFREE_BIN_ELF,
+  .name = "elf",
+  .read_name = "read_elf",
+  .read_dso_name = "read_elf_dso",
+  .emit = emit_elf,
+  .read = read_elf,
+  .read_dso = read_elf_dso,
+  .link_emit = link_emit_elf,  /* policy hooks stay NULL */
+};
+#endif  /* CFREE_OBJ_ELF_ENABLED */
+
+#if CFREE_OBJ_MACHO_ENABLED
+static const ObjFormatImpl obj_format_impl_macho = {  /* also serves .tbd */
+  .kind = CFREE_OBJ_MACHO,
+  .bin_fmt = CFREE_BIN_MACHO,
+  .name = "macho",
+  .read_name = "read_macho",
+  .read_dso_name = "read_macho_dso",
+  .emit = emit_macho,
+  .read = read_macho,
+  .read_dso = read_macho_dso,
+  .link_emit = link_emit_macho,  /* policy hooks stay NULL */
+};
+#endif  /* CFREE_OBJ_MACHO_ENABLED */
+
+#if CFREE_OBJ_COFF_ENABLED
+static const ObjFormatImpl obj_format_impl_coff = {
+  .kind = CFREE_OBJ_COFF,
+  .bin_fmt = CFREE_BIN_COFF,
+  .name = "coff",
+  .read_name = "read_coff",
+  .read_dso_name = "read_coff_dso",
+  .emit = emit_coff,
+  .read = read_coff,
+  .read_dso = read_coff_dso,  /* also used for CFREE_BIN_PE input */
+  .link_emit = link_emit_coff,
+  .classify_obj_input = coff_classify_obj_input,  /* COFF-only policy hooks */
+  .archive_hint = coff_archive_hint,
+  .archive_member = coff_archive_member,
+};
+#endif  /* CFREE_OBJ_COFF_ENABLED */
+
+static const ObjFormatImpl* const obj_format_impls[] = {  /* scan order */
+#if CFREE_OBJ_ELF_ENABLED
+  &obj_format_impl_elf,
+#endif  /* CFREE_OBJ_ELF_ENABLED */
+#if CFREE_OBJ_COFF_ENABLED
+  &obj_format_impl_coff,
+#endif  /* CFREE_OBJ_COFF_ENABLED */
+#if CFREE_OBJ_MACHO_ENABLED
+  &obj_format_impl_macho,
+#endif  /* CFREE_OBJ_MACHO_ENABLED */
+  &obj_format_impl_wasm,  /* unconditional, so the array is never empty */
+};
+
+/* Scans the compiled-in format table for the entry whose `kind` equals
+ * `fmt`; returns NULL when no enabled format matches. */
+const ObjFormatImpl* obj_format_lookup(ObjFmt fmt) {
+  const size_t count = sizeof obj_format_impls / sizeof obj_format_impls[0];
+  size_t at = 0;
+  while (at < count && obj_format_impls[at]->kind != fmt) ++at;
+  return at < count ? obj_format_impls[at] : NULL;
+}
+
+/* Maps a detected container format to its ObjFmt and defers to
+ * obj_format_lookup(); unmapped values (CFREE_BIN_PE included) give NULL. */
+const ObjFormatImpl* obj_format_lookup_bin(CfreeBinFmt fmt) {
+  static const struct { CfreeBinFmt bin; ObjFmt obj; } pairs[] = {
+    { CFREE_BIN_ELF, CFREE_OBJ_ELF },
+    { CFREE_BIN_COFF, CFREE_OBJ_COFF },
+    { CFREE_BIN_MACHO, CFREE_OBJ_MACHO },
+    { CFREE_BIN_WASM, CFREE_OBJ_WASM },
+  };
+  size_t i;
+  for (i = 0; i < sizeof pairs / sizeof pairs[0]; ++i)
+    if (pairs[i].bin == fmt) return obj_format_lookup(pairs[i].obj);
+  return NULL;
+}
+
+/* Picks the DSO header reader for `data[0..len)`. `out` (required) is zeroed
+ * and `*bin_out` reset to CFREE_BIN_UNKNOWN first. A leading "---" selects
+ * read_tbd (Mach-O builds only) and leaves `*bin_out` unknown; otherwise the
+ * detected format is reported and PE uses COFF. Returns 1 if set, else 0. */
+int obj_format_dso_reader_for_bytes(const u8* data, size_t len,
+                                    CfreeBinFmt* bin_out,
+                                    ObjFormatDsoReader* out) {
+  const ObjFormatImpl* impl;
+  CfreeBinFmt detected;
+  if (!out) return 0;
+  memset(out, 0, sizeof(*out));
+  if (bin_out) *bin_out = CFREE_BIN_UNKNOWN;
+  if (!data) return 0;
+#if CFREE_OBJ_MACHO_ENABLED
+  if (len >= 3 && memcmp(data, "---", 3) == 0) {
+    *out = (ObjFormatDsoReader){ .format = &obj_format_impl_macho,
+                                 .read = read_tbd, .name = "read_tbd" };
+    return 1;
+  }
+#endif
+  detected = cfree_detect_fmt(data, len);
+  if (bin_out) *bin_out = detected;
+  if (detected == CFREE_BIN_PE) impl = obj_format_lookup(CFREE_OBJ_COFF);
+  else impl = obj_format_lookup_bin(detected);
+  if (!impl || !impl->read_dso) return 0;
+  *out = (ObjFormatDsoReader){ .format = impl, .read = impl->read_dso,
+                               .name = impl->read_dso_name };
+  return 1;
+}
diff --git a/test/coff/pe-import-mingw.c b/test/coff/pe-import-mingw.c
@@ -267,7 +267,7 @@ int main(void) {
/*group_id=*/0);
EXPECT(ar_id != LINK_INPUT_NONE,
"link_add_archive_bytes returned LINK_INPUT_NONE");
- link_set_entry(l, "mainCRTStartup");
+ link_set_entry(l, CFREE_SLICE_LIT("mainCRTStartup"));
link_set_pie(l, 1);
link_set_emit_static_exe(l, 1);
diff --git a/test/coff/pe-import-smoke.c b/test/coff/pe-import-smoke.c
@@ -258,7 +258,7 @@ int main(void) {
l, "ExitProcess.lib-member", shim, SHIM_TOTAL_LEN);
EXPECT(dso_id != LINK_INPUT_NONE,
"link_add_obj_bytes returned LINK_INPUT_NONE for short-import shim");
- link_set_entry(l, "mainCRTStartup");
+ link_set_entry(l, CFREE_SLICE_LIT("mainCRTStartup"));
link_set_pie(l, 1);
link_set_emit_static_exe(l, 1);
diff --git a/test/coff/pe-mixed-archive.c b/test/coff/pe-mixed-archive.c
@@ -282,7 +282,7 @@ int main(void) {
/*group_id=*/0);
EXPECT(ar_id != LINK_INPUT_NONE,
"link_add_archive_bytes returned LINK_INPUT_NONE");
- link_set_entry(l, "mainCRTStartup");
+ link_set_entry(l, CFREE_SLICE_LIT("mainCRTStartup"));
link_set_pie(l, 1);
link_set_emit_static_exe(l, 1);