commit e5603fea9e7ddce3fdff1427bbf18ddab1ea87d1
parent 9d905b3c414afa1ee34e1825d62000f9acfda660
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 5 Jun 2026 10:29:07 -0700
modularity wave 1: add pluggable-axis capability hooks (additive infra)
Adds the abstraction hooks that wave 2/3 consumers will route through, all additive
(0/NULL defaults preserve existing behavior; old fns kept as thin wrappers):
ObjFormat (src/obj): obj_format_tls_model {ELF_LE,MACHO_DESCRIPTOR,WINDOWS_TEB} (tls_via_descriptor
now wraps it), c_label_prefix, default_entry_name, carries_file_only_debug, builds_own_static_got,
supports_symbol_feature, weak_undef_pulls_archive_member, static_ifunc_via_rela_iplt,
boundary_sym_kind, synth_inputs hook; ObjElfArchOps reloc_name/float_abi_from_e_flags/tls_tp_bias;
obj_macho_seckind_for_secname + obj_macho_native_secname; obj/public name<->fmt.
ABI (src/abi): ABIFuncInfo.vararg_fp_via_int (set by aapcs64_windows).
ArchImpl (src/arch): resolve_float_abi hook + riscv body; KIT_CG_BACKEND_ICACHE_COHERENT (x64,wasm);
kit_arch_ptr_size.
Frontend (src/api,lang): KitFrontendVTable names/nnames + kit_language_for_name/kit_language_name;
KitFrontendCaps.cache_repl_toplevel_source (toy).
Internal obj helpers (c_mangle, default_entry, extern_via_got, tls bias) migrated to read the new
vtable fields. Build + targeted tests (cg-api,isa,link,elf,debug,dwarf,parse,smoke-x64,smoke-rv64) green.
Diffstat:
25 files changed, 1305 insertions(+), 30 deletions(-)
diff --git a/doc/plan/README.md b/doc/plan/README.md
@@ -11,6 +11,7 @@ shrinks to whatever remains open.
| [RELEASE.md](RELEASE.md) | Cross-cutting initial-release punchlist: release scope, deferred features, and per-subsystem completion/validation items. | — |
| [OPTIMIZER.md](OPTIMIZER.md) | Completing the O2 SSA mid-end, expanded inlining, -O0/-O1 performance work, machine register-constraint improvements. | [../OPT.md](../OPT.md) |
| [LINKER.md](LINKER.md) | Incremental linking: the file-based object-link redesign and remaining non-ELF format coverage. | [../LINK.md](../LINK.md) |
+| [RELOC.md](RELOC.md) | Genericizing the canonical-`RelocKind` half of the relocation layer: one per-arch `RelocDesc` table replacing the parallel width/GOT/name switches, the byte-patcher's encoders moved into the arch backends behind the single public entry, and the residual arch/OS identity gates removed. | [../OBJ.md](../OBJ.md), [../LINK.md](../LINK.md) |
| [JIT.md](JIT.md) | Function-level hot reload, Go-runtime-style codegen support, and remaining JIT host-portability work. | [../JIT.md](../JIT.md) |
| [DEBUG.md](DEBUG.md) | The Windows debugger host adapter, x64/rv64 displaced single-step, profiling, and DWARF gaps. | [../DBG.md](../DBG.md), [../DWARF.md](../DWARF.md) |
| [WASM.md](WASM.md) | Completing the Wasm object backend and remaining parser/validator coverage. | [../WASM.md](../WASM.md) |
diff --git a/doc/plan/RELOC.md b/doc/plan/RELOC.md
@@ -0,0 +1,339 @@
+# Relocation-layer genericization (planned work)
+
+## Status — 2026-06-05 — proposed; nothing built yet
+
+This roadmap makes the **canonical-`RelocKind` half** of the relocation subsystem
+as modular as the wire half already is. The goal is the project's standing
+contract (see [../INTERFACES.md](../INTERFACES.md)): code that depends on a
+pluggable item — here, the target **arch** — must never switch on its identity,
+and adding or changing an arch's relocations must touch exactly **one place**.
+Today the wire-translation half meets that bar; the canonical half does not.
+
+Design docs this work feeds back into once shipped:
+[../OBJ.md](../OBJ.md) ("Relocation model and the shared byte-patcher"),
+[../LINK.md](../LINK.md) (the reloc passes), [../INTERFACES.md](../INTERFACES.md)
+(the backend contract).
+
+## The thesis
+
+A relocation kind is a single logical entity with a handful of attributes — byte
+width, whether it is PC-relative, whether it loads a GOT slot, whether it is a
+TLS-GOT load, whether it is a branch needing a veneer, its display name, and how
+to patch its bytes. Today those attributes are **denormalized across five
+parallel `switch`/hook tables** that the compiler cannot keep in sync:
+
+| Attribute | Lives in | Form |
+|-----------|----------|------|
+| how to patch the bytes | `link_reloc_apply()` `src/obj/reloc_apply.c:83` | switch, 77 arms |
+| byte width | `reloc_width()` `src/link/link_reloc_layout.c:256` | switch |
+| uses GOT / is TLS-GOT | `reloc_uses_got()` / `reloc_is_tls_got()` `src/link/link_reloc_layout.c:392,380` | switch |
+| display name | `kit_obj_reloc_kind_name()` `src/api/object_file.c:358` | switch, keyed on (arch, fmt) |
+| branch / got-load / tlvp / direct-page | `LinkArchDesc.is_*` `src/link/link_arch.h:79-83` | per-arch hooks |
+
+Adding one relocation kind means editing up to four of these by hand, with no
+diagnostic if you miss one. Adding an **arch** means editing the three generic
+switches in `link_reloc_layout.c` and `api/object_file.c` even though a per-arch
+hook mechanism (`LinkArchDesc`) already exists right beside them. And because the
+canonical enum **arch-prefixes value-class kinds** that are byte-identical across
+arches (`R_X64_TPOFF64` vs `R_AARCH64_TPOFF64`), generic code is forced into a
+literal arch-identity switch to pick between them:
+
+```c
+/* src/link/link_reloc_layout.c:698 — the one arch-identity leak in the reloc layer */
+rrec.kind = (l->c->target.arch == KIT_ARCH_X86_64) ? R_X64_TPOFF64
+ : R_AARCH64_TPOFF64;
+```
+
+This plan normalizes the model: **one per-arch descriptor table** is the single
+source for every static attribute of a relocation kind, the byte-patcher's
+implementation moves to the arch backends that own the ISA knowledge (behind the
+same single public entry), and the two residual OS/format identity gates in the
+reloc path move to obj-layer predicates.
+
+## Baseline — already clean (context, not work)
+
+The **wire half is the model to imitate** and is out of scope except where noted:
+
+- **Per-(arch,format) wire translators.** `reloc_to`/`reloc_from` (and Mach-O's
+ `reloc_pcrel`/`reloc_length`) live in `src/obj/{elf,macho,coff}/reloc_<arch>.c`
+ and are reached only through the format sub-ops (`fmt->elf_arch(arch)->reloc_to`,
+ etc., `src/obj/format.h:36-67`). Adding a format, or an arch's wire encoding for
+ a format, is already a one-table change. These do **not** move.
+- **The single-entry byte-patcher boundary.** `link_reloc_apply(c, kind, P, S, A,
+ P)` is reused verbatim by the static linker, the JIT linker, the assembler, and
+ the emulator guest loader ([../OBJ.md](../OBJ.md): "one encoder, three loaders").
+ That **one-entry, one-encoder invariant is load-bearing** — it is why none of
+ those consumers can disagree on an encoding — and WS-D preserves it exactly: only
+ the *implementation behind* the entry is partitioned, never the entry itself.
+- **`LinkArchDesc` per-arch PLT/IPLT geometry + stub emitters**
+ (`src/link/link_arch.h`) is already the right shape and stays; WS-C/WS-D extend
+ it, they do not replace it.
+- **The canonical `RelocKind` enum** (`src/obj/obj.h:108`) as a concept — one
+ global enum, backends emit canonical kinds — is correct and stays. WS-A only
+ removes the *spurious arch-prefixed duplicates*, not the per-arch families that
+ are genuinely arch-specific (AArch64 ADRP/LDST immediates, RISC-V HI20/B-type
+ scatter, etc.).
+
+## The end state (ownership)
+
+```
+src/obj/reloc.c neutral core: RelocDesc rows + byte encoders for
+ the arch-independent data-word kinds (R_ABS*,
+ R_REL*, R_PC*, R_TPOFF*, R_GOT32, R_PLT32), and
+ the single public link_reloc_apply() dispatcher.
+src/arch/<arch>/reloc.c (NEW) that arch's RelocDesc rows (width + flags + name)
+ AND its instruction-immediate byte encoders,
+ registered on LinkArchDesc.
+src/obj/<fmt>/reloc_<arch>.c UNCHANGED — the per-(arch,fmt) wire translators.
+src/obj/coff/reloc.c COFF-specific kinds' RelocDesc rows (format, not arch).
+```
+
+After this, adding an arch's relocation is **one row** in that arch's `reloc.c`
+(plus its byte encoder and its wire translator, both already arch-local); adding
+an arch is one new `src/arch/<arch>/reloc.c`. No generic file in `src/link` or
+`src/api` enumerates relocation kinds any more.
+
+---
+
+## WS-A — Neutralize cross-arch duplicate value-class kinds (addresses **A**)
+
+**Problem.** `R_X64_TPOFF64` and `R_AARCH64_TPOFF64` are byte-identical in every
+table — same apply arm (`reloc_apply.c:97-107`, both `wr_u64_le(S+A)`), same
+width, same meaning ("64-bit TP-relative offset, written as a data word"). They
+are separate only because the names carry an arch prefix, and that split is what
+forces the arch-identity ternary at `link_reloc_layout.c:698`. `R_AARCH64_TPOFF64`
+is **link-internal only** (no wire mapping; it is minted solely by
+`link_emit_internal_tpoff64` to fill IE GOT slots), and `R_X64_TPOFF64` is the
+x86-64 wire kind that also doubles as that internal fill — so a single neutral
+kind serves both.
+
+**Change.** Introduce a neutral `R_TPOFF64` and delete both arch-prefixed forms.
+
+1. `src/obj/obj.h:108` — add `R_TPOFF64` to the neutral block (near `R_ABS64`);
+ remove `R_X64_TPOFF64` and `R_AARCH64_TPOFF64`.
+2. `src/link/link_reloc_layout.c:698-699` — collapse the ternary to
+ `rrec.kind = R_TPOFF64;`. **The arch-identity switch is gone.** Update the
+ `link_emit_internal_tpoff64` banner comment (`:680-685`) to drop the per-arch
+ spelling.
+3. `src/obj/reloc_apply.c:97-99` and `reloc_width()` `:272-279` — fold both old
+ cases into the `R_TPOFF64` arm (unchanged body).
+4. `src/obj/elf/reloc_x86_64.c:50-51,109-110` — map `R_TPOFF64 ↔
+ ELF_R_X86_64_TPOFF64` (the x64 table is the only one that serializes it; the
+ aa64 table needs no entry, matching today).
+5. `src/obj/elf/link.c:352` and `src/obj/obj.c` (`_CASE(R_AARCH64_TPOFF64)` in
+ `obj_reloc_kind_name`) — rename to `R_TPOFF64`.
+
+**Also audit** the rest of the enum for the same smell — arch-prefixed kinds whose
+apply/width/class are identical to a neutral kind — and neutralize any found. The
+known-good neutral set already present is `R_ABS32/64`, `R_REL32/64`, `R_PC32/64`,
+`R_GOT32`, `R_PLT32`; TPOFF64 is the one clear remaining duplicate. Kinds that are
+genuinely arch-specific (instruction-embedded immediates) **must not** be touched.
+
+**Oracle.** `make test-link test-elf test-smoke-x64 test-smoke-rv64
+test-aa64-inline`, then a TLS-exercising `test-toy` slice, then `make bootstrap`
+(IE-model TLS in the compiler's own source patches through `R_TPOFF64`).
+This WS removes confirmed audit finding **#25** and is self-contained — ship it
+first.
+
+---
+
+## WS-B — A single per-arch `RelocDesc` table (addresses **C**, foundation for **B**)
+
+**Problem.** `reloc_width()`, the GOT classifiers, and `kit_obj_reloc_kind_name()`
+are three generic switches each re-enumerating every arch's kinds. `#24` (the
+reloc-name table gated on `fmt == ELF && arch == X86_64` at `object_file.c:358`)
+is the worst of them — an identity switch on *two* axes in format-neutral API code.
+
+**Change.** Introduce one descriptor, owned per-arch, as the single source of a
+kind's static facts.
+
+```c
+/* src/obj/reloc.h (new) */
+typedef enum RelocDescFlag {
+ RELOC_PCREL = 1u << 0,
+ RELOC_USES_GOT = 1u << 1,
+ RELOC_IS_TLS_GOT = 1u << 2,
+ RELOC_IS_BRANCH = 1u << 3, /* needs a JIT/range veneer */
+ RELOC_IS_TLVP = 1u << 4, /* Mach-O TLV page/pageoff */
+ RELOC_DIRECT_PAGE = 1u << 5, /* Mach-O ADRP-direct */
+ RELOC_MARKER = 1u << 6, /* RELAX/ALIGN/TPREL_ADD — no bytes */
+ RELOC_WIDTH_DYN = 1u << 7, /* ULEB128 — width read from bytes at apply */
+} RelocDescFlag;
+
+typedef struct RelocDesc {
+ u8 width; /* 0 only for R_NONE; markers/dyn use a sentinel */
+ u8 flags; /* RelocDescFlag bitmask */
+ const char* name; /* canonical spelling, e.g. "AARCH64_CALL26" */
+} RelocDesc;
+
+/* Single lookup; caller always holds the target arch via c. */
+const RelocDesc* reloc_desc(const Compiler* c, RelocKind k);
+```
+
+**Ownership / assembly.** `reloc_desc()` resolves the neutral core kinds from a
+table in `src/obj/reloc.c`; for arch-family kinds it dispatches to
+`link_arch_desc_for(c)->reloc_desc(k)` (a new `LinkArchDesc` hook returning that
+arch's slice); COFF-family kinds resolve from a COFF-format slice. To add an arch
+you add its slice in `src/arch/<arch>/reloc.c` — no generic edit.
+
+**Migrate consumers to the descriptor:**
+- `reloc_width()` (`link_reloc_layout.c:256`) → delete; callers read
+ `reloc_desc(c, k)->width`. Keep the `RELOC_WIDTH_DYN` sentinel + the existing
+ ULEB128 offset-bounds guard (`link_reloc_layout.c:1118-1126`).
+- `kit_obj_reloc_kind_name()` (`object_file.c:358`) → delete the (arch,fmt) switch;
+ return `reloc_desc(file's c, k)->name`. Folds **#24**. The pre-existing neutral
+ `obj_reloc_kind_name` (`obj.h:559`) becomes the fallback for kinds with no
+ per-arch spelling.
+
+**Exhaustiveness test (the red-green anchor).** Add `test/obj/reloc_desc` that
+iterates **every** `RelocKind` value for each enabled arch and asserts
+`reloc_desc()` returns a row (non-NULL; `width != 0` except for `R_NONE` and rows flagged `RELOC_MARKER`/`RELOC_WIDTH_DYN`).
+This converts "forgot a row when adding a kind" from a silent runtime default into
+a failing test — the durable guard that keeps the table honest. Seed it red by
+writing the test before the table is complete.
+
+**Oracle.** The new exhaustiveness test, then `make test-link test-elf test-macho
+test-ar`, then `make bootstrap` (byte-identity catches any width/name drift).
+
+---
+
+## WS-C — Route classification through the descriptor (addresses **B**)
+
+**Problem.** The generic GOT-layout pass hand-maintains `reloc_uses_got()` /
+`reloc_is_tls_got()` (`link_reloc_layout.c:380-404`) enumerating every arch's GOT
+relocs, while the Mach-O linker asks the per-arch `LinkArchDesc.is_got_load_reloc`
+/ `is_branch_reloc` / `is_tlvp_reloc` / `is_direct_page_reloc` hooks
+(`src/obj/macho/link.c`) for the *same* classification. Two mechanisms, one
+question, and the generic one leaks the arch enumeration into shared link code.
+
+**Change.** With WS-B's descriptor in place, classification is just a flag read:
+- `reloc_uses_got(k)` → `reloc_desc(c,k)->flags & RELOC_USES_GOT`.
+- `reloc_is_tls_got(k)` → `... & RELOC_IS_TLS_GOT`.
+- Delete the four `LinkArchDesc.is_*` hooks (`link_arch.h:79-82`) and their
+ per-arch impls in `src/arch/{aa64,x64,riscv}/link.c`; the Mach-O linker callers
+ (`macho/link.c:420,492,566,1483,1496,1505,1514,1563`) read the descriptor flags
+ instead. `needs_jit_call_stub` (still used at `link_reloc_layout.c:594,1095`)
+ becomes `RELOC_IS_BRANCH` (today it aliases `is_branch_reloc` on every arch).
+
+End state: **no generic file classifies relocations by enumerating arch kinds.**
+The per-arch knowledge that was split between the `is_*` hooks and the generic
+switches now lives once, as flags, in each arch's descriptor slice.
+
+**Oracle.** `make test-link test-macho` (Mach-O exercises every `is_*` path),
+`test-smoke-x64 test-smoke-rv64`, `make bootstrap`. macOS/aa64 bootstrap is the
+strongest check here since it drives the Mach-O GOT/TLVP/branch classifiers.
+
+---
+
+## WS-D — Partition the byte-patcher per-arch behind the single entry (addresses **D**)
+
+**Problem.** `src/obj/reloc_apply.c` lives in the format-neutral obj layer but
+encodes pure ISA knowledge — AArch64 imm19/imm26/ADRP page math, RISC-V U/I/S/B/J
+immediate scatter and the 0x800 HI20 bias, x64 field writes. Adding an arch means
+editing this shared file; the encoders belong in the backends, alongside that
+arch's MC emitter and wire translator (consistent with `link_arch.h`: "each
+backend's descriptor lives under `src/arch/<arch>/`").
+
+**Constraint (must not break).** `link_reloc_apply(c, kind, ...)` stays the **one
+public entry**, called unchanged by all four loaders (`asm.c:1296`, `emu/dl.c:15`,
+`link_jit.c`, `elf/macho/coff/link.c`). The "one encoder, three loaders" invariant
+in [../OBJ.md](../OBJ.md) is preserved — there is still exactly one encoder per
+kind; it just moves to the owning backend.
+
+**Change.**
+1. Keep `link_reloc_apply` in `src/obj/reloc.c` as the dispatcher. It handles the
+ **arch-neutral data-word arms inline** (`R_ABS32/64`, `R_REL*/PC*`, `R_TPOFF*`,
+ `R_GOT32`, `R_PLT32` data writes, the ULEB128 codec) — these are plain
+ `wr_uN_le` with no ISA knowledge and have no reason to live per-arch.
+2. For instruction-embedded kinds, dispatch to a new
+ `LinkArchDesc.reloc_apply_insn(c, k, P, S, A, P)` hook. Move the AArch64 arms
+ to `src/arch/aa64/reloc.c`, the RISC-V arms to `src/arch/riscv/reloc.c`, and
+ the x64 instruction arms (e.g. `R_X64_PC8`) to `src/arch/x64/reloc.c`. The
+ dispatcher selects the hook via `link_arch_desc_for(c)` — `c` (hence
+ `target.arch`) is available at every call site, verified across all callers.
+3. COFF-specific kinds (`R_COFF_*`) route to a COFF encoder slice.
+
+The arch `reloc.c` files created in WS-B (descriptor slices) become the natural
+home for these encoders too — each backend's `reloc.c` owns {desc rows, classifier
+flags, byte encoders} for its kinds, one file per arch.
+
+**Oracle.** This is the highest-blast-radius WS; lean on the exhaustiveness test +
+full matrix: `make test-link test-elf test-macho test-isa test-asm test-smoke-x64
+test-smoke-rv64 test-aa64-inline`, the JIT/emu reloc paths (`test-cg-api`, any
+`run`/`emu` smoke), then **both** bootstrap chains (`make bootstrap-debug
+bootstrap-release`) — byte-identity over the compiler's own object output is the
+definitive proof no encoding shifted. Do WS-D last of WS-A–D, one arch at a time
+(neutral-core extraction first, then aa64, then x64, then riscv), keeping the old
+switch arms live until each arch's hook is proven, so every step is bisectable.
+
+---
+
+## WS-E — Remove the residual OS/format identity gates in the reloc path (addresses **E**)
+
+Two non-arch identity checks remain in `link_reloc_layout.c`:
+
+1. **FreeBSD static-IFUNC mechanism** (`:833-834`, audit finding **#18**):
+ ```c
+ int use_rela_iplt = l->emit_static_exe && l->c->target.os == KIT_OS_FREEBSD &&
+ l->c->target.obj == KIT_OBJ_ELF;
+ ```
+ "Does this OS's crt walk `[__rela_iplt_start, __rela_iplt_end)` before ctors"
+ is an OS/crt-personality property (the in-code comment notes glibc shares it).
+ Replace with an obj-layer predicate `obj_format_static_ifunc_via_rela_iplt(c)`
+ living beside the existing `obj_format_*` policy family (`src/obj/obj.h:686`),
+ where the `(FREEBSD, ELF)` knowledge legitimately resides. A future libc sharing
+ the mechanism is then a one-line table change.
+
+2. **IRELATIVE wire type via hardcoded `KIT_OBJ_ELF`** (`link_elf_irelative_type`,
+ `:808-813`): `obj_format_lookup(KIT_OBJ_ELF)->elf_arch(arch)->r_irelative`. Low
+ priority — it is already `use_rela_iplt`-gated and `.rela.plt` is intrinsically
+ ELF — but fold it under the WS-E predicate so the generic pass names no format
+ constant directly: have the predicate return both the boolean and the resolver
+ reloc, or query the resolved format rather than the literal `KIT_OBJ_ELF`.
+
+**Oracle.** `make test-link`, the FreeBSD IFUNC path via the FreeBSD VM lane
+(`scripts/freebsd_vm.sh` / `test-toy-freebsd-vm`; see
+[FREEBSD.md](FREEBSD.md)), and a musl/freestanding static-IFUNC case to confirm
+the ctor path is unchanged.
+
+---
+
+## Sequencing & risk
+
+Execution order is dependency-sound and each step is independently shippable:
+
+1. **WS-A** — small, self-contained, removes finding #25. No dependency. Ship first.
+2. **WS-B** — builds the descriptor + the exhaustiveness test (the safety net the
+ rest leans on). Folds #24.
+3. **WS-C** — consumes WS-B's flags; deletes the generic classifiers and the
+ `LinkArchDesc.is_*` hooks. Folds #18's sibling smell.
+4. **WS-D** — the deep refactor; gated behind WS-B's test, done one arch at a time.
+5. **WS-E** — independent of A–D; can land any time, grouped here for topicality.
+
+**Risk controls.** Every WS is red-green: WS-B's exhaustiveness test is written
+before its tables and fails until each arch's slice is complete. The **bootstrap** is the
+load-bearing oracle throughout — it patches every relocation kind the compiler
+emits for its own source, so a byte-identical stage2/stage3 is proof the encoding
+path is unchanged. Per CLAUDE.md, prefer targeted runs (specific arch/format
+suites) during iteration and redirect output to a file; reserve full `make
+bootstrap` for end-of-WS gates. Keep old code paths live beside new ones within a
+WS (especially WS-D, per-arch) so any regression bisects to a single arch's hook.
+
+## Done criteria
+
+- No file under `src/link/` or `src/api/` enumerates `RelocKind` arms or switches
+ on `target.arch` / `target.obj` in the relocation path. (`rg "case R_(AARCH64|X64|RV)_"
+ src/link src/api` returns nothing; the `link_reloc_layout.c:698` ternary is gone.)
+- Every relocation static attribute (width, name, GOT/TLS/branch/tlvp class) has
+ exactly one source: the per-arch `RelocDesc` slice. The `reloc_width`,
+ `reloc_uses_got`, `reloc_is_tls_got`, and `LinkArchDesc.is_*` enumerations are
+ deleted; `kit_obj_reloc_kind_name`'s (arch,fmt) switch is deleted.
+- `link_reloc_apply` remains the single public byte-patcher entry; its
+ instruction-encoding arms live in `src/arch/<arch>/reloc.c`, the obj layer keeps
+ only the arch-neutral data-word arms.
+- Adding a hypothetical new arch's relocation touches only that arch's
+ `src/arch/<arch>/reloc.c` and its `src/obj/<fmt>/reloc_<arch>.c` wire table —
+ verified by the `test/obj/reloc_desc` exhaustiveness test failing until the new
+ rows exist, and by no generic file needing edits.
+- `make bootstrap` (debug + release) reaches the byte-identical fixed point; the
+ full link/elf/macho/coff/isa/asm/smoke matrix passes.
diff --git a/include/kit/cg.h b/include/kit/cg.h
@@ -221,6 +221,13 @@ typedef enum KitCgBackendFeatureFlag {
KIT_CG_BACKEND_SIMD = 1ull << 3,
KIT_CG_BACKEND_POINTER_AUTH = 1ull << 4,
KIT_CG_BACKEND_BRANCH_PROTECTION = 1ull << 5,
+ /* Instruction and data caches are coherent: freshly written code is
+ * executable without an explicit cache-flush / instruction-sync sequence.
+ * Set for x86 (snooping I-cache) and wasm (no hardware cache model). Not set
+ * for aarch64 / RISC-V, where JITs and self-modifying code must issue an
+ * explicit __clear_cache (IC/DC maintenance + ISB; fence.i) before running
+ * newly emitted instructions. */
+ KIT_CG_BACKEND_ICACHE_COHERENT = 1ull << 6,
} KitCgBackendFeatureFlag;
/* Capability queries answer whether the selected target/API can lower the
@@ -233,6 +240,29 @@ KIT_API int kit_cg_target_supports_symbol_feature(KitCompiler*,
/* kit_cg_target_supports_intrinsic is declared after KitCgIntrinsic. */
KIT_API uint64_t kit_cg_target_backend_features(KitCompiler*);
+/* Pointer width in bytes for an architecture kind, as a target fact independent
+ * of any constructed KitTarget: 4 or 8 per the cases below, 0 for a value that
+ * matches no case. Meant as the one mapping shared by the object-format
+ * detector, the driver triple parser, and internal CG/link code. Header-only
+ * (static inline, no KIT_API TU) so it is usable anywhere the public headers
+ * reach, including for arch kinds with no codegen backend (e.g. x86_32 / arm32).
+ * wasm reports 4 (wasm32; wasm64's 8-byte width is carried on the target spec,
+ * not the arch kind). */
+static inline uint8_t kit_arch_ptr_size(KitArchKind arch) {
+ switch (arch) { /* no default arm: unmatched values reach the final return */
+ case KIT_ARCH_X86_32:
+ case KIT_ARCH_ARM_32:
+ case KIT_ARCH_RV32:
+ case KIT_ARCH_WASM: /* wasm32; see the header note on wasm64 */
+ return 4u;
+ case KIT_ARCH_X86_64:
+ case KIT_ARCH_ARM_64:
+ case KIT_ARCH_RV64:
+ return 8u;
+ }
+ return 0u; /* unknown KitArchKind */
+}
+
/* ============================================================
* Memory Access
* ============================================================ */
diff --git a/include/kit/compile.h b/include/kit/compile.h
@@ -125,6 +125,11 @@ typedef void (*KitFrontendAbortFn)(KitFrontendState*);
typedef struct KitFrontendCaps {
 bool preprocessor; /* honors KitFrontendCompileOptions.preprocess */
 uint8_t lto_mode; /* KitFrontendLtoMode */
+ /* Set when the frontend re-reads earlier toplevel text on later compiles
+ * (toy today): a REPL driver should then retain (cache) the verbatim source
+ * text it feeds to KIT_FRONTEND_INPUT_REPL_TOPLEVEL compiles. Frontends that
+ * fully absorb each toplevel input leave this false. */
+ bool cache_repl_toplevel_source;
} KitFrontendCaps;
/* Parse the frontend-specific command-line flags a generic driver did not
@@ -156,6 +161,15 @@ typedef struct KitFrontendVTable {
const KitSlice* extensions;
uint32_t nextensions;
+ /* Counted list of canonical names and aliases (e.g. the `-x` spellings a
+ * driver accepts) that identify this frontend's language. kit_language_for_name
+ * walks every registered frontend's list (case-sensitively, matching the
+ * driver's exact `-x` spellings) to map a name back to a KitLanguage, and
+ * kit_language_name returns the first (canonical) entry for a language. May be
+ * NULL/0 for frontends with no name; such a language is then unnamed. */
+ const KitSlice* names;
+ uint32_t nnames;
+
/* Optional transaction hooks for incremental/REPL frontends. A compile
* stages new durable declarations; `commit` makes the most recent
* successful compile's declarations permanent, `abort` discards them and
@@ -185,6 +199,19 @@ typedef struct KitFrontendVTable {
* is unclaimed (or that has no extension) returns KIT_LANG_UNKNOWN rather
* than defaulting to any particular language. */
KIT_API KitLanguage kit_language_for_path(KitCompiler*, const char* path);
+
+/* Map a canonical name or alias (a `-x` spelling such as "c", "asm", "wat") to
+ * a language by walking every registered frontend's `names` list. Matching is
+ * case-sensitive, mirroring the driver's exact `-x` spellings. An unclaimed
+ * name (or NULL) returns KIT_LANG_UNKNOWN; no frontend is privileged. */
+KIT_API KitLanguage kit_language_for_name(KitCompiler*, const char* name);
+
+/* Return a language's canonical name: the first entry of the registered
+ * frontend's `names` list. Returns NULL when no frontend is registered for
+ * `lang` or it has no name. The returned pointer is owned by the frontend's
+ * static name table and is valid for the compiler's lifetime. */
+KIT_API const char* kit_language_name(KitCompiler*, KitLanguage);
+
KIT_API KitStatus kit_register_frontend(KitCompiler*, KitLanguage,
const KitFrontendVTable*);
diff --git a/include/kit/object.h b/include/kit/object.h
@@ -303,6 +303,17 @@ KIT_API KitBinFmt kit_detect_fmt(const uint8_t* data, size_t len);
KIT_API KitStatus kit_detect_target(const uint8_t* data, size_t len,
KitTargetSpec* out);
+/* Object-format name <-> KitObjFmt mapping (objcopy/objdump bfdname
+ * spellings). kit_obj_fmt_from_name returns KIT_OK and writes *out on a
+ * known name ("elf", "coff", "pe", "macho", "wasm"), KIT_NOT_FOUND
+ * otherwise. kit_obj_fmt_name returns the canonical NUL-terminated name
+ * for a KitObjFmt, or NULL for an out-of-range value.
+ *
+ * (Thin wrappers over the internal obj_format_fmt_* helpers; the
+ * implementation is added in src/api.) */
+KIT_API KitStatus kit_obj_fmt_from_name(const char* name, KitObjFmt* out);
+KIT_API const char* kit_obj_fmt_name(KitObjFmt fmt);
+
KIT_API KitStatus kit_obj_open(const KitContext*, KitSlice name,
const KitSlice*, KitObjFile** out);
KIT_API void kit_obj_free(KitObjFile*);
diff --git a/lang/c/c.c b/lang/c/c.c
@@ -128,6 +128,8 @@ static void c_frontend_free(KitFrontendState* frontend) {
* language-for-path lookup resolves them by the same registry walk as every
* other language, with no special fallback. */
static const KitSlice c_extensions[] = {KIT_SLICE_LIT("c"), KIT_SLICE_LIT("h")};
+/* Canonical `-x` name; mirrors the driver's "c" spelling. */
+static const KitSlice c_names[] = {KIT_SLICE_LIT("c")};
const KitFrontendVTable kit_c_frontend_vtable = {
c_frontend_new,
@@ -136,10 +138,12 @@ const KitFrontendVTable kit_c_frontend_vtable = {
c_frontend_free,
c_extensions,
(uint32_t)(sizeof c_extensions / sizeof c_extensions[0]),
+ c_names,
+ (uint32_t)(sizeof c_names / sizeof c_names[0]),
/* commit/abort: C has no durable cross-compile state yet */
NULL,
NULL,
- {true, KIT_FRONTEND_LTO_CG},
+ {true, KIT_FRONTEND_LTO_CG, false},
NULL, /* parse_options: C has no frontend-specific flags */
NULL, /* free_options */
};
diff --git a/lang/toy/compile.c b/lang/toy/compile.c
@@ -226,6 +226,8 @@ static void toy_frontend_free(KitFrontendState* frontend) {
}
static const KitSlice toy_extensions[] = {KIT_SLICE_LIT("toy")};
+/* Canonical `-x` name; mirrors the driver's "toy" spelling. */
+static const KitSlice toy_names[] = {KIT_SLICE_LIT("toy")};
const KitFrontendVTable kit_toy_frontend_vtable = {
toy_frontend_new,
@@ -234,9 +236,12 @@ const KitFrontendVTable kit_toy_frontend_vtable = {
toy_frontend_free,
toy_extensions,
(uint32_t)(sizeof toy_extensions / sizeof toy_extensions[0]),
+ toy_names,
+ (uint32_t)(sizeof toy_names / sizeof toy_names[0]),
toy_frontend_commit,
toy_frontend_abort,
- {false, KIT_FRONTEND_LTO_CG},
+ /* cache_repl_toplevel_source: the toy REPL re-reads earlier toplevel text. */
+ {false, KIT_FRONTEND_LTO_CG, true},
NULL, /* parse_options: no toy-specific flags yet */
NULL, /* free_options */
};
diff --git a/lang/wasm/wasm.c b/lang/wasm/wasm.c
@@ -151,6 +151,9 @@ static void wasm_frontend_free(KitFrontendState* frontend) {
static const KitSlice wasm_extensions[] = {KIT_SLICE_LIT("wat"),
KIT_SLICE_LIT("wasm")};
+/* Canonical `-x` name plus alias; mirrors the driver's "wasm"/"wat" spellings. */
+static const KitSlice wasm_names[] = {KIT_SLICE_LIT("wasm"),
+ KIT_SLICE_LIT("wat")};
const KitFrontendVTable kit_wasm_frontend_vtable = {
wasm_frontend_new,
@@ -159,9 +162,11 @@ const KitFrontendVTable kit_wasm_frontend_vtable = {
wasm_frontend_free,
wasm_extensions,
(uint32_t)(sizeof wasm_extensions / sizeof wasm_extensions[0]),
+ wasm_names,
+ (uint32_t)(sizeof wasm_names / sizeof wasm_names[0]),
NULL, /* commit: wasm has no durable cross-compile state */
NULL, /* abort */
- {false, KIT_FRONTEND_LTO_CG},
+ {false, KIT_FRONTEND_LTO_CG, false},
wasm_parse_options,
wasm_free_options,
};
diff --git a/src/abi/abi.h b/src/abi/abi.h
@@ -130,6 +130,12 @@ typedef struct ABIFuncInfo {
* Apple ARM64 sets this; AAPCS64 / SysV-x64 leave it 0 (variadics
* use the same register routing as fixed args). */
u8 vararg_on_stack;
+ /* True when floating-point arguments to a variadic function are passed
+ * through the integer argument registers/slots rather than the FP pool.
+ * Windows-ARM64 sets this so its plain-pointer `va_list` can walk a single
+ * register save area; AAPCS64 / SysV-x64 / Apple ARM64 leave it 0. Lets the
+ * aarch64 backend stop inspecting target.os==WINDOWS for varargs routing. */
+ u8 vararg_fp_via_int;
/* Minimum stack slot size/alignment for scalar stack-passed arguments.
* Zero means the backend default. Apple ARM64 uses 4-byte compact slots for
* stack arguments such as int32; AAPCS64 uses 8-byte slots. */
diff --git a/src/abi/abi_aapcs64_windows.c b/src/abi/abi_aapcs64_windows.c
@@ -53,6 +53,10 @@ static ABIFuncInfo* aapcs64_windows_compute_func_info(TargetABI* a,
* through the integer argument slots. That applies to named FP parameters
* too; trailing `...` arguments are handled by the call lowering path. */
if (info && info->variadic) {
+ /* Trait the aarch64 backend reads instead of checking target.os; the
+ * named FP parameters are remapped here, trailing `...` FP args are
+ * handled by the call lowering path off the same flag. */
+ info->vararg_fp_via_int = 1;
for (u16 i = 0; i < info->nparams; ++i) {
remap_fp_parts_to_int(a, (ABIArgInfo*)&info->params[i]);
}
diff --git a/src/api/compile.c b/src/api/compile.c
@@ -41,6 +41,8 @@ static KitStatus asm_frontend_compile(KitFrontendState* fe,
static void asm_frontend_free(KitFrontendState* fe);
static const KitSlice asm_extensions[] = {KIT_SLICE_LIT("s")};
+/* Canonical `-x` name plus alias; mirrors the driver's "asm"/"s" spellings. */
+static const KitSlice asm_names[] = {KIT_SLICE_LIT("asm"), KIT_SLICE_LIT("s")};
const KitFrontendVTable kit_asm_frontend_vtable = {
asm_frontend_new,
@@ -49,9 +51,11 @@ const KitFrontendVTable kit_asm_frontend_vtable = {
asm_frontend_free,
asm_extensions,
(uint32_t)(sizeof asm_extensions / sizeof asm_extensions[0]),
+ asm_names,
+ (uint32_t)(sizeof asm_names / sizeof asm_names[0]),
NULL, /* commit: asm has no durable cross-compile state */
NULL, /* abort */
- {false, KIT_FRONTEND_LTO_OPAQUE},
+ {false, KIT_FRONTEND_LTO_OPAQUE, false},
NULL, /* parse_options: no asm-specific flags */
NULL, /* free_options */
};
@@ -115,6 +119,41 @@ KitLanguage kit_language_for_path(KitCompiler* c, const char* path) {
return KIT_LANG_UNKNOWN;
}
+/* Byte-for-byte, case-sensitive comparison of the NUL-terminated `name`
+ * against the name slice `pat` (one of a frontend's exact `-x` spellings),
+ * bounded by pat.len. Returns nonzero only when both end together. */
+static int name_eq(const char* name, KitSlice pat) {
+ size_t i;
+ for (i = 0; i < pat.len; ++i) {
+ if (name[i] == '\0' || name[i] != pat.s[i]) return 0; /* short or differs */
+ }
+ return name[pat.len] == '\0'; /* reject names that only start with pat */
+}
+
+KitLanguage kit_language_for_name(KitCompiler* c, const char* name) {
+ unsigned lang; /* slot index into c->frontends; returned as the KitLanguage */
+ if (!c || !name) return KIT_LANG_UNKNOWN;
+ for (lang = 0; lang < KIT_LANG_COUNT; ++lang) {
+ const KitFrontendVTable* v = c->frontends[lang];
+ uint32_t n;
+ if (!v || !v->names) continue; /* empty slot, or frontend lists no names */
+ for (n = 0; n < v->nnames; ++n) {
+ if (name_eq(name, v->names[n])) return (KitLanguage)lang; /* first hit */
+ }
+ }
+ return KIT_LANG_UNKNOWN; /* no frontend lists this name */
+}
+
+const char* kit_language_name(KitCompiler* c, KitLanguage lang) {
+ const KitFrontendVTable* v;
+ if (!c || (unsigned)lang >= KIT_LANG_COUNT) return NULL; /* incl. negative */
+ v = c->frontends[lang];
+ if (!v || !v->names || v->nnames == 0) return NULL; /* nothing to report */
+ /* The first name is canonical. Tables built with KIT_SLICE_LIT have a NUL
+ * just past .len; NOTE(review): confirm registered vtables guarantee it. */
+ return v->names[0].s;
+}
+
KitStatus kit_register_frontend(KitCompiler* c, KitLanguage lang,
const KitFrontendVTable* vtable) {
if (!c) return KIT_INVALID;
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -320,6 +320,20 @@ typedef struct ArchImpl {
* rv_intrinsic / wasm_intrinsic). Read via kit_cg_target_supports_intrinsic.
*/
int (*supports_intrinsic)(const Compiler* c, KitCgIntrinsic intrin);
+
+ /* Resolve & validate the float ABI for the target being constructed, given
+ * the explicit -mabi string (`abi`, empty for none) and the already-resolved
+ * -march feature bits (`feature_words`/`nfeature_words`). On success returns
+ * KIT_OK with `spec->float_abi` set to the chosen KitFloatAbi. On a bad/
+ * mismatched ABI returns KIT_INVALID and writes a NUL-terminated message into
+ * `err` (capacity `errcap`) for the caller to surface as a diagnostic. NULL
+ * hook means the arch has no float-ABI axis: leave spec->float_abi at
+ * KIT_FLOAT_ABI_DEFAULT (the arch_resolve_float_abi wrapper no-ops). Set for
+ * RISC-V (handles rv32 + rv64); read via arch_resolve_float_abi. */
+ KitStatus (*resolve_float_abi)(const struct ArchImpl* impl,
+ KitTargetSpec* spec, const u64* feature_words,
+ u32 nfeature_words, KitSlice abi, char* err,
+ size_t errcap);
} ArchImpl;
const ArchImpl* arch_lookup(KitArchKind);
@@ -330,6 +344,26 @@ void arch_target_feature_defaults(const ArchImpl*, const Target*, u64* words,
KitStatus arch_target_feature_apply_isa(const ArchImpl*, const Target*,
KitSlice isa, u64* words, u32 nwords);
+/* Resolve & validate `spec->float_abi` from the explicit -mabi string and the
+ * resolved -march feature bits, dispatching to `impl->resolve_float_abi`. When
+ * `impl` is NULL or the arch sets no hook this is a no-op returning KIT_OK and
+ * leaving spec->float_abi untouched (KIT_FLOAT_ABI_DEFAULT) — exactly the old
+ * "non-RISC-V arches leave the float ABI at default" behavior. On a bad ABI the
+ * hook returns KIT_INVALID and fills `err` (NUL-terminated, capacity `errcap`).
+ * Header-only thin dispatch: the matching `arch_reloc_*` wrappers live in
+ * src/arch/registry.c, but float-ABI resolution runs during target
+ * construction (no Compiler yet), so this wrapper takes the ArchImpl directly.
+ */
+static inline KitStatus arch_resolve_float_abi(const ArchImpl* impl,
+ KitTargetSpec* spec,
+ const u64* feature_words,
+ u32 nfeature_words, KitSlice abi,
+ char* err, size_t errcap) {
+ if (!impl || !impl->resolve_float_abi) return KIT_OK; /* spec, err untouched */
+ return impl->resolve_float_abi(impl, spec, feature_words, nfeature_words, abi,
+ err, errcap);
+}
+
/* Spelling for a relocated operand in `cc -S` text, for the compiler's target
* arch+format. Returns 1 and fills *out when symbolizable, 0 to keep numeric
* (also when the arch provides no asm_ops). Thin dispatch over ArchAsmOps. */
diff --git a/src/arch/riscv/arch.c b/src/arch/riscv/arch.c
@@ -9,6 +9,7 @@
#include "arch/riscv/variant.h"
#include "cg/native_direct_target.h"
#include "core/bytes.h"
+#include "core/strbuf.h"
#include "link/link_arch.h"
#include "obj/obj.h"
@@ -413,6 +414,95 @@ static int rv64_supports_intrinsic(const Compiler* c, KitCgIntrinsic intrin) {
return 0;
}
+static int rv64_feature_get(const u64* words, u32 nwords, u32 idx) {
+ if (!words || idx / 64u >= nwords) return 0; /* NULL / out of range: clear */
+ return (words[idx / 64u] & (1ull << (idx % 64u))) != 0; /* 64 bits per word */
+}
+
+/* RISC-V float-ABI resolution + validation, factored out of src/api/core.c so
+ * non-arch code resolves the ABI by capability (arch_resolve_float_abi) instead
+ * of an `arch == RV32 || RV64` branch. Shared by rv32 and rv64 (one backend).
+ * Mirrors the historical core.c logic exactly: an explicit -mabi (`abi`) picks
+ * the ABI and must match the pointer width; otherwise the ABI is derived from
+ * the resolved -march F/D bits. A hard single/double ABI requires the matching
+ * extension. Writes spec->float_abi on success; on error returns KIT_INVALID
+ * and fills `err`. */
+static KitStatus rv64_resolve_float_abi(const ArchImpl* impl,
+ KitTargetSpec* spec,
+ const u64* feature_words,
+ u32 nfeature_words, KitSlice abi,
+ char* err, size_t errcap) {
+ u32 fidx, didx;
+ int has_f;
+ int has_d;
+ KitFloatAbi fa;
+ StrBuf sb;
+ strbuf_init(&sb, err, errcap); /* messages are built in the caller's buffer */
+ has_f = arch_target_feature_index(impl, kit_slice_cstr("f"), &fidx) &&
+ rv64_feature_get(feature_words, nfeature_words, fidx);
+ has_d = arch_target_feature_index(impl, kit_slice_cstr("d"), &didx) &&
+ rv64_feature_get(feature_words, nfeature_words, didx);
+
+ if (abi.s && abi.len) {
+ int is_ilp32 = 0;
+ int is_lp64 = 0;
+ if (kit_slice_eq_cstr(abi, "ilp32") || kit_slice_eq_cstr(abi, "ilp32f") ||
+ kit_slice_eq_cstr(abi, "ilp32d")) {
+ is_ilp32 = 1;
+ } else if (kit_slice_eq_cstr(abi, "lp64") ||
+ kit_slice_eq_cstr(abi, "lp64f") ||
+ kit_slice_eq_cstr(abi, "lp64d")) {
+ is_lp64 = 1;
+ } else {
+ strbuf_puts(&sb, "unsupported ABI for ");
+ strbuf_puts(&sb, impl->name);
+ strbuf_puts(&sb, ": ");
+ strbuf_put_slice(&sb, abi);
+ return KIT_INVALID;
+ }
+ /* Width prefix must match pointer size. */
+ if ((is_ilp32 && spec->ptr_size != 4u) ||
+ (is_lp64 && spec->ptr_size != 8u)) {
+ strbuf_puts(&sb, "ABI ");
+ strbuf_put_slice(&sb, abi);
+ strbuf_puts(&sb, " does not match pointer width for ");
+ strbuf_puts(&sb, impl->name);
+ return KIT_INVALID;
+ }
+ if (kit_slice_eq_cstr(abi, "ilp32d") || kit_slice_eq_cstr(abi, "lp64d")) {
+ fa = KIT_FLOAT_ABI_DOUBLE;
+ } else if (kit_slice_eq_cstr(abi, "ilp32f") ||
+ kit_slice_eq_cstr(abi, "lp64f")) {
+ fa = KIT_FLOAT_ABI_SINGLE;
+ } else {
+ fa = KIT_FLOAT_ABI_SOFT; /* plain "ilp32" / "lp64" */
+ }
+ } else {
+ /* Derive from the resolved -march feature bits. */
+ if (has_d)
+ fa = KIT_FLOAT_ABI_DOUBLE;
+ else if (has_f)
+ fa = KIT_FLOAT_ABI_SINGLE;
+ else
+ fa = KIT_FLOAT_ABI_SOFT;
+ }
+
+ if (fa == KIT_FLOAT_ABI_SINGLE && !has_f) {
+ strbuf_puts(&sb,
+ "hardware single-float ABI requires the 'f' extension for ");
+ strbuf_puts(&sb, impl->name);
+ return KIT_INVALID;
+ }
+ if (fa == KIT_FLOAT_ABI_DOUBLE && !has_d) {
+ strbuf_puts(&sb,
+ "hardware double-float ABI requires the 'd' extension for ");
+ strbuf_puts(&sb, impl->name);
+ return KIT_INVALID;
+ }
+ spec->float_abi = (uint8_t)fa; /* reached only when every check passed */
+ return KIT_OK;
+}
+
const ArchImpl arch_impl_rv64 = {
.backend = {.name = "rv64", .make = rv64_backend_make},
.kind = KIT_ARCH_RV64,
@@ -451,6 +541,7 @@ const ArchImpl arch_impl_rv64 = {
.atomic_lock_free_max = 8u,
.supports_call_conv = rv64_supports_call_conv,
.supports_intrinsic = rv64_supports_intrinsic,
+ .resolve_float_abi = rv64_resolve_float_abi,
};
/* RV32 shares nearly all of the RISC-V backend with rv64 — the per-XLEN
@@ -497,4 +588,5 @@ const ArchImpl arch_impl_rv32 = {
.atomic_lock_free_max = 4u,
.supports_call_conv = rv64_supports_call_conv,
.supports_intrinsic = rv64_supports_intrinsic,
+ .resolve_float_abi = rv64_resolve_float_abi,
};
diff --git a/src/arch/wasm/arch.c b/src/arch/wasm/arch.c
@@ -157,7 +157,8 @@ const ArchImpl arch_impl_wasm = {
.register_index = NULL,
.register_count = NULL,
.register_at = NULL,
- .backend_features = KIT_CG_BACKEND_STRICT_ALIGNMENT,
+ .backend_features =
+ KIT_CG_BACKEND_STRICT_ALIGNMENT | KIT_CG_BACKEND_ICACHE_COHERENT,
/* wasm32 has 4-byte pointers but lowers 8-byte (i64) atomics lock-free. */
.atomic_lock_free_max = 8u,
.supports_call_conv = wasm_supports_call_conv,
diff --git a/src/arch/x64/arch.c b/src/arch/x64/arch.c
@@ -238,7 +238,8 @@ const ArchImpl arch_impl_x64 = {
.cfi_cfa_init_reg = 7u,
.cfi_cfa_init_offset = 8,
.backend_features = KIT_CG_BACKEND_UNALIGNED_MEMORY |
- KIT_CG_BACKEND_RED_ZONE | KIT_CG_BACKEND_SIMD,
+ KIT_CG_BACKEND_RED_ZONE | KIT_CG_BACKEND_SIMD |
+ KIT_CG_BACKEND_ICACHE_COHERENT,
.atomic_lock_free_max = 8u,
.supports_call_conv = x64_supports_call_conv,
.supports_intrinsic = x64_supports_intrinsic,
diff --git a/src/obj/elf/elf.h b/src/obj/elf/elf.h
@@ -326,6 +326,9 @@ static inline u8 elf_st_other(u8 vis /* SymVis */) {
* archs land. */
u32 elf_aarch64_reloc_to(u32 kind /* RelocKind */);
u32 elf_aarch64_reloc_from(u32 elf_type);
+/* Diagnostic spelling of an AArch64 ELF reloc wire type; NULL on unknown
+ * (caller falls back to the format-neutral reloc_kind_name). */
+const char* elf_aarch64_reloc_name(u32 elf_type);
/* ---- x86_64 ELF reloc types ----
*
@@ -366,6 +369,8 @@ u32 elf_aarch64_reloc_from(u32 elf_type);
u32 elf_x86_64_reloc_to(u32 kind /* RelocKind */);
u32 elf_x86_64_reloc_from(u32 elf_type);
+/* Diagnostic spelling of an x86_64 ELF reloc wire type; NULL on unknown. */
+const char* elf_x86_64_reloc_name(u32 elf_type);
/* ---- RISC-V relocation types ----
*
@@ -425,6 +430,11 @@ u32 elf_riscv64_reloc_from(u32 elf_type);
* 64-bit-only kinds (R_ABS64 / R_RV_ADD64 / R_RV_SUB64) are unsupported. */
u32 elf_riscv32_reloc_to(u32 kind /* RelocKind */);
u32 elf_riscv32_reloc_from(u32 elf_type);
+/* Diagnostic spelling of a RISC-V ELF reloc wire type, and float-ABI
+ * decode from RISC-V e_flags. Both XLEN-neutral — shared by the rv64 and
+ * rv32 arch-ops descriptors. */
+const char* elf_riscv_reloc_name(u32 elf_type);
+KitFloatAbi elf_riscv_float_abi_from_e_flags(u32 e_flags);
/* ---- little-endian byte writers (Writer-based) ----
* Writes go through the shared writer_u*_le helpers (core/bytes.h); the
diff --git a/src/obj/elf/link.c b/src/obj/elf/link.c
@@ -362,9 +362,19 @@ static int reloc_is_tlsle(RelocKind k) {
* a 16-byte TCB ahead of .tdata and biases tp to match AArch64, so
* freestanding rv64/rv32 keep +16. */
static u64 tls_tcb_bias(Compiler* c) {
- if (c->target.arch == KIT_ARCH_RV64 || c->target.arch == KIT_ARCH_RV32)
- return c->target.os == KIT_OS_FREESTANDING ? TLS_TCB_SIZE : 0ull;
- return TLS_TCB_SIZE;
+ /* Freestanding bias: ObjElfArchOps.tls_tp_bias (16 for AArch64/RISC-V
+ * variant-I, 0 for x86_64 variant-II). Hosted RISC-V is the one split kept
+ * here: the psABI points tp at the image start, so hosted libcs want +0.
+ * NOTE(review): the removed body returned TLS_TCB_SIZE for every non-RISC-V
+ * arch; a descriptor with tls_tp_bias == 0 now yields 0 — confirm callers. */
+ const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_ELF);
+ const ObjElfArchOps* arch =
+ (fmt && fmt->elf_arch) ? fmt->elf_arch(c->target.arch) : NULL;
+ u64 bias = arch ? (u64)arch->tls_tp_bias : TLS_TCB_SIZE; /* no descriptor */
+ if ((c->target.arch == KIT_ARCH_RV64 || c->target.arch == KIT_ARCH_RV32) &&
+ c->target.os != KIT_OS_FREESTANDING)
+ return 0ull;
+ return bias;
}
/* x86_64 SysV ABI: TLS variant II — the per-thread TLS image sits at
diff --git a/src/obj/elf/reloc_aarch64.c b/src/obj/elf/reloc_aarch64.c
@@ -188,3 +188,68 @@ u32 elf_aarch64_reloc_from(u32 elf_type) {
return (u32)-1; /* sentinel */
}
}
+
+/* Diagnostic spelling for an AArch64 ELF reloc *wire* type. Returns a
+ * static literal, or NULL for an unknown type (caller falls back to the
+ * format-neutral reloc_kind_name). Mirrors the wire-type set the reader
+ * recognizes. */
+const char* elf_aarch64_reloc_name(u32 elf_type) {
+ switch (elf_type) {
+ case ELF_R_AARCH64_NONE:
+ return "R_AARCH64_NONE";
+ case ELF_R_AARCH64_ABS64:
+ return "R_AARCH64_ABS64";
+ case ELF_R_AARCH64_ABS32:
+ return "R_AARCH64_ABS32";
+ case ELF_R_AARCH64_ABS16:
+ return "R_AARCH64_ABS16";
+ case ELF_R_AARCH64_PREL64:
+ return "R_AARCH64_PREL64";
+ case ELF_R_AARCH64_PREL32:
+ return "R_AARCH64_PREL32";
+ case ELF_R_AARCH64_PREL16:
+ return "R_AARCH64_PREL16";
+ case ELF_R_AARCH64_JUMP26:
+ return "R_AARCH64_JUMP26";
+ case ELF_R_AARCH64_CALL26:
+ return "R_AARCH64_CALL26";
+ case ELF_R_AARCH64_CONDBR19:
+ return "R_AARCH64_CONDBR19";
+ case ELF_R_AARCH64_TSTBR14:
+ return "R_AARCH64_TSTBR14";
+ case ELF_R_AARCH64_LD_PREL_LO19:
+ return "R_AARCH64_LD_PREL_LO19";
+ case ELF_R_AARCH64_ADR_PREL_LO21:
+ return "R_AARCH64_ADR_PREL_LO21";
+ case ELF_R_AARCH64_ADR_PREL_PG_HI21:
+ return "R_AARCH64_ADR_PREL_PG_HI21";
+ case ELF_R_AARCH64_ADR_PREL_PG_HI21_NC:
+ return "R_AARCH64_ADR_PREL_PG_HI21_NC";
+ case ELF_R_AARCH64_ADD_ABS_LO12_NC:
+ return "R_AARCH64_ADD_ABS_LO12_NC";
+ case ELF_R_AARCH64_LDST8_ABS_LO12_NC:
+ return "R_AARCH64_LDST8_ABS_LO12_NC";
+ case ELF_R_AARCH64_LDST16_ABS_LO12_NC:
+ return "R_AARCH64_LDST16_ABS_LO12_NC";
+ case ELF_R_AARCH64_LDST32_ABS_LO12_NC:
+ return "R_AARCH64_LDST32_ABS_LO12_NC";
+ case ELF_R_AARCH64_LDST64_ABS_LO12_NC:
+ return "R_AARCH64_LDST64_ABS_LO12_NC";
+ case ELF_R_AARCH64_LDST128_ABS_LO12_NC:
+ return "R_AARCH64_LDST128_ABS_LO12_NC";
+ case ELF_R_AARCH64_ADR_GOT_PAGE:
+ return "R_AARCH64_ADR_GOT_PAGE";
+ case ELF_R_AARCH64_LD64_GOT_LO12_NC:
+ return "R_AARCH64_LD64_GOT_LO12_NC";
+ case ELF_R_AARCH64_GLOB_DAT:
+ return "R_AARCH64_GLOB_DAT";
+ case ELF_R_AARCH64_JUMP_SLOT:
+ return "R_AARCH64_JUMP_SLOT";
+ case ELF_R_AARCH64_RELATIVE:
+ return "R_AARCH64_RELATIVE";
+ case ELF_R_AARCH64_COPY:
+ return "R_AARCH64_COPY";
+ default: /* wire type not listed in this table */
+ return NULL;
+ }
+}
diff --git a/src/obj/elf/reloc_riscv64.c b/src/obj/elf/reloc_riscv64.c
@@ -180,3 +180,117 @@ u32 elf_riscv64_reloc_from(u32 elf_type) {
return (u32)-1; /* sentinel */
}
}
+
+/* Diagnostic spelling for a RISC-V ELF reloc *wire* type. XLEN-neutral —
+ * shared by the rv64 and rv32 arch-ops descriptors. Returns a static
+ * literal, or NULL for an unknown type. */
+const char* elf_riscv_reloc_name(u32 elf_type) {
+ switch (elf_type) {
+ case ELF_R_RISCV_NONE:
+ return "R_RISCV_NONE";
+ case ELF_R_RISCV_32:
+ return "R_RISCV_32";
+ case ELF_R_RISCV_64:
+ return "R_RISCV_64";
+ case ELF_R_RISCV_RELATIVE:
+ return "R_RISCV_RELATIVE";
+ case ELF_R_RISCV_COPY:
+ return "R_RISCV_COPY";
+ case ELF_R_RISCV_JUMP_SLOT:
+ return "R_RISCV_JUMP_SLOT";
+ case ELF_R_RISCV_IRELATIVE:
+ return "R_RISCV_IRELATIVE";
+ case ELF_R_RISCV_BRANCH:
+ return "R_RISCV_BRANCH";
+ case ELF_R_RISCV_JAL:
+ return "R_RISCV_JAL";
+ case ELF_R_RISCV_CALL:
+ return "R_RISCV_CALL";
+ case ELF_R_RISCV_CALL_PLT:
+ return "R_RISCV_CALL_PLT";
+ case ELF_R_RISCV_GOT_HI20:
+ return "R_RISCV_GOT_HI20";
+ case ELF_R_RISCV_TLS_GOT_HI20:
+ return "R_RISCV_TLS_GOT_HI20";
+ case ELF_R_RISCV_TLS_GD_HI20:
+ return "R_RISCV_TLS_GD_HI20";
+ case ELF_R_RISCV_PCREL_HI20:
+ return "R_RISCV_PCREL_HI20";
+ case ELF_R_RISCV_PCREL_LO12_I:
+ return "R_RISCV_PCREL_LO12_I";
+ case ELF_R_RISCV_PCREL_LO12_S:
+ return "R_RISCV_PCREL_LO12_S";
+ case ELF_R_RISCV_HI20:
+ return "R_RISCV_HI20";
+ case ELF_R_RISCV_LO12_I:
+ return "R_RISCV_LO12_I";
+ case ELF_R_RISCV_LO12_S:
+ return "R_RISCV_LO12_S";
+ case ELF_R_RISCV_TPREL_HI20:
+ return "R_RISCV_TPREL_HI20";
+ case ELF_R_RISCV_TPREL_LO12_I:
+ return "R_RISCV_TPREL_LO12_I";
+ case ELF_R_RISCV_TPREL_LO12_S:
+ return "R_RISCV_TPREL_LO12_S";
+ case ELF_R_RISCV_TPREL_ADD:
+ return "R_RISCV_TPREL_ADD";
+ case ELF_R_RISCV_ADD8:
+ return "R_RISCV_ADD8";
+ case ELF_R_RISCV_ADD16:
+ return "R_RISCV_ADD16";
+ case ELF_R_RISCV_ADD32:
+ return "R_RISCV_ADD32";
+ case ELF_R_RISCV_ADD64:
+ return "R_RISCV_ADD64";
+ case ELF_R_RISCV_SUB8:
+ return "R_RISCV_SUB8";
+ case ELF_R_RISCV_SUB16:
+ return "R_RISCV_SUB16";
+ case ELF_R_RISCV_SUB32:
+ return "R_RISCV_SUB32";
+ case ELF_R_RISCV_SUB64:
+ return "R_RISCV_SUB64";
+ case ELF_R_RISCV_ALIGN:
+ return "R_RISCV_ALIGN";
+ case ELF_R_RISCV_RVC_BRANCH:
+ return "R_RISCV_RVC_BRANCH";
+ case ELF_R_RISCV_RVC_JUMP:
+ return "R_RISCV_RVC_JUMP";
+ case ELF_R_RISCV_RELAX:
+ return "R_RISCV_RELAX";
+ case ELF_R_RISCV_SUB6:
+ return "R_RISCV_SUB6";
+ case ELF_R_RISCV_SET6:
+ return "R_RISCV_SET6";
+ case ELF_R_RISCV_SET8:
+ return "R_RISCV_SET8";
+ case ELF_R_RISCV_SET16:
+ return "R_RISCV_SET16";
+ case ELF_R_RISCV_SET32:
+ return "R_RISCV_SET32";
+ case ELF_R_RISCV_32_PCREL:
+ return "R_RISCV_32_PCREL";
+ case ELF_R_RISCV_SET_ULEB128:
+ return "R_RISCV_SET_ULEB128";
+ case ELF_R_RISCV_SUB_ULEB128:
+ return "R_RISCV_SUB_ULEB128";
+ default: /* wire type not listed in this table */
+ return NULL;
+ }
+}
+
+/* Decode the float ABI from RISC-V ELF e_flags (EF_RISCV_FLOAT_ABI_*).
+ * XLEN-neutral — shared by the rv64 and rv32 arch-ops descriptors. */
+KitFloatAbi elf_riscv_float_abi_from_e_flags(u32 e_flags) {
+ switch (e_flags & EF_RISCV_FLOAT_ABI_MASK) { /* other e_flags bits ignored */
+ case EF_RISCV_FLOAT_ABI_SOFT:
+ return KIT_FLOAT_ABI_SOFT;
+ case EF_RISCV_FLOAT_ABI_SINGLE:
+ return KIT_FLOAT_ABI_SINGLE;
+ case EF_RISCV_FLOAT_ABI_DOUBLE:
+ case EF_RISCV_FLOAT_ABI_QUAD: /* QUAD is reported as DOUBLE */
+ return KIT_FLOAT_ABI_DOUBLE;
+ default: /* masked value matched none of the cases above */
+ return KIT_FLOAT_ABI_DEFAULT;
+ }
+}
diff --git a/src/obj/elf/reloc_x86_64.c b/src/obj/elf/reloc_x86_64.c
@@ -132,3 +132,72 @@ u32 elf_x86_64_reloc_from(u32 elf_type) {
return (u32)-1; /* sentinel */
}
}
+
+/* Diagnostic spelling for an x86_64 ELF reloc *wire* type. Returns a
+ * static literal, or NULL for an unknown type. */
+const char* elf_x86_64_reloc_name(u32 elf_type) {
+ switch (elf_type) {
+ case ELF_R_X86_64_NONE:
+ return "R_X86_64_NONE";
+ case ELF_R_X86_64_64:
+ return "R_X86_64_64";
+ case ELF_R_X86_64_PC32:
+ return "R_X86_64_PC32";
+ case ELF_R_X86_64_GOT32:
+ return "R_X86_64_GOT32";
+ case ELF_R_X86_64_PLT32:
+ return "R_X86_64_PLT32";
+ case ELF_R_X86_64_COPY:
+ return "R_X86_64_COPY";
+ case ELF_R_X86_64_GLOB_DAT:
+ return "R_X86_64_GLOB_DAT";
+ case ELF_R_X86_64_JUMP_SLOT:
+ return "R_X86_64_JUMP_SLOT";
+ case ELF_R_X86_64_RELATIVE:
+ return "R_X86_64_RELATIVE";
+ case ELF_R_X86_64_IRELATIVE:
+ return "R_X86_64_IRELATIVE";
+ case ELF_R_X86_64_GOTPCREL:
+ return "R_X86_64_GOTPCREL";
+ case ELF_R_X86_64_32:
+ return "R_X86_64_32";
+ case ELF_R_X86_64_32S:
+ return "R_X86_64_32S";
+ case ELF_R_X86_64_16:
+ return "R_X86_64_16";
+ case ELF_R_X86_64_PC16:
+ return "R_X86_64_PC16";
+ case ELF_R_X86_64_8:
+ return "R_X86_64_8";
+ case ELF_R_X86_64_PC8:
+ return "R_X86_64_PC8";
+ case ELF_R_X86_64_DTPMOD64:
+ return "R_X86_64_DTPMOD64";
+ case ELF_R_X86_64_DTPOFF64:
+ return "R_X86_64_DTPOFF64";
+ case ELF_R_X86_64_TPOFF64:
+ return "R_X86_64_TPOFF64";
+ case ELF_R_X86_64_TLSGD:
+ return "R_X86_64_TLSGD";
+ case ELF_R_X86_64_TLSLD:
+ return "R_X86_64_TLSLD";
+ case ELF_R_X86_64_DTPOFF32:
+ return "R_X86_64_DTPOFF32";
+ case ELF_R_X86_64_GOTTPOFF:
+ return "R_X86_64_GOTTPOFF";
+ case ELF_R_X86_64_TPOFF32:
+ return "R_X86_64_TPOFF32";
+ case ELF_R_X86_64_PC64:
+ return "R_X86_64_PC64";
+ case ELF_R_X86_64_GOTOFF64:
+ return "R_X86_64_GOTOFF64";
+ case ELF_R_X86_64_GOTPC32:
+ return "R_X86_64_GOTPC32";
+ case ELF_R_X86_64_GOTPCRELX:
+ return "R_X86_64_GOTPCRELX";
+ case ELF_R_X86_64_REX_GOTPCRELX:
+ return "R_X86_64_REX_GOTPCRELX";
+ default: /* wire type not listed in this table */
+ return NULL;
+ }
+}
diff --git a/src/obj/format.h b/src/obj/format.h
@@ -33,6 +33,14 @@ typedef void (*ObjFormatMachoStubFn)(u8* dst, u64 stub_vaddr,
typedef void (*ObjFormatCoffStubFn)(u8* dst, u64 stub_vaddr,
u64 iat_slot_vaddr);
+/* Synthetic-input hook: invoked before symbol resolution to inject a
+ * synthetic input object (e.g. the COFF __CTOR_LIST__/__DTOR_LIST__
+ * boundary blob). The hook receives the Linker so it can append a
+ * LinkInput itself; it returns nothing, and simply appends nothing when
+ * the current target needs no synthetic input. Formats with no synthetic
+ * inputs leave this NULL. */
+typedef void (*ObjFormatSynthInputsFn)(Linker*);
+
typedef struct ObjElfArchOps {
KitArchKind arch;
u32 e_machine;
@@ -42,8 +50,23 @@ typedef struct ObjElfArchOps {
u32 r_glob_dat;
u32 r_jump_slot;
u32 r_irelative; /* R_*_IRELATIVE static-IFUNC resolver reloc (__rela_iplt). */
+ /* Variant-I TP bias: distance from the TLS image start to where `tp`
+ * points for a freestanding (kit start.c) layout. AArch64/RISC-V place
+ * a 16-byte TCB ahead of the image, so this is 16 for those arches; 0
+ * for variant-II arches (x86_64) and any arch with no TLS support. The
+ * hosted-vs-freestanding split for RISC-V is still applied by the
+ * caller in src/obj/elf/link.c; this field is the per-arch maximum. */
+ u32 tls_tp_bias;
u32 (*reloc_to)(u32 kind);
u32 (*reloc_from)(u32 wire_type);
+ /* Diagnostic spelling of a per-arch ELF reloc wire type (e.g.
+ * "R_AARCH64_CALL26"). NULL means "no per-arch name table"; callers
+ * fall back to the format-neutral reloc_kind_name(). */
+ const char* (*reloc_name)(u32 wire_type);
+ /* Decode the float ABI from this arch's ELF e_flags. RISC-V reads
+ * EF_RISCV_FLOAT_ABI_*; other arches have no float-ABI e_flags and
+ * leave this NULL (callers treat NULL as KIT_FLOAT_ABI_DEFAULT). */
+ KitFloatAbi (*float_abi_from_e_flags)(u32 e_flags);
} ObjElfArchOps;
typedef struct ObjMachoArchOps {
@@ -73,6 +96,9 @@ typedef enum ObjFormatArchiveAction {
OBJ_FORMAT_ARCHIVE_SKIP = 2,
} ObjFormatArchiveAction;
+/* ObjTlsModel (TLS access model) is defined in obj.h, the obj-layer
+ * public header that carries the obj_format_tls_model wrapper. */
+
typedef struct ObjFormatArchiveMember {
const char* archive_name;
const char* member_name;
@@ -127,6 +153,29 @@ typedef struct ObjFormatImpl {
const ObjFormatEmuOps* emu;
u8 split_sections_as_atoms;
+ /* C source-level symbol prefix the format prepends on disk: "_" for
+ * Mach-O, "" (or NULL, treated as "") for ELF / COFF / Wasm. Read by
+ * obj_format_c_mangle / obj_format_demangle_c. */
+ const char* c_label_prefix;
+ /* Default entry symbol name for a freshly created Linker on this
+ * format: "_main" for Mach-O (LC_MAIN), "mainCRTStartup" for COFF,
+ * "_start" for ELF / Wasm. NULL means "_start". */
+ const char* default_entry_name;
+ /* Carries DWARF debug sections file-only (not mapped into a loadable
+ * segment): ELF=1, Mach-O=1, COFF=0. */
+ u8 carries_file_only_debug;
+ /* Builds its own static GOT / non-lazy pointer table at link time even
+ * for a static image: Mach-O=1, else 0. */
+ u8 builds_own_static_got;
+ /* COFF pulls an archive member to satisfy a *weak* undef reference
+ * (binutils/PE COMDAT semantics); ELF/Mach-O only pull for strong
+ * undefs. COFF=1, else 0. */
+ u8 weak_undef_pulls_archive_member;
+
+ /* Inject a synthetic input object before symbol resolution. NULL when
+ * the format synthesizes nothing. */
+ ObjFormatSynthInputsFn synth_inputs;
+
const ObjElfArchOps* (*elf_arch)(KitArchKind);
const ObjElfArchOps* (*elf_machine)(u32 e_machine);
const ObjMachoArchOps* (*macho_arch)(KitArchKind);
@@ -148,4 +197,17 @@ int obj_format_dso_reader_for_bytes(const u8* data, size_t len,
KitBinFmt* bin_out,
ObjFormatDsoReader* out);
+/* Internal name<->KitObjFmt mapping, backed by the ObjFormatImpl name
+ * list. The thin public KIT_API wrappers (kit_obj_fmt_from_name /
+ * kit_obj_fmt_name, declared in include/kit/object.h) are added in
+ * src/api by a later wave; these are the internal data helpers.
+ *
+ * obj_format_fmt_from_name returns 1 and writes *out on a match (case
+ * sensitive, matching the canonical ObjFormatImpl.name spelling and any
+ * registered alias); returns 0 on an unknown name. obj_format_fmt_name
+ * returns the canonical NUL-terminated literal name, or NULL for an
+ * out-of-range KitObjFmt. */
+int obj_format_fmt_from_name(const char* name, KitObjFmt* out);
+const char* obj_format_fmt_name(KitObjFmt fmt);
+
#endif
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -4,6 +4,11 @@
#include "core/buf.h"
#include "core/core.h"
+/* Forward decl: the synthetic-input hook (obj_format_synth_inputs) takes a
+ * Linker but obj.h must not pull in the link subsystem. Defined in
+ * src/link; only used here as an opaque pointer. */
+typedef struct Linker Linker;
+
typedef enum SecKind {
SEC_TEXT,
SEC_RODATA,
@@ -645,6 +650,26 @@ int obj_macho_debug_sectname(const char* name, size_t len, char out[17]);
* SEC_DEBUG), which callers spell from the section's own name. */
const char* obj_macho_canon_secname(SecKind kind);
+/* Inverse of obj_macho_canon_secname: classify a Mach-O native
+ * "segname,sectname" spelling (e.g. "__TEXT,__text", "__DATA,__bss")
+ * into a SecKind. Used by a format-neutral reader / objdump path that
+ * holds the on-disk Mach-O section name and wants the canonical kit
+ * SecKind without re-deriving the per-segment rules at every call.
+ * `name` / `len` are the comma-joined spelling. Returns 1 and writes
+ * *kind on a recognized spelling; returns 0 (leaving *kind untouched)
+ * for an unrecognized name (caller treats as SEC_OTHER). */
+int obj_macho_seckind_for_secname(const char* name, size_t len, SecKind* kind);
+
+/* Translate a kit-internal (ELF-spelled) section name to its Mach-O
+ * native spelling. Generalizes obj_macho_debug_sectname: handles the
+ * ".debug_*" -> "__DWARF,__debug_*" DWARF case and ".eh_frame" ->
+ * "__TEXT,__eh_frame". Writes the comma-joined "segname,sectname"
+ * (NUL-terminated) into `out` (>= 40 bytes covers seg(16)+','+sect(16)+
+ * NUL) and returns 1 when `name` is one of the recognized
+ * format-divergent sections; returns 0 (leaving `out` untouched)
+ * otherwise, so the caller falls back to its own spelling. */
+int obj_macho_native_secname(const char* name, size_t len, char out[40]);
+
/* ---- thread-local storage emission ---------------------------------
*
* The frontend collects a `_Thread_local` definition's bytes (or marks
@@ -729,6 +754,87 @@ void obj_format_demangle_c(const Compiler*, const char** name, size_t* len);
* as a NUL-terminated literal; the caller interns. */
const char* obj_format_default_entry_name(const Compiler*);
+/* C source-level symbol prefix the active object format prepends on disk:
+ * "_" for Mach-O, "" for ELF / COFF / Wasm. The single source of truth
+ * read by obj_format_c_mangle / obj_format_demangle_c; never NULL (a
+ * format with no prefix returns ""). */
+const char* obj_format_c_label_prefix(const Compiler*);
+
+/* ---- thread-local storage model ----
+ *
+ * How compiled code reaches a `_Thread_local` on a given (format, OS):
+ * OBJ_TLS_ELF_LE : direct TP-relative offset (ELF Local-Exec /
+ * Initial-Exec), e.g. AArch64 `mrs tpidr_el0` + tprel.
+ * OBJ_TLS_MACHO_DESCRIPTOR: per-variable descriptor + thunk call; the
+ * TLVP reloc pair targets the descriptor.
+ * OBJ_TLS_WINDOWS_TEB : Windows TEB-based access (SECREL into the
+ * per-thread TLS block via the TEB). */
+typedef enum ObjTlsModel {
+ OBJ_TLS_ELF_LE = 0,
+ OBJ_TLS_MACHO_DESCRIPTOR = 1,
+ OBJ_TLS_WINDOWS_TEB = 2,
+} ObjTlsModel;
+
+/* Returns how compiled code reaches a `_Thread_local` on the active
+ * (format, OS): OBJ_TLS_WINDOWS_TEB for COFF, OBJ_TLS_MACHO_DESCRIPTOR
+ * for Mach-O, OBJ_TLS_ELF_LE otherwise. The single source of truth for
+ * the TLS-access decision; obj_format_tls_via_descriptor is now a thin
+ * wrapper over (model == OBJ_TLS_MACHO_DESCRIPTOR). */
+ObjTlsModel obj_format_tls_model(const Compiler*);
+
+/* True when the active object format carries DWARF debug sections
+ * file-only (not mapped into a loadable segment): ELF / Mach-O yes,
+ * COFF no. */
+int obj_format_carries_file_only_debug(const Compiler*);
+
+/* True when the active object format builds its own static GOT /
+ * non-lazy-pointer table at link time even for a static image:
+ * Mach-O yes, else no. */
+int obj_format_builds_own_static_got(const Compiler*);
+
+/* True when the active object format can represent a KitCgSymFeat
+ * `symfeat`. Today this is the TLS-model axis: ELF / Mach-O can
+ * represent every modeled TLS feature, COFF cannot (Windows TEB TLS
+ * uses a different mechanism). Non-TLS features return 1 for every
+ * format. `symfeat` is a KitCgSymFeat value (cast to int at the
+ * boundary). */
+int obj_format_supports_symbol_feature(const Compiler*, int symfeat);
+
+/* True when the active object format pulls an archive member to satisfy a
+ * *weak* undefined reference (PE/COFF COMDAT semantics). COFF yes,
+ * ELF / Mach-O no (they pull only for strong undefs). */
+int obj_format_weak_undef_pulls_archive_member(const Compiler*);
+
+/* True when static-IFUNC resolution on the active target goes through a
+ * `[__rela_iplt_start, __rela_iplt_end)` table of R_*_IRELATIVE relocs
+ * (walked by FreeBSD's crt before main) rather than kit's ctor-based
+ * __kit_ifunc_init path. The one place the (os == FREEBSD && obj == ELF)
+ * knowledge lives. */
+int obj_format_static_ifunc_via_rela_iplt(const Compiler*);
+
+/* Per-arch variant-I TP bias for the active target's ELF arch: distance
+ * from the TLS image start to where `tp` points in kit's freestanding
+ * layout (16 for AArch64/RISC-V, 0 for x86_64 variant-II). Returns 0
+ * for a non-ELF target or an arch with no ELF descriptor. The
+ * hosted-vs-freestanding RISC-V split is applied by the caller. */
+u32 obj_format_elf_tls_tp_bias(const Compiler*);
+
+/* Format boundary-symbol classifier. Asks the active object format
+ * whether `name` is a symbol the format itself owns as a boundary /
+ * synthetic global, and if so what SymKind it carries. Returns 1 and
+ * writes *symkind (a SymKind value) when the format owns `name`
+ * (PE `__ImageBase` / `_tls_used` -> SK_ABS); returns 0 otherwise,
+ * leaving *symkind untouched. Lets generic link code classify boundary
+ * symbols without a per-format switch. */
+int obj_format_boundary_sym_kind(const Compiler*, KitSlice name, int* symkind);
+
+/* Invoke the active object format's synthetic-input hook (if any) before
+ * symbol resolution. No-op for formats with no synthetic inputs. The
+ * hook builds and appends a synthetic LinkInput via Linker internals, so
+ * it takes the Linker; declared here as the obj-side dispatch point.
+ * (The COFF body is wired by T-LINK — see registry.c synth_inputs note.) */
+void obj_format_synth_inputs(const Compiler*, Linker*);
+
/* ---- format-specific extension payload ----
*
* Generic object tables stay format-neutral. Format-specific module-level
diff --git a/src/obj/obj_secnames.c b/src/obj/obj_secnames.c
@@ -17,6 +17,7 @@
* panics with a "TODO" until the macho writer lands in Phase 2/3. COFF
* panics in the same way and is filled in later. */
+#include <kit/cg.h>
#include <string.h>
#include "core/core.h"
@@ -26,6 +27,14 @@
#include "obj/format.h"
#include "obj/obj.h"
+/* The C-symbol prefix for the active object format, never NULL: a format
+ * row with a NULL c_label_prefix (or no format match) is treated as "". */
+const char* obj_format_c_label_prefix(const Compiler* c) {
+ const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL;
+ const char* p = fmt ? fmt->c_label_prefix : NULL; /* NULL c: no lookup */
+ return p ? p : ""; /* normalize "no prefix" to the empty string */
+}
+
int obj_macho_debug_sectname(const char* name, size_t len, char out[17]) {
/* Only ".debug_*" sections translate here; ".eh_frame" lives in __TEXT
* and is handled by the writer's generic SecKind path and the reader's
@@ -62,6 +71,65 @@ const char* obj_macho_canon_secname(SecKind kind) {
}
}
+/* Inverse of obj_macho_canon_secname: classify a Mach-O native
+ * "segname,sectname" spelling into a SecKind. Mirrors the per-segment
+ * rules of the Mach-O reader (sec_kind_from_seg_sect in macho/read.c)
+ * for the canonical names, but is name-only (no S_TYPE flags) so a
+ * format-neutral caller can classify without the raw section header.
+ *
+ * `name` is read as exactly `len` bytes (memchr/memcmp only), so it does
+ * not need to be NUL-terminated. The split happens at the first ','.
+ *
+ * Classification, keyed on the segment name:
+ *   __DWARF -> SEC_DEBUG (any section name)
+ *   __TEXT  -> SEC_TEXT for "__text", SEC_RODATA for anything else
+ *   __DATA  -> SEC_BSS for "__bss", SEC_DATA for anything else
+ *
+ * Returns 1 on a match and stores the result through `kind` when it is
+ * non-NULL. Returns 0, without touching *kind, when `name` is NULL,
+ * `len` is 0, there is no comma, or the segment is none of the above. */
+int obj_macho_seckind_for_secname(const char* name, size_t len,
+ SecKind* kind) {
+ const char* comma;
+ size_t seg_len, sect_off, sect_len;
+ if (!name || len == 0) return 0;
+ comma = (const char*)memchr(name, ',', len); /* first comma: seg | sect */
+ if (!comma) return 0;
+ seg_len = (size_t)(comma - name);
+ sect_off = seg_len + 1u; /* skip the comma itself */
+ sect_len = len - sect_off; /* 0 when the comma is the last byte */
+ {
+ const char* seg = name;
+ const char* sect = name + sect_off;
+ SecKind k;
+ if (seg_len == 7 && memcmp(seg, "__DWARF", 7) == 0) {
+ k = SEC_DEBUG;
+ } else if (seg_len == 6 && memcmp(seg, "__TEXT", 6) == 0) {
+ k = (sect_len == 6 && memcmp(sect, "__text", 6) == 0) ? SEC_TEXT
+ : SEC_RODATA;
+ } else if (seg_len == 6 && memcmp(seg, "__DATA", 6) == 0) {
+ k = (sect_len == 5 && memcmp(sect, "__bss", 5) == 0) ? SEC_BSS : SEC_DATA;
+ } else {
+ return 0; /* unknown segment: caller decides */
+ }
+ if (kind) *kind = k; /* out-param is optional */
+ return 1;
+ }
+}
+
+/* Translate a kit-internal (ELF-spelled) section name to its Mach-O
+ * native "segname,sectname" spelling. Generalizes
+ * obj_macho_debug_sectname: the ".debug_*" DWARF case routes to
+ * "__DWARF,__debug_*" (truncated to Mach-O's 16-byte sectname), and
+ * ".eh_frame" routes to "__TEXT,__eh_frame". Returns 0 for any other
+ * name (caller falls back to its own spelling).
+ *
+ * On success returns 1 with a NUL-terminated string in `out`. On every
+ * 0 return (including NULL `name` or `len` == 0) `out` is not written.
+ * `out` itself is not NULL-checked, so the caller must pass a real
+ * buffer. The ".eh_frame" test compares exactly `len` bytes, so that
+ * path does not rely on `name` being NUL-terminated. */
+int obj_macho_native_secname(const char* name, size_t len, char out[40]) {
+ char ds[17];
+ if (!name || len == 0) return 0;
+ if (obj_macho_debug_sectname(name, len, ds)) {
+ /* "__DWARF," + ds (already "__debug_*", <=16 chars). */
+ size_t dl = slice_from_cstr(ds).len;
+ memcpy(out, "__DWARF,", 8);
+ memcpy(out + 8, ds, dl);
+ out[8 + dl] = '\0';
+ return 1;
+ }
+ if (len == 9 && memcmp(name, ".eh_frame", 9) == 0) {
+ memcpy(out, "__TEXT,__eh_frame", 17); /* 17 bytes, terminator added next */
+ out[17] = '\0';
+ return 1;
+ }
+ return 0;
+}
+
static Sym secname_panic_unimpl(Compiler* c, const char* which) {
SrcLoc l = {0, 0, 0};
compiler_panic(c, l,
@@ -172,7 +240,9 @@ int obj_format_extern_via_got(const Compiler* c) {
* ELF -fPIC / -fPIE: extern data may resolve to a symbol defined
* in a DSO at runtime; the codegen must route through the GOT so
* the loader can patch a single slot rather than touching .text. */
- if (c->target.obj == KIT_OBJ_MACHO) return 1;
+ /* Mach-O always binds extern data through its own static GOT / non-lazy
+ * pointers — same property the builds_own_static_got field records. */
+ if (obj_format_builds_own_static_got(c)) return 1;
if (c->target.obj == KIT_OBJ_ELF &&
(c->target.pic == KIT_PIC_PIC || c->target.pic == KIT_PIC_PIE))
return 1;
@@ -202,25 +272,27 @@ int obj_format_split_sections_as_atoms(const Compiler* c) {
* `c->ctx->heap`, the same allocator the existing call sites
* (boundary_name, kit_jit_lookup, link_intern_c_name) already use. */
Sym obj_format_c_mangle(Compiler* c, const char* name) {
- size_t n;
+ size_t n, plen;
+ const char* prefix;
Heap* h;
char* buf;
Sym s;
SrcLoc loc = {0, 0, 0};
if (!c || !name) return 0;
- if (c->target.obj != KIT_OBJ_MACHO)
- return pool_intern_slice(c->global, slice_from_cstr(name));
+ prefix = obj_format_c_label_prefix(c); /* never NULL; "" means no prefix */
+ plen = slice_from_cstr(prefix).len;
+ if (plen == 0) return pool_intern_slice(c->global, slice_from_cstr(name));
n = slice_from_cstr(name).len;
h = (Heap*)c->ctx->heap;
- buf = (char*)h->alloc(h, n + 2u, 1);
+ buf = (char*)h->alloc(h, n + plen + 1u, 1); /* prefix + name + NUL */
if (!buf)
compiler_panic(c, loc, "obj_format_c_mangle: oom prefixing '%.*s'",
SLICE_ARG(slice_from_cstr(name)));
- buf[0] = '_';
- memcpy(buf + 1, name, n);
- buf[n + 1] = 0;
- s = pool_intern_slice(c->global, (Slice){.s = buf, .len = (u32)(n + 1u)});
- h->free(h, buf, n + 2u);
+ memcpy(buf, prefix, plen);
+ memcpy(buf + plen, name, n);
+ buf[n + plen] = 0; /* terminator is not part of the interned length */
+ s = pool_intern_slice(c->global, (Slice){.s = buf, .len = (u32)(n + plen)});
+ h->free(h, buf, n + plen + 1u); /* scratch only; size matches the alloc */
return s;
}
@@ -229,11 +301,15 @@ Sym obj_format_c_mangle(Compiler* c, const char* name) {
* and decrementing `*len`) so panic text shows the source-level name
* regardless of target format. No-op for formats with no prefix. */
void obj_format_demangle_c(const Compiler* c, const char** name, size_t* len) {
+ const char* prefix;
+ size_t plen;
if (!c || !name || !len || !*name) return;
- if (c->target.obj == KIT_OBJ_MACHO && *len >= 1u && (*name)[0] == '_') {
- ++(*name);
- --(*len);
- }
+ prefix = obj_format_c_label_prefix(c); /* never NULL; "" means no prefix */
+ plen = slice_from_cstr(prefix).len;
+ if (plen == 0 || *len < plen) return; /* nothing to strip, or name too short */
+ if (memcmp(*name, prefix, plen) != 0) return; /* name lacks the prefix */
+ *name += plen; /* advance in place; no copy is made */
+ *len -= plen;
}
/* Default entry symbol name baked into a freshly created Linker for
@@ -242,10 +318,81 @@ void obj_format_demangle_c(const Compiler* c, const char** name, size_t* len) {
* historical `_start` produced by crt1.o. Returned as a NUL-terminated
* literal; caller interns. */
const char* obj_format_default_entry_name(const Compiler* c) {
- if (c && c->target.obj == KIT_OBJ_MACHO) return "_main";
- /* COFF: PE/Windows CRT entry sets up argc/argv and calls main.
- * Resolved against the user-supplied CRT archive (mingw's
- * libmingwex.a). See doc/OBJ.md. */
- if (c && c->target.obj == KIT_OBJ_COFF) return "mainCRTStartup";
- return "_start";
+ /* Mach-O: `_main` (LC_MAIN names main; dyld owns startup).
+ * COFF: `mainCRTStartup` (PE/Windows CRT entry sets up argc/argv and
+ * calls main; resolved against the user CRT archive, mingw's
+ * libmingwex.a — see doc/OBJ.md).
+ * ELF / Wasm: the historical `_start` produced by crt1.o.
+ * All driven by the per-format default_entry_name field; a row with a
+ * NULL field (or no format match) falls back to "_start", as does a
+ * NULL Compiler (the lookup is skipped in that case). The result is
+ * either the format row's field or the "_start" literal; nothing is
+ * allocated here. */
+ const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL;
+ const char* e = fmt ? fmt->default_entry_name : NULL;
+ return e ? e : "_start";
+}
+
+int obj_format_carries_file_only_debug(const Compiler* c) {
+ const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL;
+ /* Reads the format row's carries_file_only_debug flag, normalized to
+ * 0/1 by the &&. A NULL Compiler or a failed lookup reports 0. */
+ return fmt && fmt->carries_file_only_debug;
+}
+
+int obj_format_builds_own_static_got(const Compiler* c) {
+ const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL;
+ /* Reads the format row's builds_own_static_got flag, normalized to
+ * 0/1 by the &&. A NULL Compiler or a failed lookup reports 0. */
+ return fmt && fmt->builds_own_static_got;
+}
+
+int obj_format_weak_undef_pulls_archive_member(const Compiler* c) {
+ const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL;
+ /* Reads the format row's weak_undef_pulls_archive_member flag,
+ * normalized to 0/1 by the &&. A NULL Compiler or a failed lookup
+ * reports 0. */
+ return fmt && fmt->weak_undef_pulls_archive_member;
+}
+
+int obj_format_supports_symbol_feature(const Compiler* c, int symfeat) {
+ /* The only format-divergent feature axis today is TLS access: COFF's
+ * Windows TEB model can't represent the ELF/Mach-O TLS-access features
+ * the CG layer mints. Every other (non-TLS) feature is representable
+ * by every format.
+ *
+ * Returns 1 when `symfeat` is representable and 0 when it is not. Only
+ * the four KIT_CG_SYMFEAT_TLS_* cases consult `c`; any other value
+ * (including ones this switch has never heard of) takes the default
+ * branch and returns 1 without looking at the target. */
+ switch (symfeat) {
+ case KIT_CG_SYMFEAT_TLS_LOCAL_EXEC:
+ case KIT_CG_SYMFEAT_TLS_INITIAL_EXEC:
+ case KIT_CG_SYMFEAT_TLS_LOCAL_DYNAMIC:
+ case KIT_CG_SYMFEAT_TLS_GENERAL_DYNAMIC:
+ return obj_format_tls_model(c) != OBJ_TLS_WINDOWS_TEB;
+ default:
+ return 1;
+ }
+}
+
+int obj_format_static_ifunc_via_rela_iplt(const Compiler* c) {
+ /* The single home for the (os == FREEBSD && obj == ELF) knowledge:
+ * FreeBSD's crt walks [__rela_iplt_start, __rela_iplt_end) of
+ * R_*_IRELATIVE relocs before main, so kit emits that table instead of
+ * the ctor-based __kit_ifunc_init path on FreeBSD/ELF.
+ *
+ * Returns 1 only for that (OS, format) pair and 0 otherwise, including
+ * for a NULL Compiler. This reads c->target directly; it does not go
+ * through a format-row field. */
+ return c && c->target.os == KIT_OS_FREEBSD && c->target.obj == KIT_OBJ_ELF;
+}
+
+u32 obj_format_elf_tls_tp_bias(const Compiler* c) {
+ const ObjFormatImpl* fmt;
+ const ObjElfArchOps* arch;
+ if (!c || c->target.obj != KIT_OBJ_ELF) return 0u; /* NULL c or non-ELF: no bias */
+ fmt = obj_format_lookup(KIT_OBJ_ELF);
+ /* The per-arch ELF descriptor is optional at every step: a missing
+ * format row, a missing elf_arch hook, or a NULL descriptor for this
+ * arch all fall through to a bias of 0. */
+ arch = (fmt && fmt->elf_arch) ? fmt->elf_arch(c->target.arch) : NULL;
+ return arch ? arch->tls_tp_bias : 0u;
+}
+
+int obj_format_boundary_sym_kind(const Compiler* c, KitSlice name,
+ int* symkind) {
+ /* PE/COFF owns two synthetic absolute globals the linker emits:
+ * `__ImageBase` (image base for ASLR-relative math) and `_tls_used`
+ * (the IMAGE_TLS_DIRECTORY anchor). Both are SK_ABS. Other formats
+ * own no boundary symbols here.
+ *
+ * Returns 1 on a match, 0 otherwise (including a NULL Compiler).
+ * `symkind` is optional and is written only on a match. */
+ if (!c || c->target.obj != KIT_OBJ_COFF) return 0;
+ if (slice_eq_cstr(name, "__ImageBase") || slice_eq_cstr(name, "_tls_used")) {
+ if (symkind) *symkind = SK_ABS;
+ return 1;
+ }
+ return 0;
+}
+
+void obj_format_synth_inputs(const Compiler* c, Linker* l) {
+ const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL;
+ /* Dispatch to the format's optional synth_inputs hook. A NULL
+ * Compiler, a failed lookup, or a NULL hook makes this a no-op. The
+ * hook receives only the Linker; `c` just selects the format. */
+ if (fmt && fmt->synth_inputs) fmt->synth_inputs(l);
}
diff --git a/src/obj/obj_tls.c b/src/obj/obj_tls.c
@@ -34,6 +34,7 @@
#include "core/heap.h"
#include "core/pool.h"
#include "core/slice.h"
+#include "obj/format.h"
#include "obj/obj.h"
/* ObjBuilder is opaque outside obj.c; obj_tls.c reaches the bootstrap
@@ -42,8 +43,22 @@
ObjSymId obj_tlv_bootstrap_get(const ObjBuilder*);
void obj_tlv_bootstrap_set(ObjBuilder*, ObjSymId);
+/* TLS-access model for the active object format: COFF -> Windows TEB,
+ * Mach-O -> per-variable descriptor + thunk, everything else -> ELF
+ * Local-Exec / Initial-Exec. The single source of truth for the
+ * TLS-access decision.
+ *
+ * The decision is keyed on c->target.obj alone; no other target field
+ * (such as the OS) is consulted here. A NULL Compiler is tolerated and
+ * yields OBJ_TLS_ELF_LE, the same value as the fall-through case. */
+ObjTlsModel obj_format_tls_model(const Compiler* c) {
+ if (!c) return OBJ_TLS_ELF_LE;
+ if (c->target.obj == KIT_OBJ_COFF) return OBJ_TLS_WINDOWS_TEB;
+ if (c->target.obj == KIT_OBJ_MACHO) return OBJ_TLS_MACHO_DESCRIPTOR;
+ return OBJ_TLS_ELF_LE;
+}
+
+/* Thin wrapper kept for existing callers (per-arch ops.c, NativeDirect,
+ * internal define_tls dispatch); later waves migrate them to
+ * obj_format_tls_model directly.
+ *
+ * Returns 1 only when the model is OBJ_TLS_MACHO_DESCRIPTOR, 0
+ * otherwise. A NULL Compiler returns 0, because obj_format_tls_model
+ * maps NULL to OBJ_TLS_ELF_LE. */
int obj_format_tls_via_descriptor(const Compiler* c) {
- return c->target.obj == KIT_OBJ_MACHO;
+ return obj_format_tls_model(c) == OBJ_TLS_MACHO_DESCRIPTOR;
}
static void define_tls_elf(ObjBuilder* ob, Compiler* c, ObjSymId sym,
diff --git a/src/obj/registry.c b/src/obj/registry.c
@@ -75,8 +75,12 @@ static const ObjElfArchOps obj_elf_arch_ops[] = {
.r_glob_dat = ELF_R_AARCH64_GLOB_DAT,
.r_jump_slot = ELF_R_AARCH64_JUMP_SLOT,
.r_irelative = ELF_R_AARCH64_IRELATIVE,
+ /* AAPCS64 variant-I: tp points at a 16-byte TCB ahead of the image. */
+ .tls_tp_bias = 16u,
.reloc_to = elf_aarch64_reloc_to,
.reloc_from = elf_aarch64_reloc_from,
+ .reloc_name = elf_aarch64_reloc_name,
+ .float_abi_from_e_flags = NULL,
},
#endif
#if KIT_ARCH_X64_ENABLED
@@ -89,8 +93,12 @@ static const ObjElfArchOps obj_elf_arch_ops[] = {
.r_glob_dat = ELF_R_X86_64_GLOB_DAT,
.r_jump_slot = ELF_R_X86_64_JUMP_SLOT,
.r_irelative = ELF_R_X86_64_IRELATIVE,
+ /* SysV variant-II: tp sits *past* the image; no positive TCB bias. */
+ .tls_tp_bias = 0u,
.reloc_to = elf_x86_64_reloc_to,
.reloc_from = elf_x86_64_reloc_from,
+ .reloc_name = elf_x86_64_reloc_name,
+ .float_abi_from_e_flags = NULL,
},
#endif
#if KIT_ARCH_RV64_ENABLED
@@ -103,8 +111,14 @@ static const ObjElfArchOps obj_elf_arch_ops[] = {
.r_glob_dat = ELF_R_RISCV_64,
.r_jump_slot = ELF_R_RISCV_JUMP_SLOT,
.r_irelative = ELF_R_RISCV_IRELATIVE,
+ /* Variant-I: kit's freestanding start.c biases tp by a 16-byte TCB to
+ * match AArch64. Hosted RISC-V wants +0; that hosted-vs-freestanding
+ * split is applied by the caller (src/obj/elf/link.c). */
+ .tls_tp_bias = 16u,
.reloc_to = elf_riscv64_reloc_to,
.reloc_from = elf_riscv64_reloc_from,
+ .reloc_name = elf_riscv_reloc_name,
+ .float_abi_from_e_flags = elf_riscv_float_abi_from_e_flags,
},
#endif
#if KIT_ARCH_RV32_ENABLED
@@ -121,8 +135,12 @@ static const ObjElfArchOps obj_elf_arch_ops[] = {
.r_glob_dat = ELF_R_RISCV_32,
.r_jump_slot = ELF_R_RISCV_JUMP_SLOT,
.r_irelative = ELF_R_RISCV_IRELATIVE,
+ /* See RV64: freestanding +16 TCB; hosted split applied by caller. */
+ .tls_tp_bias = 16u,
.reloc_to = elf_riscv32_reloc_to,
.reloc_from = elf_riscv32_reloc_from,
+ .reloc_name = elf_riscv_reloc_name,
+ .float_abi_from_e_flags = elf_riscv_float_abi_from_e_flags,
},
#endif
#if !KIT_ARCH_AA64_ENABLED && !KIT_ARCH_X64_ENABLED && \
@@ -298,6 +316,11 @@ static const ObjFormatImpl obj_format_impl_wasm = {
.read = read_wasm,
.read_dso = NULL,
.link_emit = NULL,
+ .c_label_prefix = "",
+ .default_entry_name = "_start",
+ .carries_file_only_debug = 0,
+ .builds_own_static_got = 0,
+ .weak_undef_pulls_archive_member = 0,
};
#endif
@@ -315,6 +338,11 @@ static const ObjFormatImpl obj_format_impl_elf = {
.layout_dyn = OBJ_LAYOUT_DYN,
.free_dyn = OBJ_FREE_DYN,
.emu = &elf_emu_ops,
+ .c_label_prefix = "",
+ .default_entry_name = "_start",
+ .carries_file_only_debug = 1,
+ .builds_own_static_got = 0,
+ .weak_undef_pulls_archive_member = 0,
.elf_arch = obj_elf_arch,
.elf_machine = obj_elf_machine,
};
@@ -332,6 +360,11 @@ static const ObjFormatImpl obj_format_impl_macho = {
.read_dso = read_macho_dso,
.link_emit = OBJ_LINK_EMIT_MACHO,
.split_sections_as_atoms = 1,
+ .c_label_prefix = "_",
+ .default_entry_name = "_main",
+ .carries_file_only_debug = 1,
+ .builds_own_static_got = 1,
+ .weak_undef_pulls_archive_member = 0,
.macho_arch = obj_macho_arch,
.macho_cputype = obj_macho_cputype,
};
@@ -348,6 +381,17 @@ static const ObjFormatImpl obj_format_impl_coff = {
.read = read_coff,
.read_dso = read_coff_dso,
.link_emit = OBJ_LINK_EMIT_COFF,
+ .c_label_prefix = "",
+ .default_entry_name = "mainCRTStartup",
+ .carries_file_only_debug = 0,
+ .builds_own_static_got = 0,
+ .weak_undef_pulls_archive_member = 1,
+ /* synth_inputs: the COFF __CTOR_LIST__/__DTOR_LIST__ + __chkstk
+ * synthesizer (link_synth_coff_ctor_dtor_list) genuinely needs Linker
+ * internals (LinkInput append, link_arch_desc_for); it cannot be
+ * expressed via ObjBuilder/Compiler/obj APIs alone. Left NULL here —
+ * T-LINK wires the existing function through this field. */
+ .synth_inputs = NULL,
.coff_arch = obj_coff_arch,
.coff_machine = obj_coff_machine,
.classify_obj_input = coff_classify_obj_input,
@@ -395,6 +439,40 @@ const ObjFormatImpl* obj_format_lookup_bin(KitBinFmt fmt) {
}
}
+/* Name<->KitObjFmt table. The canonical name comes first; trailing
+ * entries are accepted aliases (objcopy/objdump bfdname spellings). The
+ * canonical name for a fmt is always the first row whose `fmt` matches.
+ *
+ * Row order is therefore significant: name -> fmt lookups scan every
+ * row, while fmt -> name lookups stop at the first hit. The only alias
+ * in the table is "pe" for KIT_OBJ_COFF, whose canonical name is
+ * "coff". */
+typedef struct ObjFmtNameRow {
+ KitObjFmt fmt;
+ const char* name;
+} ObjFmtNameRow;
+
+static const ObjFmtNameRow obj_fmt_names[] = {
+ {KIT_OBJ_ELF, "elf"}, {KIT_OBJ_COFF, "coff"},
+ {KIT_OBJ_COFF, "pe"}, {KIT_OBJ_MACHO, "macho"},
+ {KIT_OBJ_WASM, "wasm"},
+};
+
+int obj_format_fmt_from_name(const char* name, KitObjFmt* out) {
+ u32 i;
+ if (!name) return 0; /* NULL never matches */
+ for (i = 0; i < (u32)(sizeof obj_fmt_names / sizeof obj_fmt_names[0]); ++i) {
+ /* Exact strcmp match (case-sensitive, whole string) against every
+ * row, so canonical names and aliases are both accepted. */
+ if (strcmp(obj_fmt_names[i].name, name) == 0) {
+ if (out) *out = obj_fmt_names[i].fmt; /* out-param is optional */
+ return 1;
+ }
+ }
+ return 0; /* unknown name; *out is left untouched */
+}
+
+const char* obj_format_fmt_name(KitObjFmt fmt) {
+ u32 i;
+ for (i = 0; i < (u32)(sizeof obj_fmt_names / sizeof obj_fmt_names[0]); ++i) {
+ /* First matching row wins, which is the canonical spelling rather
+ * than an alias. The result points at a string literal in the table. */
+ if (obj_fmt_names[i].fmt == fmt) return obj_fmt_names[i].name;
+ }
+ return NULL; /* fmt has no row in obj_fmt_names */
+}
+
int obj_format_dso_reader_for_bytes(const u8* data, size_t len,
KitBinFmt* bin_out,
ObjFormatDsoReader* out) {