commit 8ab800473391e2774d2c694e51c6bd7675c172d4
parent 79ae72f38b6d035e974765489d3e851a89196050
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 9 May 2026 18:00:38 -0700
link: linker script support (kernel.lds subset) + case 35
Implements the GNU-ld subset needed to link a freestanding aarch64
kernel image: ENTRY, SECTIONS{} with absolute `. = expr`, output
sections with `: ALIGN(N)`, `*(.glob …)` input matchers, in-section
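and top-level symbol assignments, /DISCARD/, and ALIGN expressions
(MAX and MIN are not implemented yet: the parser rejects them and the
layout evaluator reports them as unsupported). New components: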
- src/link/link_script.c: recursive-descent parser producing the
public CfreeLinkScript struct in compiler-arena memory; rejects
out-of-subset constructs (MEMORY, PROVIDE, KEEP, AT>, > REGION,
…) with a diagnostic.
- src/link/link_layout.c: layout_sections_scripted walks output
sections in declaration order, claims input sections via a `*`
glob, advances a `dot` location counter, materializes one
LinkSegment per non-empty output section, and tracks file offsets
on a separate cursor (vaddrs are absolute, file offsets sequential).
- src/link/link_elf.c: scripted images skip the headers PT_LOAD,
build-id PT_NOTE, and the address shift; only file offsets bump.
PAGE_SIZE promoted to link_internal.h and bumped to 16 KiB to
match Apple Silicon and the common Linux/AArch64 kernel config.
- src/link/link_reloc.c: implement R_REL64 / R_PC64 (AArch64
PREL64), needed by `.quad _end - _start` in the Image header.
Harness:
- test/link/harness/{link_exe,jit}_runner.c: --linker-script <path>
reads the file, calls cfree_link_script_parse, sets
CfreeLinkInputs.linker_script.
- test/link/cases/35_linker_script_kernel/: kernel.lds-style script
pinning .text at 0x40080000 with the arm64 Image header in
entry.S, qemu-system-aarch64 -kernel + semihosting SYS_EXIT_EXTENDED.
All 119 link cases pass (118 prior + new case 35).
Diffstat:
17 files changed, 1600 insertions(+), 50 deletions(-)
diff --git a/src/api/stubs.c b/src/api/stubs.c
@@ -209,19 +209,7 @@ int cfree_arch_reg_iter_next(CfreeArchRegIter* it, CfreeArchReg* o) {
}
void cfree_arch_reg_iter_free(CfreeArchRegIter* it) { (void)it; }
-/* Linker script parsing. */
-int cfree_link_script_parse(CfreeCompiler* c, const char* t, size_t l,
- const CfreeLinkScript** o) {
- (void)c;
- (void)t;
- (void)l;
- if (o) *o = 0;
- return 1;
-}
-void cfree_link_script_free(CfreeCompiler* c, const CfreeLinkScript* s) {
- (void)c;
- (void)s;
-}
+/* Linker script parsing lives in src/link/link_script.c. */
/* JIT lookup, view, addr_to_sym, and the symbol iterator live in
* src/link/link_jit.c. */
diff --git a/src/link/link.c b/src/link/link.c
@@ -247,8 +247,9 @@ void link_set_entry(Linker* l, const char* name) {
void link_set_script(Linker* l, const CfreeLinkScript* script) {
if (!l || !script) return;
- compiler_panic(l->c, no_loc(),
- "link_set_script: linker scripts not yet implemented");
+ l->script = script;
+ if (script->entry)
+ l->entry_name = pool_intern_cstr(l->c->global, script->entry);
}
void link_set_extern_resolver(Linker* l, LinkExternResolver fn, void* user) {
diff --git a/src/link/link_elf.c b/src/link/link_elf.c
@@ -104,7 +104,6 @@ typedef struct __attribute__((packed)) Shdr64 {
#define PT_NOTE 4
#define PT_TLS 7
-#define PAGE_SIZE 0x1000u
/* Static ET_EXEC base. ET_DYN (PIE) uses 0 — the loader picks the
* runtime base. The active value lives in `img_base` below; the macro
* stays for the static path's hard-coded vaddrs. */
@@ -138,6 +137,20 @@ static u32 perms_to_pflags(u32 secflags) {
return f;
}
+/* Scripted-layout post-pass: vaddrs are already final (the script
+ * pinned them via `. = …`), so only file offsets need to bump to
+ * leave room for ehdr+phdrs. Mirror of shift_image_addresses but
+ * touches only the file dimension.
+ *
+ * Adds `delta` to the file_offset of every segment and every section
+ * in `img`, and to the write_file_offset of every recorded relocation.
+ * No vaddr field is modified. */
+static void shift_image_file_offsets(LinkImage* img, u64 delta) {
+ u32 i;
+ for (i = 0; i < img->nsegments; ++i)
+ img->segments[i].file_offset += delta;
+ for (i = 0; i < img->nsections; ++i)
+ img->sections[i].file_offset += delta;
+ for (i = 0; i < LinkRelocs_count(&img->relocs); ++i)
+ LinkRelocs_at(&img->relocs, i)->write_file_offset += delta;
+}
+
static void shift_image_addresses(LinkImage* img, u64 delta) {
u32 i;
for (i = 0; i < img->nsegments; ++i) {
@@ -572,20 +585,34 @@ void link_emit_elf_aarch64(LinkImage* img, Writer* w) {
/* PIE / ET_DYN: img_base is 0 (the loader picks the runtime base;
* absolute relocs against internal symbols are emitted as
* R_AARCH64_RELATIVE in .rela.dyn). Otherwise classic ET_EXEC at
- * IMAGE_BASE_STATIC. */
+ * IMAGE_BASE_STATIC.
+ *
+ * Scripted: the linker script pinned absolute vaddrs (e.g.
+ * `. = 0x40080000`); img_base stays 0 and the headers PT_LOAD /
+ * build-id note are dropped — the script's image is consumed by a
+ * raw loader (qemu -kernel, a bootloader) that doesn't need a
+ * self-describing memory image. */
int pie = img->pie;
- u64 img_base = pie ? 0ULL : IMAGE_BASE_STATIC;
+ int scripted = img->scripted;
+ u64 img_base = (pie || scripted) ? 0ULL : IMAGE_BASE_STATIC;
/* ---- plan number of program headers ----
*
* 1 headers PT_LOAD + nsegments PT_LOAD + 1 PT_NOTE (build-id)
* + 1 PT_TLS when this image carries any TLS sections.
- * + 4 dyn phdrs (PT_PHDR / PT_INTERP / PT_DYNAMIC / PT_GNU_STACK) on PIE. */
+ * + 4 dyn phdrs (PT_PHDR / PT_INTERP / PT_DYNAMIC / PT_GNU_STACK) on PIE.
+ *
+ * Scripted images skip the headers PT_LOAD and PT_NOTE: phdrs are
+ * just the per-segment PT_LOADs. */
u32 has_tls = img->tls_memsz ? 1u : 0u;
u32 nphdr_extra_dyn = pie ? 4u : 0u;
- u32 nphdr_total = 1u + img->nsegments + 1u + has_tls + nphdr_extra_dyn;
- u64 headers_size =
- sizeof(Ehdr64) + (u64)nphdr_total * sizeof(Phdr64) + BUILD_ID_NOTE_BYTES;
+ u32 nphdr_headers = scripted ? 0u : 1u;
+ u32 nphdr_buildid = scripted ? 0u : 1u;
+ u32 nphdr_total =
+ nphdr_headers + img->nsegments + nphdr_buildid + has_tls + nphdr_extra_dyn;
+ u64 build_id_note_bytes = scripted ? 0ULL : BUILD_ID_NOTE_BYTES;
+ u64 headers_size = sizeof(Ehdr64) + (u64)nphdr_total * sizeof(Phdr64) +
+ build_id_note_bytes;
u64 headers_load = ALIGN_UP(headers_size, (u64)PAGE_SIZE);
/* The build-id note lives inside the headers PT_LOAD at this offset. */
@@ -596,7 +623,10 @@ void link_emit_elf_aarch64(LinkImage* img, Writer* w) {
*
* Must happen before segshdrs/symtab construction so they observe
* post-shift vaddrs (the values that will land in the file). */
- shift_image_addresses(img, headers_load);
+ if (scripted)
+ shift_image_file_offsets(img, headers_load);
+ else
+ shift_image_addresses(img, headers_load);
apply_all_relocs(img, img_base);
/* ---- write .dynamic body + re-serialize .rela.dyn (PIE only) ----
@@ -702,8 +732,9 @@ void link_emit_elf_aarch64(LinkImage* img, Writer* w) {
* coming out of layout_dyn. Trailing capacity stays zero —
* readers stop at the first R_AARCH64_NONE record. */
{
- u8* rd_bytes = img->segment_bytes[dseg->id - 1] +
- (size_t)(sec_reladyn->file_offset - dseg->file_offset);
+ const LinkSegment* rdseg = &img->segments[sec_reladyn->segment_id - 1];
+ u8* rd_bytes = img->segment_bytes[rdseg->id - 1] +
+ (size_t)(sec_reladyn->file_offset - rdseg->file_offset);
u32 i;
for (i = 0; i < dyn->nrela_dyn; ++i) {
const DynRela* rr = &dyn->rela_dyn[i];
@@ -719,8 +750,9 @@ void link_emit_elf_aarch64(LinkImage* img, Writer* w) {
* dyn->rela_plt[i].r_offset along with the rest, so the post-shift
* values match the .got.plt slot vaddrs the loader will patch. */
if (sec_relaplt && dyn->nrela_plt) {
- u8* rp_bytes = img->segment_bytes[dseg->id - 1] +
- (size_t)(sec_relaplt->file_offset - dseg->file_offset);
+ const LinkSegment* rpseg = &img->segments[sec_relaplt->segment_id - 1];
+ u8* rp_bytes = img->segment_bytes[rpseg->id - 1] +
+ (size_t)(sec_relaplt->file_offset - rpseg->file_offset);
u32 i;
for (i = 0; i < dyn->nrela_plt; ++i) {
const DynRela* rr = &dyn->rela_plt[i];
@@ -964,16 +996,19 @@ void link_emit_elf_aarch64(LinkImage* img, Writer* w) {
phdrs[pi].p_align = 8;
pi++;
}
- /* Headers PT_LOAD (covers ehdr + phdrs + build-id note). */
- phdrs[pi].p_type = PT_LOAD;
- phdrs[pi].p_flags = PF_R;
- phdrs[pi].p_offset = 0;
- phdrs[pi].p_vaddr = img_base;
- phdrs[pi].p_paddr = img_base;
- phdrs[pi].p_filesz = headers_size;
- phdrs[pi].p_memsz = headers_size;
- phdrs[pi].p_align = PAGE_SIZE;
- pi++;
+ /* Headers PT_LOAD (covers ehdr + phdrs + build-id note).
+ * Scripted images don't emit one — see plan note above. */
+ if (!scripted) {
+ phdrs[pi].p_type = PT_LOAD;
+ phdrs[pi].p_flags = PF_R;
+ phdrs[pi].p_offset = 0;
+ phdrs[pi].p_vaddr = img_base;
+ phdrs[pi].p_paddr = img_base;
+ phdrs[pi].p_filesz = headers_size;
+ phdrs[pi].p_memsz = headers_size;
+ phdrs[pi].p_align = PAGE_SIZE;
+ pi++;
+ }
/* Per-segment PT_LOAD. */
u32 i;
for (i = 0; i < img->nsegments; ++i) {
@@ -988,16 +1023,18 @@ void link_emit_elf_aarch64(LinkImage* img, Writer* w) {
p->p_memsz = seg->mem_size;
p->p_align = seg->align ? seg->align : PAGE_SIZE;
}
- /* PT_NOTE for build-id. */
- phdrs[pi].p_type = PT_NOTE;
- phdrs[pi].p_flags = PF_R;
- phdrs[pi].p_offset = build_id_off;
- phdrs[pi].p_vaddr = build_id_addr;
- phdrs[pi].p_paddr = build_id_addr;
- phdrs[pi].p_filesz = BUILD_ID_NOTE_BYTES;
- phdrs[pi].p_memsz = BUILD_ID_NOTE_BYTES;
- phdrs[pi].p_align = 4;
- pi++;
+ /* PT_NOTE for build-id. Scripted images skip the build-id entirely. */
+ if (!scripted) {
+ phdrs[pi].p_type = PT_NOTE;
+ phdrs[pi].p_flags = PF_R;
+ phdrs[pi].p_offset = build_id_off;
+ phdrs[pi].p_vaddr = build_id_addr;
+ phdrs[pi].p_paddr = build_id_addr;
+ phdrs[pi].p_filesz = BUILD_ID_NOTE_BYTES;
+ phdrs[pi].p_memsz = BUILD_ID_NOTE_BYTES;
+ phdrs[pi].p_align = 4;
+ pi++;
+ }
/* PT_TLS describing the .tdata template + .tbss zero-fill.
* vaddr/file_offset point at the same bytes the matching
* PT_LOAD already covers — the loader uses PT_TLS to size
@@ -1097,8 +1134,10 @@ void link_emit_elf_aarch64(LinkImage* img, Writer* w) {
* u32 type = NT_GNU_BUILD_ID (3)
* "GNU\0"
* <16 bytes of build-id>
- */
- {
+ *
+ * Scripted images don't carry build-id; they have no PT_NOTE phdr to
+ * point at it and the file payload would just be dead bytes. */
+ if (!scripted) {
u8 nh[12];
u32 v;
v = NOTE_NAME_GNU_LEN;
diff --git a/src/link/link_internal.h b/src/link/link_internal.h
@@ -85,6 +85,11 @@ struct Linker {
LinkInputs inputs; /* LinkInputId = slot index + 1 */
LinkArchives archives;
Sym entry_name;
+ /* Set by link_set_script. NULL: layout takes the existing default
+ * bucket-based path. Non-NULL: layout_sections_scripted walks the
+ * script's output sections in declaration order. Borrowed; the
+ * script and every sub-object must outlive link_resolve. */
+ const CfreeLinkScript* script;
int gc_sections;
/* Set by cfree_link_exe before link_resolve. When 1, layout_iplt
* synthesizes a .init_array entry pointing at __cfree_ifunc_init so
@@ -281,8 +286,19 @@ struct LinkImage {
LinkDynState* dyn;
/* Mirror of Linker.emit_pie at link_resolve time; consulted by emit. */
int pie;
+ /* Set when layout was driven by Linker.script. The emitter then keeps
+ * segment vaddrs at their script-assigned absolute values, drops the
+ * self-describing headers PT_LOAD / build-id PT_NOTE, and only shifts
+ * file offsets to make room for ehdr+phdrs. */
+ u8 scripted;
};
+/* Page granularity used for ELF segment alignment and the file-offset /
+ * vaddr congruence the runtime loader requires. 16 KiB matches AArch64
+ * Apple Silicon and the common Linux/AArch64 kernel config; an image
+ * aligned to 16 KiB also loads on systems with 4 KiB pages, since
+ * 16 KiB is a multiple of 4 KiB. */
+#define PAGE_SIZE 0x4000u
+
/* Apply one relocation in place. P_bytes points at the first byte of the
* relocation site within the final memory; S is the resolved final
* address of the target symbol; A the addend; P the final address of
diff --git a/src/link/link_layout.c b/src/link/link_layout.c
@@ -678,7 +678,14 @@ typedef struct PlaceEntry {
u8 pad[3];
} PlaceEntry;
+static void layout_sections_scripted(Linker* l, LinkImage* img,
+ const GcLive* g);
+
static void layout_sections(Linker* l, LinkImage* img, const GcLive* g) {
+ if (l->script) {
+ layout_sections_scripted(l, img, g);
+ return;
+ }
Heap* h = img->heap;
u32 ii, j;
u32 total_kept = 0;
@@ -895,6 +902,416 @@ static void layout_sections(Linker* l, LinkImage* img, const GcLive* g) {
}
}
+/* ---- scripted layout (linker-script driven) ----
+ *
+ * Walks the CfreeLinkScript's output sections in declaration order,
+ * placing matched input sections at the dot location counter. Each
+ * non-DISCARD output section that claims at least one input section
+ * becomes one LinkSegment, which maps 1:1 to a PT_LOAD on emit; an
+ * output section that matches nothing produces no segment. Symbol
+ * assignments (top-level and in-section) materialize as defined
+ * LinkSymbol globals via upsert_global_sym (the same upsert pattern
+ * emit_boundary_sym uses).
+ *
+ * Discard handling: `/DISCARD/` matches input sections by glob and
+ * leaves their per-input m->section[id] entry as LINK_SEC_NONE — the
+ * downstream emit_reloc_records / link_symbols_to_sections passes
+ * already treat that as "section dropped" so they're naturally
+ * excluded from segments, gc, and reloc apply. */
+
+/* `*` is the only metachar. Supported forms in the kernel.lds-style
+ * subset: trailing star (".text*"), leading star ("*COMMON" — not in
+ * kernel.lds but cheap), and exact literal.
+ *
+ * Returns 1 when `name` matches `pat` and 0 otherwise; a NULL `pat` or
+ * `name` never matches. A lone "*" matches every name. A trailing star
+ * compares the text before it against the start of `name`; a leading
+ * star compares the text after it against the end of `name`. The
+ * trailing-star test runs first, so a pattern with a star at both ends
+ * ("*foo*") has its leading `*` compared as a literal character, and a
+ * `*` in the middle of a pattern is likewise compared literally. */
+static int match_glob(const char* pat, const char* name) {
+ size_t plen, nlen;
+ if (!pat || !name) return 0;
+ plen = strlen(pat);
+ nlen = strlen(name);
+ if (plen == 1 && pat[0] == '*') return 1;
+ if (plen >= 2 && pat[plen - 1] == '*') {
+ if (nlen + 1 < plen) return 0; /* name is shorter than the fixed prefix */
+ return memcmp(pat, name, plen - 1) == 0;
+ }
+ if (plen >= 2 && pat[0] == '*') {
+ if (nlen + 1 < plen) return 0; /* name is shorter than the fixed suffix */
+ return memcmp(pat + 1, name + (nlen - (plen - 1)), plen - 1) == 0;
+ }
+ return plen == nlen && memcmp(pat, name, plen) == 0;
+}
+
+/* Evaluate a linker-script expression tree to an unsigned 64-bit value.
+ *
+ * l    linker; supplies the string pool and the diagnostic context.
+ * img  image whose global symbol table resolves CFREE_LE_SYM nodes.
+ * dot  current location counter, returned for CFREE_LE_DOT.
+ * e    expression node to evaluate.
+ * err  out-flag: set to 1 on a recoverable failure (NULL node or
+ *      division by zero), in which case 0 is returned. It is never
+ *      cleared here, so the caller initialises it to 0.
+ *
+ * Arithmetic is unsigned and wraps. An undefined symbol and any
+ * expression kind outside the implemented subset (region ORIGIN /
+ * LENGTH, MAX, MIN) are reported through compiler_panic. */
+static u64 eval_link_expr(Linker* l, LinkImage* img, u64 dot,
+                          const CfreeLinkExpr* e, int* err) {
+  if (!e) {
+    *err = 1;
+    return 0;
+  }
+  switch ((CfreeLinkExprKind)e->kind) {
+    case CFREE_LE_INT:
+      return (u64)e->v.int_val;
+    case CFREE_LE_DOT:
+      return dot;
+    case CFREE_LE_SYM: {
+      Sym name = pool_intern_cstr(l->c->global, e->v.name);
+      LinkSymId id = symhash_get(&img->globals, name);
+      if (id == LINK_SYM_NONE) {
+        compiler_panic(l->c, no_loc(),
+                       "linker script: undefined symbol '%s' in expression",
+                       e->v.name);
+      }
+      return LinkSyms_at(&img->syms, id - 1)->vaddr;
+    }
+    case CFREE_LE_ADD:
+      return eval_link_expr(l, img, dot, e->v.bin.lhs, err) +
+             eval_link_expr(l, img, dot, e->v.bin.rhs, err);
+    case CFREE_LE_SUB:
+      return eval_link_expr(l, img, dot, e->v.bin.lhs, err) -
+             eval_link_expr(l, img, dot, e->v.bin.rhs, err);
+    case CFREE_LE_MUL:
+      return eval_link_expr(l, img, dot, e->v.bin.lhs, err) *
+             eval_link_expr(l, img, dot, e->v.bin.rhs, err);
+    case CFREE_LE_DIV: {
+      /* Evaluate the divisor first so a zero divisor is caught before
+       * the division is attempted. */
+      u64 rhs = eval_link_expr(l, img, dot, e->v.bin.rhs, err);
+      if (rhs == 0) {
+        *err = 1;
+        return 0;
+      }
+      return eval_link_expr(l, img, dot, e->v.bin.lhs, err) / rhs;
+    }
+    case CFREE_LE_AND:
+      return eval_link_expr(l, img, dot, e->v.bin.lhs, err) &
+             eval_link_expr(l, img, dot, e->v.bin.rhs, err);
+    case CFREE_LE_OR:
+      return eval_link_expr(l, img, dot, e->v.bin.lhs, err) |
+             eval_link_expr(l, img, dot, e->v.bin.rhs, err);
+    case CFREE_LE_XOR:
+      return eval_link_expr(l, img, dot, e->v.bin.lhs, err) ^
+             eval_link_expr(l, img, dot, e->v.bin.rhs, err);
+    case CFREE_LE_SHL:
+    case CFREE_LE_SHR: {
+      u64 lhs = eval_link_expr(l, img, dot, e->v.bin.lhs, err);
+      u64 count = eval_link_expr(l, img, dot, e->v.bin.rhs, err);
+      /* Shifting a 64-bit value by 64 or more is undefined behaviour in
+       * C. Every bit would be shifted out, so the result is 0. */
+      if (count >= 64) return 0;
+      if ((CfreeLinkExprKind)e->kind == CFREE_LE_SHL) return lhs << count;
+      return lhs >> count;
+    }
+    case CFREE_LE_ALIGN: {
+      u64 v = eval_link_expr(l, img, dot, e->v.align.val, err);
+      u64 a = eval_link_expr(l, img, dot, e->v.align.align, err);
+      if (a == 0) return v;
+      if ((a & (a - 1)) != 0) {
+        /* Not a power of two: round up with a remainder so the result
+         * does not depend on how ALIGN_UP (defined elsewhere) treats
+         * such alignments. */
+        u64 rem = v % a;
+        return rem ? v + (a - rem) : v;
+      }
+      return ALIGN_UP(v, a);
+    }
+    case CFREE_LE_REGION_ORIGIN:
+    case CFREE_LE_REGION_LENGTH:
+    case CFREE_LE_MAX:
+    case CFREE_LE_MIN:
+    default:
+      compiler_panic(l->c, no_loc(),
+                     "linker script: expression kind %u not supported",
+                     (unsigned)e->kind);
+      return 0;
+  }
+}
+
+static void emit_boundary_sym(Linker* l, LinkImage* img, const char* name,
+ u64 vaddr); /* defined below */
+
+/* Upsert a global symbol with the given absolute vaddr. Mirrors the
+ * emit_boundary_sym pattern: satisfies any prior undef ref in place;
+ * fans out to per-input duplicate name slots.
+ *
+ * This wrapper adds no logic of its own: it forwards `name` and `vaddr`
+ * unchanged to emit_boundary_sym, which is defined later in this file,
+ * so the behaviour described above is that function's. */
+static void upsert_global_sym(Linker* l, LinkImage* img, const char* name,
+ u64 vaddr) {
+ emit_boundary_sym(l, img, name, vaddr);
+}
+
+/* Apply one CfreeLinkAssignment. CFREE_LAS_DOT updates *dot; SYM /
+ * PROVIDE upserts a global. PROVIDE only fires when the name isn't
+ * already strongly defined; for v1 we accept it as equivalent to SYM
+ * (no input to kernel.lds defines these names).
+ *
+ * l    linker; supplies the diagnostic context.
+ * img  image that receives symbol definitions.
+ * dot  in/out location counter; read as the value of `.` inside the
+ *      expression and overwritten by a CFREE_LAS_DOT assignment.
+ * asn  assignment to apply.
+ *
+ * An expression that cannot be evaluated (missing expression or
+ * division by zero) is reported through compiler_panic rather than
+ * skipped: dropping the assignment would leave dot stale or a symbol
+ * undefined and silently produce a wrong layout. Moving dot backwards
+ * is reported the same way. A SYM / PROVIDE assignment with no symbol
+ * name is ignored. */
+static void apply_asn(Linker* l, LinkImage* img, u64* dot,
+                      const CfreeLinkAssignment* asn) {
+  int err = 0;
+  u64 v = eval_link_expr(l, img, *dot, asn->expr, &err);
+  if (err) {
+    const char* target = "<unnamed>";
+    if ((CfreeLinkAsnKind)asn->kind == CFREE_LAS_DOT)
+      target = ".";
+    else if (asn->sym)
+      target = asn->sym;
+    compiler_panic(l->c, no_loc(),
+                   "linker script: cannot evaluate expression assigned to "
+                   "'%s' (missing expression or division by zero)",
+                   target);
+    return;
+  }
+  switch ((CfreeLinkAsnKind)asn->kind) {
+    case CFREE_LAS_DOT:
+      if (v < *dot)
+        compiler_panic(l->c, no_loc(),
+                       "linker script: dot moved backwards (%llu -> %llu)",
+                       (unsigned long long)*dot, (unsigned long long)v);
+      *dot = v;
+      break;
+    case CFREE_LAS_SYM:
+    case CFREE_LAS_PROVIDE:
+      if (asn->sym) upsert_global_sym(l, img, asn->sym, v);
+      break;
+  }
+}
+
+static int input_match_section(const CfreeLinkInputMatch* m, const char* nm) {
+ /* file_pattern is ignored for v1 — kernel.lds uses `*(...)` only.
+ * Returns the result of match_glob on the matcher's section_pattern
+ * and the input section name `nm`, so every input file is treated as
+ * matching. */
+ return match_glob(m->section_pattern, nm);
+}
+
+static void layout_sections_scripted(Linker* l, LinkImage* img,
+ const GcLive* g) {
+ Heap* h = img->heap;
+ const CfreeLinkScript* script = l->script;
+ u64 dot = 0; /* location counter; stays 0 until the script assigns it */
+ /* Scripted layout: vaddrs are absolute (driven by `dot`), but file
+ * offsets follow a separate cursor packed sequentially after the
+ * eventual ehdr+phdrs. The writer adds headers_load to file_offsets
+ * (only) post-layout. */
+ u64 file_cursor = 0;
+ u32 ii, j, k, si;
+ u32 total_kept = 0;
+
+ img->scripted = 1; /* emitter keeps the vaddrs assigned here as-is */
+
+ /* Pass 0: count GC-live, kept, allocatable input sections — the
+ * upper bound on placeable LinkSections. The actual count placed
+ * may be lower (DISCARD sinks, unmatched). Input sections that no
+ * output section claims are not placed at all: nothing below adds
+ * them to img->sections. */
+ for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
+ ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
+ for (j = 1; j < obj_section_count(ob); ++j) {
+ const Section* s = obj_section_get(ob, j);
+ if (s && section_kept(s) && gc_live_get(g, ii, j)) ++total_kept;
+ }
+ }
+
+ /* Pre-allocate img->sections at the upper bound; img->nsections
+ * tracks the actual count placed. */
+ img->sections = total_kept ? (LinkSection*)h->alloc(
+ h, sizeof(*img->sections) * total_kept,
+ _Alignof(LinkSection))
+ : NULL;
+ if (total_kept && !img->sections)
+ compiler_panic(img->c, no_loc(), "link: oom on sections");
+
+ /* Per-section "claimed" map (one byte per section) to enforce
+ * single-claim across the whole script. Indexed by
+ * [input_idx][obj_sec_id].
+ * NOTE(review): a row is allocated with size obj_section_count(ob);
+ * if that can be 0 and the allocator returns NULL for a zero-size
+ * request, the check below reports a false OOM — confirm. */
+ u8** claimed = NULL;
+ if (LinkInputs_count(&l->inputs)) {
+ u32 ni = LinkInputs_count(&l->inputs);
+ claimed = (u8**)h->alloc(h, sizeof(*claimed) * ni, _Alignof(u8*));
+ if (!claimed) compiler_panic(img->c, no_loc(), "link: oom on claim map");
+ for (ii = 0; ii < ni; ++ii) {
+ ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
+ u32 nsec = obj_section_count(ob);
+ claimed[ii] = (u8*)h->alloc(h, nsec, 1);
+ if (!claimed[ii])
+ compiler_panic(img->c, no_loc(), "link: oom on claim row");
+ memset(claimed[ii], 0, nsec);
+ }
+ }
+
+ /* Pass 1: top-level dot assignments establish the base address
+ * before any placement. SYM/PROVIDE assignments at the top level
+ * are deferred to pass 3 so they capture the post-placement dot
+ * (e.g. `_end = .` at the script's tail).
+ * Every top-level dot assignment is applied here, in order, before
+ * the first output section is placed; none is interleaved with the
+ * output sections. */
+ for (k = 0; k < script->ntop_asns; ++k) {
+ const CfreeLinkAssignment* a = &script->top_asns[k];
+ if (a->kind == CFREE_LAS_DOT) apply_asn(l, img, &dot, a);
+ }
+
+ /* Pre-allocate img->segments at the upper bound (one per non-DISCARD
+ * output section). */
+ u32 nseg_max = 0;
+ for (si = 0; si < script->nsections; ++si)
+ if (strcmp(script->sections[si].name, "/DISCARD/") != 0) ++nseg_max;
+ img->segments =
+ nseg_max ? (LinkSegment*)h->alloc(h, sizeof(*img->segments) * nseg_max,
+ _Alignof(LinkSegment))
+ : NULL;
+ img->segment_bytes =
+ nseg_max ? (u8**)h->alloc(h, sizeof(*img->segment_bytes) * nseg_max,
+ _Alignof(u8*))
+ : NULL;
+ img->segment_bytes_cap =
+ nseg_max
+ ? (size_t*)h->alloc(h, sizeof(*img->segment_bytes_cap) * nseg_max,
+ _Alignof(size_t))
+ : NULL;
+ if (nseg_max &&
+ (!img->segments || !img->segment_bytes || !img->segment_bytes_cap))
+ compiler_panic(img->c, no_loc(), "link: oom on segments");
+ if (nseg_max) {
+ memset(img->segment_bytes, 0, sizeof(*img->segment_bytes) * nseg_max);
+ memset(img->segment_bytes_cap, 0,
+ sizeof(*img->segment_bytes_cap) * nseg_max);
+ }
+
+ /* Pass 2: walk output sections in declaration order. */
+ for (si = 0; si < script->nsections; ++si) {
+ const CfreeLinkOutputSection* os = &script->sections[si];
+ int is_discard = (strcmp(os->name, "/DISCARD/") == 0);
+
+ if (is_discard) {
+ /* Mark every matched input section as claimed. We don't add
+ * them to img->sections; their m->section[id] stays
+ * LINK_SEC_NONE so reloc-apply / link_symbols_to_sections
+ * naturally skip them. */
+ u32 mi;
+ for (mi = 0; mi < os->ninputs; ++mi) {
+ const CfreeLinkInputMatch* im = &os->inputs[mi];
+ for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
+ ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
+ for (j = 1; j < obj_section_count(ob); ++j) {
+ const Section* s;
+ const char* nm;
+ size_t nl;
+ if (claimed[ii][j]) continue;
+ s = obj_section_get(ob, j);
+ if (!s) continue;
+ nm = pool_str(l->c->global, s->name, &nl);
+ if (!nm) continue;
+ if (input_match_section(im, nm)) claimed[ii][j] = 1;
+ }
+ }
+ }
+ continue;
+ }
+
+ /* Non-DISCARD output section. Process all in-section asns first
+ * (header ALIGN encoded as the first dot-asn, plus any
+ * `__bss_start = .` style early captures), then walk inputs in
+ * declaration order, claiming matches across all inputs in input
+ * order. Each placed input section advances dot.
+ * Because every in-section assignment runs before any input is
+ * placed, a symbol assignment written after the input matchers
+ * still observes the dot from before placement. */
+ u64 sec_start_dot;
+ u32 perms = 0;
+ LinkSegmentId seg_id = (LinkSegmentId)(img->nsegments + 1u);
+ LinkSegment* seg;
+ u64 file_size_accum = 0;
+ u64 mem_size_accum = 0;
+ u32 align_max = 1;
+ u32 nsec_in_seg = 0;
+ u32 first_section_idx = img->nsections;
+
+ /* Apply in-section asns (pre-input). */
+ for (k = 0; k < os->nasns; ++k) {
+ apply_asn(l, img, &dot, &os->asns[k]);
+ }
+ sec_start_dot = dot;
+
+ /* Walk input matches; for each, scan all inputs in input order. */
+ {
+ u32 mi;
+ for (mi = 0; mi < os->ninputs; ++mi) {
+ const CfreeLinkInputMatch* im = &os->inputs[mi];
+ for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
+ ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
+ InputMap* m = &img->input_maps[ii];
+ for (j = 1; j < obj_section_count(ob); ++j) {
+ const Section* s;
+ const char* nm;
+ size_t nl;
+ u32 align;
+ u64 ofs;
+ LinkSection* ls;
+ LinkSectionId lsid;
+ if (claimed[ii][j]) continue;
+ if (!gc_live_get(g, ii, j)) continue;
+ s = obj_section_get(ob, j);
+ if (!s || !section_kept(s)) continue;
+ nm = pool_str(l->c->global, s->name, &nl);
+ if (!nm) continue;
+ if (!input_match_section(im, nm)) continue;
+
+ align = s->align ? s->align : 1u;
+ if (align > align_max) align_max = align;
+ dot = ALIGN_UP(dot, (u64)align);
+ ofs = dot;
+
+ lsid = (LinkSectionId)(img->nsections + 1u);
+ ls = &img->sections[img->nsections++];
+ memset(ls, 0, sizeof(*ls));
+ ls->id = lsid;
+ ls->input_id = LinkInputs_at(&l->inputs, ii)->id;
+ ls->obj_section_id = j;
+ ls->segment_id = seg_id;
+ ls->vaddr = ofs;
+ ls->size = (s->sem == SSEM_NOBITS) ? s->bss_size : s->bytes.total;
+ ls->flags = s->flags;
+ ls->align = align;
+ ls->name = s->name;
+ ls->sem = s->sem;
+ /* file_offset within the segment buffer: distance from
+ * sec_start_dot. NOBITS contributes no file bytes. */
+ ls->file_offset = ofs - sec_start_dot;
+ ls->input_offset = ls->file_offset;
+ m->section[j] = lsid;
+ claimed[ii][j] = 1;
+
+ dot += ls->size;
+ mem_size_accum = dot - sec_start_dot;
+ if (s->sem != SSEM_NOBITS) file_size_accum = dot - sec_start_dot;
+ perms |= (s->flags & (SF_EXEC | SF_WRITE | SF_TLS));
+ ++nsec_in_seg;
+ }
+ }
+ }
+ }
+
+ /* Materialize the segment for this output section. Empty output
+ * sections (no input matched) are dropped — they'd produce an
+ * empty PT_LOAD which the loader rejects. */
+ if (nsec_in_seg == 0) {
+ /* Nothing to undo: no LinkSection was appended and img->nsegments
+ * was not advanced. Any dot movement made by the in-section
+ * assignments above is kept. */
+ continue;
+ }
+
+ seg = &img->segments[img->nsegments];
+ memset(seg, 0, sizeof(*seg));
+ seg->id = seg_id;
+ seg->flags = SF_ALLOC | perms;
+ seg->vaddr = sec_start_dot;
+ /* Page-align each segment's file offset so the writer can keep file
+ * offset and vaddr congruent mod page size for the runtime loader.
+ * NOTE(review): seg->vaddr is sec_start_dot, which nothing here
+ * forces onto a page boundary, so the two are congruent mod
+ * PAGE_SIZE only when the script itself page-aligns the section —
+ * confirm whether the offset should carry sec_start_dot's in-page
+ * remainder. */
+ file_cursor = ALIGN_UP(file_cursor, (u64)PAGE_SIZE);
+ seg->file_offset = file_cursor;
+ seg->mem_size = mem_size_accum;
+ seg->file_size = file_size_accum;
+ seg->align = align_max;
+ seg->nsections = nsec_in_seg;
+ file_cursor += file_size_accum;
+ if (file_size_accum) {
+ img->segment_bytes[img->nsegments] =
+ (u8*)h->alloc(h, (size_t)file_size_accum, 16);
+ if (!img->segment_bytes[img->nsegments])
+ compiler_panic(img->c, no_loc(), "link: oom on scripted segment bytes");
+ img->segment_bytes_cap[img->nsegments] = (size_t)file_size_accum;
+ memset(img->segment_bytes[img->nsegments], 0, (size_t)file_size_accum);
+ }
+
+ /* Shift each section's vaddr/file_offset onto the segment's
+ * absolute base. Sections were laid out with vaddr = absolute
+ * dot already, so vaddr is correct as-is; file_offset needs
+ * to become absolute (segment-base + relative). */
+ {
+ u32 fi;
+ for (fi = first_section_idx; fi < img->nsections; ++fi) {
+ LinkSection* ls = &img->sections[fi];
+ ls->file_offset = seg->file_offset + (ls->file_offset);
+ }
+ }
+
+ img->nsegments++;
+ }
+
+ /* Pass 3: top-level SYM / PROVIDE assignments capture the
+ * post-placement dot (e.g. `_end = .`). */
+ for (k = 0; k < script->ntop_asns; ++k) {
+ const CfreeLinkAssignment* a = &script->top_asns[k];
+ if (a->kind == CFREE_LAS_SYM || a->kind == CFREE_LAS_PROVIDE)
+ apply_asn(l, img, &dot, a);
+ }
+
+ /* Free claim map: each per-input row first, then the row table. */
+ if (claimed) {
+ u32 ni = LinkInputs_count(&l->inputs);
+ for (ii = 0; ii < ni; ++ii) {
+ ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
+ h->free(h, claimed[ii], obj_section_count(ob));
+ }
+ h->free(h, claimed, sizeof(*claimed) * ni);
+ }
+}
+
/* ---- pass 2b: COMMON symbol BSS allocation ---- */
/* After segments are laid out, extend the RW segment's BSS area to
* accommodate any SK_COMMON symbols that have no section yet. */
diff --git a/src/link/link_reloc.c b/src/link/link_reloc.c
@@ -41,6 +41,15 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
wr_u32_le(P_bytes, (u32)((u64)v & 0xffffffffu));
return;
}
+ case R_REL64:
+ case R_PC64: {
+ /* 64-bit PC-relative; AArch64 R_AARCH64_PREL64. Used by
+ * `.quad sym1 - sym2` style symbol-difference encodings (e.g.
+ * the arm64 kernel image_size header field). */
+ i64 v = (i64)S + A - (i64)P;
+ wr_u64_le(P_bytes, (u64)v);
+ return;
+ }
case R_AARCH64_ABS16: {
u64 v = S + (u64)A;
wr_u16_le(P_bytes, (u16)(v & 0xffffu));
diff --git a/src/link/link_script.c b/src/link/link_script.c
@@ -0,0 +1,904 @@
+/* Linker-script parser: a minimal GNU-ld-subset front end that produces
+ * the structured CfreeLinkScript form documented in <cfree.h>. The
+ * applicator (link_layout.c) consumes the structured form; this file
+ * never speaks ELF or layout.
+ *
+ * Subset (driven by the kernel.lds at the head of doc/DESIGN.md §13):
+ * ENTRY(symbol)
+ * SECTIONS { ... }
+ * . = expr
+ * name = expr
+ * name : [ALIGN(N)] { body }
+ * /DISCARD/ : { body }
+ * body items: *(p1 p2 ...), name = expr, . = expr
+ * exprs: int literal (dec / 0x), `.`, ident, parens,
+ * + - * / & | ^ << >>, ALIGN(expr, align)
+ * slash-star comments; whitespace insensitive.
+ *
+ * Anything else (MEMORY, PROVIDE, KEEP, AT>, > REGION, OVERLAY, INSERT,
+ * OUTPUT_FORMAT, INPUT, GROUP, MAX, MIN, line comments, quoted strings,
+ * file patterns other than the implicit `*` of `*(...)`) is a parse
+ * error: emits a diagnostic and returns 1, leaving *out unchanged.
+ *
+ * Encoding contracts the applicator relies on:
+ * - /DISCARD/ is encoded as a CfreeLinkOutputSection with name
+ * "/DISCARD/" (a literal sentinel, not a parsed identifier).
+ * - An output section's `: ALIGN(N)` header is encoded as the first
+ * entry in its asns[]: a dot-assignment whose expr is ALIGN(., N).
+ * - `*(p1 p2 ...)` produces one CfreeLinkInputMatch per pattern with
+ * file_pattern = NULL (implicit `*`) and section_pattern set.
+ * COMMON is parsed as a literal pattern "COMMON".
+ *
+ * Allocation: every node and string is owned by the compiler's tu arena.
+ * cfree_link_script_free is therefore a no-op — the arena outlives the
+ * script and is collectively freed with the compiler. During parsing we
+ * grow temporary arrays on the host heap, then arena-copy at finish.
+ *
+ * Diagnostics: SourceManager registration of a script buffer is a future
+ * cleanup; for now diagnostics carry file_id = 0 and a 1-based line/col
+ * that is recomputed from the byte offset whenever one is emitted. */
+
+#include <cfree.h>
+#include <stdarg.h>
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/core.h"
+#include "core/diag.h"
+#include "core/heap.h"
+
+typedef struct LSP {
+  Compiler* c;     /* compiler; its tu arena and env->diag are used */
+  Heap* heap;      /* host heap backing the temporary vectors */
+  const char* src; /* script text being parsed */
+  size_t len;      /* number of bytes in src */
+  size_t pos;      /* current read offset into src */
+  /* one-bit error sticky: any diagnostic flips this and the parser
+   * unwinds without producing partial output. */
+  int err;
+} LSP;
+
+/* ---- diagnostics ---- */
+
+/* Convert byte offset `off` in the script text into a SrcLoc holding a
+ * 1-based line and column. Offsets beyond the end of the buffer are
+ * treated as the end of the buffer.
+ * TODO: register the script buffer with SourceManager so diagnostics
+ * carry a real file_id; until then file_id is always 0. */
+static SrcLoc lsp_loc(const LSP* p, size_t off) {
+  SrcLoc loc;
+  size_t stop = off < p->len ? off : p->len;
+  size_t row = 1;
+  size_t column = 1;
+  size_t at;
+  for (at = 0; at < stop; ++at) {
+    if (p->src[at] != '\n') {
+      ++column;
+      continue;
+    }
+    ++row;
+    column = 1;
+  }
+  loc.file_id = 0;
+  loc.line = (u32)row;
+  loc.col = (u32)column;
+  return loc;
+}
+
+static void lsp_errf(LSP* p, size_t off, const char* fmt, ...) {
+ va_list ap;
+ if (!p->c || !p->c->env || !p->c->env->diag) {
+ p->err = 1;
+ return;
+ }
+ va_start(ap, fmt);
+ diag_emitv(p->c->env->diag, DIAG_ERROR, lsp_loc(p, off), fmt, ap);
+ va_end(ap);
+ p->err = 1;
+}
+
+/* ---- arena helpers ---- */
+
+/* Copy the n bytes at s into the compiler's tu arena via arena_strdup. */
+static char* lsp_strdup(LSP* p, const char* s, size_t n) {
+  char* copy = arena_strdup(p->c->tu, s, n);
+  return copy;
+}
+
+/* Allocate one expression node from the compiler's tu arena (arena_znew). */
+static CfreeLinkExpr* lsp_new_expr(LSP* p) {
+  CfreeLinkExpr* node = arena_znew(p->c->tu, CfreeLinkExpr);
+  return node;
+}
+
+/* ---- heap-backed temp vectors (copied to the arena at finish) ---- */
+
+/* Growable arrays used while parsing. In each: p is the storage,
+ * n the number of elements in use, cap the number allocated. */
+typedef struct VecAsn {
+  CfreeLinkAssignment* p;
+  u32 n;
+  u32 cap;
+} VecAsn;
+
+typedef struct VecMatch {
+  CfreeLinkInputMatch* p;
+  u32 n;
+  u32 cap;
+} VecMatch;
+
+typedef struct VecSec {
+  CfreeLinkOutputSection* p;
+  u32 n;
+  u32 cap;
+} VecSec;
+
+/* Ensure the vector (*ptr, *cap) can hold at least `want` elements of
+ * `es` bytes each. Capacity starts at 8 and doubles until it is large
+ * enough. Returns 0 on success, 1 if the heap realloc fails (in which
+ * case *ptr and *cap are left untouched). */
+static int vec_reserve_(LSP* p, void** ptr, u32* cap, u32 want, size_t es) {
+  u32 grown = *cap;
+  void* moved;
+  if (grown >= want) return 0;
+  if (grown == 0) grown = 4; /* first doubling below yields 8 */
+  do {
+    grown *= 2;
+  } while (grown < want);
+  moved = p->heap->realloc(p->heap, *ptr, (size_t)*cap * es,
+                           (size_t)grown * es, sizeof(void*));
+  if (moved == NULL) return 1;
+  *cap = grown;
+  *ptr = moved;
+  return 0;
+}
+
+/* Append `val` to vector `v`, growing it when full. Evaluates to 0 on
+ * success and 1 when growth fails. `v` is expanded more than once, so
+ * it must be free of side effects. */
+#define VEC_PUSH(p, v, val)                                        \
+  ((vec_reserve_((p), (void**)&(v).p, &(v).cap, (v).n + 1,         \
+                 sizeof(*(v).p)) != 0)                              \
+       ? 1                                                          \
+       : ((v).p[(v).n++] = (val), 0))
+
+/* Return a temp vector's storage (cap elements of es bytes) to the host
+ * heap; a NULL ptr is ignored. */
+static void vec_free_(LSP* p, void* ptr, u32 cap, size_t es) {
+  if (ptr == NULL) return;
+  p->heap->free(p->heap, ptr, (size_t)cap * es);
+}
+
+/* ---- lex primitives ---- */
+
+/* 1 if c may begin an identifier: ASCII letter, '_' or '.'; else 0. */
+static int is_id_start(int c) {
+  if (c == '_' || c == '.') return 1;
+  if (c >= 'A' && c <= 'Z') return 1;
+  return c >= 'a' && c <= 'z';
+}
+/* 1 if c may continue an identifier: anything is_id_start accepts, plus
+ * ASCII digits and '-'; else 0. */
+static int is_id_cont(int c) {
+  if (c == '-') return 1;
+  if (c >= '0' && c <= '9') return 1;
+  return is_id_start(c);
+}
+
+/* Advance p->pos past blanks, tabs, CR, LF and slash-star comments.
+ * An unterminated block comment or a `//` line comment raises a
+ * diagnostic (which sets p->err) and stops the scan. */
+static void skip_ws(LSP* p) {
+  for (;;) {
+    const char* s = p->src;
+    size_t at = p->pos;
+    size_t open;
+    if (at >= p->len) return;
+    switch (s[at]) {
+      case ' ':
+      case '\t':
+      case '\r':
+      case '\n':
+        p->pos = at + 1;
+        continue;
+      case '/':
+        break;
+      default:
+        return;
+    }
+    /* s[at] is '/': only comment openers are handled here. */
+    if (at + 1 >= p->len) return;
+    if (s[at + 1] == '/') {
+      lsp_errf(p, at, "// line comments not supported");
+      return;
+    }
+    if (s[at + 1] != '*') return;
+    open = at;
+    at += 2;
+    while (at + 1 < p->len && (s[at] != '*' || s[at + 1] != '/')) ++at;
+    p->pos = at;
+    if (at + 1 >= p->len) {
+      lsp_errf(p, open, "unterminated /* comment");
+      return;
+    }
+    p->pos = at + 2; /* step over the closing star-slash */
+  }
+}
+
+/* Skip whitespace/comments and return the next byte (as unsigned char)
+ * without consuming it; -1 at end of input or once an error is set. */
+static int peek_ch(LSP* p) {
+  skip_ws(p);
+  if (p->err || p->pos >= p->len) return -1;
+  return (unsigned char)p->src[p->pos];
+}
+
+/* Skip whitespace/comments; if the next byte equals ch, consume it and
+ * return 1. Otherwise (including on error or end of input) return 0
+ * without consuming anything further. */
+static int match_ch(LSP* p, char ch) {
+  skip_ws(p);
+  if (p->err) return 0;
+  if (p->pos >= p->len) return 0;
+  if (p->src[p->pos] != ch) return 0;
+  ++p->pos;
+  return 1;
+}
+
+static int expect_ch(LSP* p, char ch) {
+ if (match_ch(p, ch)) return 0;
+ lsp_errf(p, p->pos, "expected '%c'", ch);
+ return 1;
+}
+
+/* Lex one identifier-like token in place: an is_id_start byte followed
+ * by any run of is_id_cont bytes (so dotted names such as .text lex as
+ * one token). On success stores a pointer into p->src in *out, the
+ * token length in *out_len, and returns 0. Returns 1 after an earlier
+ * error or, with a diagnostic, when no identifier starts here. */
+static int lex_ident(LSP* p, const char** out, size_t* out_len) {
+  size_t begin;
+  size_t end;
+  skip_ws(p);
+  if (p->err) return 1;
+  begin = p->pos;
+  if (begin >= p->len || !is_id_start((unsigned char)p->src[begin])) {
+    lsp_errf(p, begin, "expected identifier");
+    return 1;
+  }
+  for (end = begin; end < p->len; ++end) {
+    if (!is_id_cont((unsigned char)p->src[end])) break;
+  }
+  p->pos = end;
+  *out = p->src + begin;
+  *out_len = end - begin;
+  return 0;
+}
+
+/* Try to consume the literal keyword `kw` after skipping whitespace and
+ * comments. The keyword matches only when it is not immediately
+ * followed by an identifier-continuation byte, so "ALIGNED" does not
+ * match "ALIGN". Returns 1 and advances past the keyword on a match;
+ * returns 0 and leaves the position after the skipped whitespace
+ * otherwise (also when an error is already set). */
+static int match_kw(LSP* p, const char* kw) {
+  size_t klen = strlen(kw);
+  skip_ws(p);
+  if (p->err) return 0;
+  if (p->pos + klen > p->len) return 0;
+  if (memcmp(p->src + p->pos, kw, klen) != 0) return 0;
+  /* must not glue to a following id-cont character */
+  if (p->pos + klen < p->len &&
+      is_id_cont((unsigned char)p->src[p->pos + klen]))
+    return 0;
+  p->pos += klen;
+  return 1;
+}
+
+/* ---- expression parser (precedence climbing) ----
+ *
+ * Levels (low -> high):
+ * 0: |
+ * 1: ^
+ * 2: &
+ * 3: << >>
+ * 4: + -
+ * 5: * /
+ * 6: unary (none beyond parenthesized atoms here)
+ * atom: int | . | ALIGN(e,a) | ident | (expr)
+ */
+
+static CfreeLinkExpr* parse_expr(LSP* p);
+
+/* Parse an integer literal at p->pos: decimal, or hexadecimal with a
+ * 0x / 0X prefix. The caller has already skipped whitespace.
+ *
+ * The value is accumulated in an unsigned 64-bit integer so that no
+ * signed overflow (undefined behavior) can occur. A literal that does
+ * not fit in 64 bits is rejected with a diagnostic rather than silently
+ * wrapped. Values with the top bit set (e.g. 0xffffffffffffffff) are
+ * accepted and stored as their two's-complement i64 bit pattern.
+ *
+ * Returns a CFREE_LE_INT node, or NULL on a malformed literal or
+ * allocation failure. */
+static CfreeLinkExpr* parse_int(LSP* p) {
+  CfreeLinkExpr* e;
+  size_t start = p->pos;
+  uint64_t v = 0;
+  int overflow = 0;
+  if (p->pos + 1 < p->len && p->src[p->pos] == '0' &&
+      (p->src[p->pos + 1] == 'x' || p->src[p->pos + 1] == 'X')) {
+    p->pos += 2;
+    if (p->pos >= p->len) {
+      lsp_errf(p, start, "malformed hex literal");
+      return NULL;
+    }
+    while (p->pos < p->len) {
+      char ch = p->src[p->pos];
+      unsigned d;
+      if (ch >= '0' && ch <= '9')
+        d = (unsigned)(ch - '0');
+      else if (ch >= 'a' && ch <= 'f')
+        d = 10u + (unsigned)(ch - 'a');
+      else if (ch >= 'A' && ch <= 'F')
+        d = 10u + (unsigned)(ch - 'A');
+      else
+        break;
+      /* any of the top four bits set means the shift drops them */
+      if (v >> 60) overflow = 1;
+      v = (v << 4) | d;
+      ++p->pos;
+    }
+    if (p->pos == start + 2) {
+      lsp_errf(p, start, "empty hex literal");
+      return NULL;
+    }
+  } else {
+    while (p->pos < p->len && p->src[p->pos] >= '0' && p->src[p->pos] <= '9') {
+      unsigned d = (unsigned)(p->src[p->pos] - '0');
+      if (v > (UINT64_MAX - d) / 10u) overflow = 1;
+      v = v * 10u + d;
+      ++p->pos;
+    }
+    if (p->pos == start) {
+      lsp_errf(p, start, "expected integer");
+      return NULL;
+    }
+  }
+  if (overflow) {
+    lsp_errf(p, start, "integer literal does not fit in 64 bits");
+    return NULL;
+  }
+  e = lsp_new_expr(p);
+  if (!e) return NULL;
+  e->kind = CFREE_LE_INT;
+  e->v.int_val = (i64)v;
+  return e;
+}
+
+/* Parse one expression atom:
+ *   ( expr )  |  .  |  integer literal  |  ALIGN(expr, expr)  |  symbol
+ * MAX and MIN are recognized only to reject them with a diagnostic.
+ * Returns the expression node, or NULL on a parse error or allocation
+ * failure (including failure to copy a symbol name into the arena). */
+static CfreeLinkExpr* parse_atom(LSP* p) {
+  int ch;
+  skip_ws(p);
+  if (p->err) return NULL;
+  ch = peek_ch(p);
+  if (ch < 0) {
+    lsp_errf(p, p->pos, "unexpected end of expression");
+    return NULL;
+  }
+  if (ch == '(') {
+    CfreeLinkExpr* e;
+    ++p->pos;
+    e = parse_expr(p);
+    if (!e) return NULL;
+    if (expect_ch(p, ')')) return NULL;
+    return e;
+  }
+  if (ch == '.') {
+    /* `.` only — bare dot, not a dotted ident. We disambiguate by
+     * looking at the next char: an id-cont character here is a lex
+     * error in this subset (no .text in expression position). */
+    size_t off = p->pos;
+    CfreeLinkExpr* e;
+    ++p->pos;
+    if (p->pos < p->len && is_id_cont((unsigned char)p->src[p->pos])) {
+      lsp_errf(p, off, "dotted identifiers not allowed in expressions");
+      return NULL;
+    }
+    e = lsp_new_expr(p);
+    if (!e) return NULL;
+    e->kind = CFREE_LE_DOT;
+    return e;
+  }
+  if (ch >= '0' && ch <= '9') return parse_int(p);
+  if (is_id_start(ch)) {
+    /* either ALIGN(...) or a symbol reference */
+    if (match_kw(p, "ALIGN")) {
+      CfreeLinkExpr *val, *aln, *e;
+      if (expect_ch(p, '(')) return NULL;
+      val = parse_expr(p);
+      if (!val) return NULL;
+      if (expect_ch(p, ',')) return NULL;
+      aln = parse_expr(p);
+      if (!aln) return NULL;
+      if (expect_ch(p, ')')) return NULL;
+      e = lsp_new_expr(p);
+      if (!e) return NULL;
+      e->kind = CFREE_LE_ALIGN;
+      e->v.align.val = val;
+      e->v.align.align = aln;
+      return e;
+    }
+    if (match_kw(p, "MAX") || match_kw(p, "MIN")) {
+      lsp_errf(p, p->pos, "MAX/MIN not supported in this subset");
+      return NULL;
+    }
+    {
+      const char* s;
+      size_t n;
+      CfreeLinkExpr* e;
+      if (lex_ident(p, &s, &n)) return NULL;
+      e = lsp_new_expr(p);
+      if (!e) return NULL;
+      e->kind = CFREE_LE_SYM;
+      e->v.name = lsp_strdup(p, s, n);
+      /* never hand back a symbol node without a name */
+      if (!e->v.name) return NULL;
+      return e;
+    }
+  }
+  lsp_errf(p, p->pos, "unexpected '%c' in expression", (char)ch);
+  return NULL;
+}
+
+/* If a binary operator starts at the lookahead, consume it, store its
+ * expression kind in *out_kind and return its binding power (0 = loosest,
+ * 5 = tightest). Returns -1 and consumes nothing when no operator is
+ * present; a lone '<' or '>' is not an operator.
+ *
+ * '/' is always division here: skip_ws has already eaten slash-star
+ * comments, and /DISCARD/ is recognized by the SECTIONS-body loop
+ * before expression context. */
+static int try_take_binop(LSP* p, CfreeLinkExprKind* out_kind) {
+  CfreeLinkExprKind kind;
+  size_t width = 1;
+  int power;
+  int has_next;
+  char op;
+  skip_ws(p);
+  if (p->err || p->pos >= p->len) return -1;
+  op = p->src[p->pos];
+  has_next = p->pos + 1 < p->len;
+  if (op == '|') {
+    kind = CFREE_LE_OR;
+    power = 0;
+  } else if (op == '^') {
+    kind = CFREE_LE_XOR;
+    power = 1;
+  } else if (op == '&') {
+    kind = CFREE_LE_AND;
+    power = 2;
+  } else if (op == '<' && has_next && p->src[p->pos + 1] == '<') {
+    kind = CFREE_LE_SHL;
+    power = 3;
+    width = 2;
+  } else if (op == '>' && has_next && p->src[p->pos + 1] == '>') {
+    kind = CFREE_LE_SHR;
+    power = 3;
+    width = 2;
+  } else if (op == '+') {
+    kind = CFREE_LE_ADD;
+    power = 4;
+  } else if (op == '-') {
+    kind = CFREE_LE_SUB;
+    power = 4;
+  } else if (op == '*') {
+    kind = CFREE_LE_MUL;
+    power = 5;
+  } else if (op == '/') {
+    kind = CFREE_LE_DIV;
+    power = 5;
+  } else {
+    return -1;
+  }
+  p->pos += width;
+  *out_kind = kind;
+  return power;
+}
+
+/* Precedence-climbing loop. Starting from `lhs`, repeatedly consume
+ * binary operators whose binding power is at least min_bp, parsing each
+ * right operand with the next-higher minimum so that equal-precedence
+ * operators group to the left. An operator that binds too loosely is
+ * pushed back for the caller. Returns the combined tree, or NULL on
+ * error. */
+static CfreeLinkExpr* parse_binop_rhs(LSP* p, int min_bp, CfreeLinkExpr* lhs) {
+  CfreeLinkExpr* acc = lhs;
+  for (;;) {
+    CfreeLinkExprKind op;
+    CfreeLinkExpr* right;
+    CfreeLinkExpr* joined;
+    size_t mark;
+    int power;
+    if (p->err) return NULL;
+    skip_ws(p);
+    if (p->err) return NULL;
+    mark = p->pos;
+    power = try_take_binop(p, &op);
+    if (power < 0) return acc;
+    if (power < min_bp) {
+      p->pos = mark; /* un-read the operator */
+      return acc;
+    }
+    right = parse_atom(p);
+    if (right == NULL) return NULL;
+    right = parse_binop_rhs(p, power + 1, right);
+    if (right == NULL) return NULL;
+    joined = lsp_new_expr(p);
+    if (joined == NULL) return NULL;
+    joined->kind = (uint8_t)op;
+    joined->v.bin.lhs = acc;
+    joined->v.bin.rhs = right;
+    acc = joined;
+  }
+}
+
+/* Parse a full expression: one atom followed by any chain of binary
+ * operators. Returns NULL on error. */
+static CfreeLinkExpr* parse_expr(LSP* p) {
+  CfreeLinkExpr* first = parse_atom(p);
+  return first ? parse_binop_rhs(p, 0, first) : NULL;
+}
+
+/* ---- assignment helpers ---- */
+
+/* Append the dot-assignment `. = ALIGN(., align_n)` to asns. Returns 0
+ * on success, 1 if a node allocation or the vector push fails. */
+static int push_dot_align(LSP* p, VecAsn* asns, CfreeLinkExpr* align_n) {
+  CfreeLinkAssignment asn;
+  CfreeLinkExpr* here;
+  CfreeLinkExpr* aligned;
+
+  here = lsp_new_expr(p);
+  if (here == NULL) return 1;
+  here->kind = CFREE_LE_DOT;
+
+  aligned = lsp_new_expr(p);
+  if (aligned == NULL) return 1;
+  aligned->kind = CFREE_LE_ALIGN;
+  aligned->v.align.val = here;
+  aligned->v.align.align = align_n;
+
+  asn.kind = CFREE_LAS_DOT;
+  asn.sym = NULL;
+  asn.expr = aligned;
+  return VEC_PUSH(p, *asns, asn);
+}
+
+/* ---- output section body ---- */
+
+/* Parse the `(p1 p2 ...)` part of an input matcher; the leading `*` has
+ * already been consumed by the caller. Each pattern is a run of
+ * identifier-continuation characters and '*'. One CfreeLinkInputMatch
+ * (file_pattern = NULL, keep = 0) is appended to `out` per pattern.
+ * Returns 0 once the closing ')' is consumed, 1 on a parse error or
+ * allocation failure (including failure to copy a pattern). */
+static int parse_input_matchers(LSP* p, VecMatch* out) {
+  if (expect_ch(p, '(')) return 1;
+  for (;;) {
+    CfreeLinkInputMatch m;
+    size_t start;
+    size_t n;
+    int ch;
+    skip_ws(p);
+    if (p->err) return 1;
+    ch = peek_ch(p);
+    if (ch == ')') {
+      ++p->pos;
+      return 0;
+    }
+    if (ch < 0) {
+      lsp_errf(p, p->pos, "unterminated `*(...)`");
+      return 1;
+    }
+    /* a pattern is a section-name-like run: id-cont chars plus '*'. */
+    start = p->pos;
+    while (p->pos < p->len) {
+      char c = p->src[p->pos];
+      if (!is_id_cont((unsigned char)c) && c != '*') break;
+      ++p->pos;
+    }
+    n = p->pos - start;
+    if (n == 0) {
+      lsp_errf(p, p->pos, "expected section pattern");
+      return 1;
+    }
+    m.file_pattern = NULL;
+    m.section_pattern = lsp_strdup(p, p->src + start, n);
+    /* a matcher without a pattern must never reach the applicator */
+    if (!m.section_pattern) return 1;
+    m.keep = 0;
+    if (VEC_PUSH(p, *out, m)) return 1;
+  }
+}
+
+/* Parse an output-section body `{ ... }`. Items, in any order:
+ *   *(p1 p2 ...)   input matchers, appended to `inputs`
+ *   . = expr       dot assignment, appended to `asns`
+ *   name = expr    symbol assignment, appended to `asns`
+ * A trailing ';' after an assignment is optional. PROVIDE and KEEP are
+ * rejected with a diagnostic. Returns 0 once the closing '}' is
+ * consumed, 1 on a parse error or allocation failure (including failure
+ * to copy a symbol name). */
+static int parse_section_body(LSP* p, VecMatch* inputs, VecAsn* asns) {
+  if (expect_ch(p, '{')) return 1;
+  for (;;) {
+    int ch;
+    skip_ws(p);
+    if (p->err) return 1;
+    ch = peek_ch(p);
+    if (ch == '}') {
+      ++p->pos;
+      return 0;
+    }
+    if (ch < 0) {
+      lsp_errf(p, p->pos, "unterminated section body");
+      return 1;
+    }
+    if (ch == '*') {
+      ++p->pos;
+      if (parse_input_matchers(p, inputs)) return 1;
+      continue;
+    }
+    if (ch == '.') {
+      /* `. = expr;` */
+      size_t off = p->pos;
+      CfreeLinkExpr* e;
+      CfreeLinkAssignment a;
+      ++p->pos;
+      skip_ws(p);
+      if (p->err) return 1;
+      if (!match_ch(p, '=')) {
+        lsp_errf(p, off, "expected `. = expr` in section body");
+        return 1;
+      }
+      e = parse_expr(p);
+      if (!e) return 1;
+      (void)match_ch(p, ';'); /* ; is optional but encouraged */
+      a.kind = CFREE_LAS_DOT;
+      a.sym = NULL;
+      a.expr = e;
+      if (VEC_PUSH(p, *asns, a)) return 1;
+      continue;
+    }
+    if (is_id_start(ch)) {
+      /* sym = expr; */
+      const char* s;
+      size_t n;
+      CfreeLinkExpr* e;
+      CfreeLinkAssignment a;
+      if (match_kw(p, "PROVIDE") || match_kw(p, "KEEP")) {
+        lsp_errf(p, p->pos, "PROVIDE/KEEP not supported in this subset");
+        return 1;
+      }
+      if (lex_ident(p, &s, &n)) return 1;
+      skip_ws(p);
+      if (p->err) return 1;
+      if (!match_ch(p, '=')) {
+        lsp_errf(p, p->pos, "expected `=` after `%.*s`", (int)n, s);
+        return 1;
+      }
+      e = parse_expr(p);
+      if (!e) return 1;
+      (void)match_ch(p, ';');
+      a.kind = CFREE_LAS_SYM;
+      a.sym = lsp_strdup(p, s, n);
+      /* a symbol assignment without a name is unusable downstream */
+      if (!a.sym) return 1;
+      a.expr = e;
+      if (VEC_PUSH(p, *asns, a)) return 1;
+      continue;
+    }
+    lsp_errf(p, p->pos, "unexpected '%c' in section body", (char)ch);
+    return 1;
+  }
+}
+
+/* ---- output section header ---- */
+
+/* Parse the remainder of an output-section statement whose name
+ * (name_buf, name_len) was already lexed: `: [ALIGN(N)] { body } [;]`.
+ * Memory-region placement (`>REGION`, `AT...`) before the body is
+ * rejected. A header ALIGN(N) is recorded as the first assignment,
+ * `. = ALIGN(., N)`. On success the inputs and assignments are copied
+ * into the tu arena and one CfreeLinkOutputSection is appended to
+ * `sections`. The temporary vectors are released on every path.
+ * Returns 0 on success, 1 on a parse error or allocation failure
+ * (including failure to copy the section name). */
+static int parse_output_section(LSP* p, const char* name_buf, size_t name_len,
+                                VecSec* sections) {
+  CfreeLinkOutputSection sec;
+  VecMatch inputs = {0};
+  VecAsn asns = {0};
+  CfreeLinkExpr* align_n = NULL;
+
+  /* Nothing is heap-allocated yet, so plain returns are safe here. */
+  if (expect_ch(p, ':')) return 1;
+  skip_ws(p);
+  if (p->err) return 1;
+  if (match_kw(p, "ALIGN")) {
+    if (expect_ch(p, '(')) return 1;
+    align_n = parse_expr(p);
+    if (!align_n) return 1;
+    if (expect_ch(p, ')')) return 1;
+  }
+  /* Reject AT>, > REGION, >REGION before the body. */
+  skip_ws(p);
+  if (p->err) return 1;
+  if (p->pos < p->len &&
+      (p->src[p->pos] == '>' || (p->src[p->pos] == 'A' && match_kw(p, "AT")))) {
+    lsp_errf(p, p->pos,
+             "memory-region placement (>REGION / AT>) not supported");
+    return 1;
+  }
+
+  /* Section header alignment is encoded as the first asn — applicator
+   * pulls it before processing inputs. */
+  if (align_n) {
+    if (push_dot_align(p, &asns, align_n)) goto fail;
+  }
+
+  if (parse_section_body(p, &inputs, &asns)) goto fail;
+
+  /* Optional trailing `> REGION` / `AT> REGION` / `: NOLOAD` etc. — all
+   * unsupported. We allow an optional trailing `;` and nothing else. */
+  (void)match_ch(p, ';');
+
+  /* Materialize. */
+  {
+    CfreeLinkInputMatch* arr_in = NULL;
+    CfreeLinkAssignment* arr_as = NULL;
+    if (inputs.n) {
+      arr_in = arena_array(p->c->tu, CfreeLinkInputMatch, inputs.n);
+      if (!arr_in) goto fail;
+      memcpy(arr_in, inputs.p, sizeof(*arr_in) * inputs.n);
+    }
+    if (asns.n) {
+      arr_as = arena_array(p->c->tu, CfreeLinkAssignment, asns.n);
+      if (!arr_as) goto fail;
+      memcpy(arr_as, asns.p, sizeof(*arr_as) * asns.n);
+    }
+    memset(&sec, 0, sizeof(sec));
+    sec.name = lsp_strdup(p, name_buf, name_len);
+    /* an unnamed section cannot be told apart from /DISCARD/ or placed */
+    if (!sec.name) goto fail;
+    sec.inputs = arr_in;
+    sec.ninputs = inputs.n;
+    sec.asns = arr_as;
+    sec.nasns = asns.n;
+  }
+
+  vec_free_(p, inputs.p, inputs.cap, sizeof(*inputs.p));
+  vec_free_(p, asns.p, asns.cap, sizeof(*asns.p));
+
+  return VEC_PUSH(p, *sections, sec);
+
+fail:
+  vec_free_(p, inputs.p, inputs.cap, sizeof(*inputs.p));
+  vec_free_(p, asns.p, asns.cap, sizeof(*asns.p));
+  return 1;
+}
+
+/* ---- SECTIONS{...} ---- */
+
+/* Parse the `{ ... }` that follows the SECTIONS keyword. Items:
+ *   /DISCARD/ : { body }    output section with the sentinel name
+ *   . = expr                top-level dot assignment
+ *   name : ... { body }     output section
+ *   name = expr             top-level symbol assignment
+ * Assignments are appended to top_asns, output sections to `sections`.
+ * PROVIDE is rejected. Returns 0 once the closing '}' is consumed, 1 on
+ * a parse error or allocation failure (including failure to copy a
+ * name into the arena). */
+static int parse_sections_block(LSP* p, VecAsn* top_asns, VecSec* sections) {
+  if (expect_ch(p, '{')) return 1;
+  for (;;) {
+    int ch;
+    skip_ws(p);
+    if (p->err) return 1;
+    ch = peek_ch(p);
+    if (ch == '}') {
+      ++p->pos;
+      return 0;
+    }
+    if (ch < 0) {
+      lsp_errf(p, p->pos, "unterminated SECTIONS block");
+      return 1;
+    }
+    /* /DISCARD/ : { body } */
+    if (ch == '/') {
+      static const char kDiscard[] = "/DISCARD/";
+      size_t klen = sizeof(kDiscard) - 1;
+      if (p->pos + klen <= p->len &&
+          memcmp(p->src + p->pos, kDiscard, klen) == 0) {
+        p->pos += klen;
+        if (parse_output_section(p, kDiscard, klen, sections)) return 1;
+        continue;
+      }
+      lsp_errf(p, p->pos, "expected /DISCARD/ or section header");
+      return 1;
+    }
+    /* `. = expr;` at SECTIONS top level */
+    if (ch == '.') {
+      size_t off = p->pos;
+      /* Distinguish bare-dot (`. =`) from `.text :` head. Bare dot has
+       * no id-cont following. */
+      if (p->pos + 1 < p->len &&
+          is_id_cont((unsigned char)p->src[p->pos + 1])) {
+        /* falls through to identifier path */
+      } else {
+        CfreeLinkExpr* e;
+        CfreeLinkAssignment a;
+        ++p->pos;
+        skip_ws(p);
+        if (p->err) return 1;
+        if (!match_ch(p, '=')) {
+          lsp_errf(p, off, "expected `. = expr`");
+          return 1;
+        }
+        e = parse_expr(p);
+        if (!e) return 1;
+        (void)match_ch(p, ';');
+        a.kind = CFREE_LAS_DOT;
+        a.sym = NULL;
+        a.expr = e;
+        if (VEC_PUSH(p, *top_asns, a)) return 1;
+        continue;
+      }
+    }
+    if (is_id_start(ch)) {
+      /* either `name :` (output section) or `sym = expr;` */
+      const char* s;
+      size_t n;
+      size_t name_off;
+      if (match_kw(p, "PROVIDE")) {
+        lsp_errf(p, p->pos, "PROVIDE not supported in this subset");
+        return 1;
+      }
+      name_off = p->pos;
+      if (lex_ident(p, &s, &n)) return 1;
+      skip_ws(p);
+      if (p->err) return 1;
+      if (p->pos < p->len && p->src[p->pos] == ':') {
+        char* nm = lsp_strdup(p, s, n);
+        if (!nm) return 1;
+        if (parse_output_section(p, nm, n, sections)) return 1;
+        continue;
+      }
+      if (match_ch(p, '=')) {
+        CfreeLinkExpr* e = parse_expr(p);
+        CfreeLinkAssignment a;
+        if (!e) return 1;
+        (void)match_ch(p, ';');
+        a.kind = CFREE_LAS_SYM;
+        a.sym = lsp_strdup(p, s, n);
+        /* same check as for `nm` above: never record a nameless symbol */
+        if (!a.sym) return 1;
+        a.expr = e;
+        if (VEC_PUSH(p, *top_asns, a)) return 1;
+        continue;
+      }
+      lsp_errf(p, name_off,
+               "expected `:` (output section) or `=` (assignment) after "
+               "`%.*s`",
+               (int)n, s);
+      return 1;
+    }
+    lsp_errf(p, p->pos, "unexpected '%c' in SECTIONS body", (char)ch);
+    return 1;
+  }
+}
+
+/* ---- top level ---- */
+
+/* Parse the whole script: any number of ENTRY(sym) directives (the last
+ * one wins) and at most one SECTIONS { ... } block. Known but
+ * unsupported directives and anything unrecognized are errors.
+ *
+ * On success the entry name, top-level assignments and output sections
+ * are copied into the tu arena and stored in *out (regions is always
+ * empty). The temporary vectors are released on every path.
+ * Returns 0 on success, 1 on a parse error or allocation failure. */
+static int parse_top(LSP* p, CfreeLinkScript* out) {
+  VecAsn top_asns = {0};
+  VecSec sections = {0};
+  const char* entry_name = NULL;
+  int saw_sections = 0;
+  int rc = 1;
+
+  for (;;) {
+    int ch;
+    skip_ws(p);
+    if (p->err) goto done;
+    if (p->pos >= p->len) break;
+    ch = (unsigned char)p->src[p->pos];
+
+    if (is_id_start(ch)) {
+      if (match_kw(p, "ENTRY")) {
+        const char* s;
+        size_t n;
+        if (expect_ch(p, '(')) goto done;
+        if (lex_ident(p, &s, &n)) goto done;
+        if (expect_ch(p, ')')) goto done;
+        (void)match_ch(p, ';');
+        entry_name = lsp_strdup(p, s, n);
+        if (!entry_name) goto done;
+        continue;
+      }
+      if (match_kw(p, "SECTIONS")) {
+        if (saw_sections) {
+          lsp_errf(p, p->pos, "duplicate SECTIONS block");
+          goto done;
+        }
+        if (parse_sections_block(p, &top_asns, &sections)) goto done;
+        saw_sections = 1;
+        continue;
+      }
+      if (match_kw(p, "MEMORY") || match_kw(p, "OVERLAY") ||
+          match_kw(p, "INSERT") || match_kw(p, "OUTPUT_FORMAT") ||
+          match_kw(p, "OUTPUT_ARCH") || match_kw(p, "INPUT") ||
+          match_kw(p, "GROUP") || match_kw(p, "VERSION") ||
+          match_kw(p, "PROVIDE") || match_kw(p, "STARTUP") ||
+          match_kw(p, "SEARCH_DIR") || match_kw(p, "TARGET")) {
+        lsp_errf(p, p->pos,
+                 "directive not supported in this linker-script subset");
+        goto done;
+      }
+      lsp_errf(p, p->pos, "unknown top-level directive");
+      goto done;
+    }
+    lsp_errf(p, p->pos, "unexpected '%c' at top level", (char)ch);
+    goto done;
+  }
+
+  /* Materialize. */
+  out->entry = entry_name;
+  out->regions = NULL;
+  out->nregions = 0;
+  out->top_asns = NULL;
+  out->ntop_asns = 0;
+  out->sections = NULL;
+  out->nsections = 0;
+
+  if (top_asns.n) {
+    CfreeLinkAssignment* a =
+        arena_array(p->c->tu, CfreeLinkAssignment, top_asns.n);
+    if (!a) goto done;
+    memcpy(a, top_asns.p, sizeof(*a) * top_asns.n);
+    out->top_asns = a;
+    out->ntop_asns = top_asns.n;
+  }
+  if (sections.n) {
+    CfreeLinkOutputSection* s =
+        arena_array(p->c->tu, CfreeLinkOutputSection, sections.n);
+    if (!s) goto done;
+    memcpy(s, sections.p, sizeof(*s) * sections.n);
+    out->sections = s;
+    out->nsections = sections.n;
+  }
+  rc = 0;
+
+done:
+  vec_free_(p, top_asns.p, top_asns.cap, sizeof(*top_asns.p));
+  vec_free_(p, sections.p, sections.cap, sizeof(*sections.p));
+  return rc;
+}
+
+/* ---- public API ---- */
+
+/* Parse `len` bytes of linker-script `text` into an arena-owned
+ * CfreeLinkScript. On success stores the script in *out and returns 0.
+ * Returns 1, leaving *out untouched, when an argument is NULL, the
+ * compiler has no env or heap, allocation fails, or the script does not
+ * parse. */
+int cfree_link_script_parse(CfreeCompiler* c, const char* text, size_t len,
+                            const CfreeLinkScript** out) {
+  LSP parser;
+  CfreeLinkScript* result;
+
+  if (c == NULL || text == NULL || out == NULL) return 1;
+  if (c->env == NULL || c->env->heap == NULL) return 1;
+
+  result = arena_znew(c->tu, CfreeLinkScript);
+  if (result == NULL) return 1;
+
+  memset(&parser, 0, sizeof(parser));
+  parser.src = text;
+  parser.len = len;
+  parser.heap = (Heap*)c->env->heap;
+  parser.c = c;
+
+  if (parse_top(&parser, result) != 0) return 1;
+  if (parser.err) return 1;
+
+  *out = result;
+  return 0;
+}
+
+/* Intentionally does nothing: the script lives in the compiler's tu
+ * arena and is released together with it. */
+void cfree_link_script_free(CfreeCompiler* c, const CfreeLinkScript* s) {
+  (void)s;
+  (void)c;
+}
diff --git a/test/link/CORPUS.md b/test/link/CORPUS.md
@@ -39,6 +39,8 @@ expects from the combined sequence.
| `gc_absent` | one symbol per line that must be absent post-link (e.g. dropped by `--gc-sections`) |
| `gc_present` | one symbol per line that must remain present post-link |
| `archive_b` | package b.o as b.a; content `demand` (normal) or `whole` (--whole-archive) |
+| `linker_script` | basename of an `.lds` file in the case dir; passed to both runners via `--linker-script` |
+| `kernel_image` | empty marker; case is a freestanding kernel image. Skips R and J; on E, runs the linked exe under `qemu-system-aarch64 -kernel` with semihosting |
Negative tests live in `test/link/bad/<name>/` instead of `cases/`. Each
bad-case directory contains source files (compile cleanly) plus an
@@ -118,6 +120,12 @@ Cases 02–09 all pair ADR_PREL_PG_HI21 with their primary LDST reloc.
|---|------|-----------|
| 31 | `tls_local_exec` | `_Thread_local` w/ initializer; `R_AARCH64_TLSLE_ADD_TPREL_{HI12,LO12_NC}` apply + PT_TLS layout |
+### Group G — linker scripts
+
+| # | Name | Exercises |
+|---|------|-----------|
+| 35 | `linker_script_kernel` | `ENTRY`; `SECTIONS { . = 0x40080000; ... }` with `.text`/`.rodata`/`.data`/`.bss` output sections using `ALIGN`; `__bss_start`, `_end`; `/DISCARD/` of `.note.*`, `.comment`, `.eh_frame`. Linked image boots under `qemu-system-aarch64 -kernel` and exits via ARM semihosting. |
+
### bad/ — negative tests
| # | Name | Exercises |
diff --git a/test/link/cases/35_linker_script_kernel/a.c b/test/link/cases/35_linker_script_kernel/a.c
@@ -0,0 +1,9 @@
+static const char msg[] = "ok";
+static int counter;
+
+int kmain(void) {
+  volatile const char* m = msg; /* volatile view of the const string */
+  volatile int* c = &counter;   /* volatile view of the static counter */
+  *c = (int)m[0];               /* one read of msg, one write to counter */
+  return 0;                     /* entry.S takes the .Lfail path on non-zero */
+}
diff --git a/test/link/cases/35_linker_script_kernel/cflags b/test/link/cases/35_linker_script_kernel/cflags
@@ -0,0 +1 @@
+-fno-asynchronous-unwind-tables
diff --git a/test/link/cases/35_linker_script_kernel/entry.S b/test/link/cases/35_linker_script_kernel/entry.S
@@ -0,0 +1,79 @@
+/* arm64 kernel entry: Image header + EL2->EL1 drop + stack + kmain.
+ * Booted by qemu-system-aarch64 -kernel; exits via ARM semihosting
+ * SYS_EXIT_EXTENDED with host exit code 0 if kmain returned 0, else 1. */
+
+ .section .text, "ax"
+ .globl _start
+_start:
+ b stext /* 0x00 */
+ .long 0 /* 0x04 */
+ .quad 0x80000 /* 0x08 text_offset */
+ .quad _end - _start /* 0x10 image_size */
+ .quad 0xa /* 0x18 flags: 4K, anywhere, LE */
+ .quad 0 /* 0x20 */
+ .quad 0 /* 0x28 */
+ .quad 0 /* 0x30 */
+ .ascii "ARM\x64" /* 0x38 magic */
+ .long 0 /* 0x3c PE COFF offset */
+
+stext:
+  msr daifset, #0xf               /* set all four DAIF mask bits */
+
+  mrs x9, CurrentEL
+  lsr x9, x9, #2                  /* x9 = CurrentEL >> 2 */
+  cmp x9, #2
+  b.ne in_el1                     /* not 2: skip the EL2 setup below */
+
+  mov x9, #(1 << 31)
+  msr hcr_el2, x9                 /* only bit 31 set (presumably HCR_EL2.RW; confirm) */
+  mov x9, #0x3c5
+  msr spsr_el2, x9                /* state restored by eret (0x3c5: presumably EL1h with DAIF masked; confirm) */
+  adrp x9, in_el1
+  add x9, x9, :lo12:in_el1
+  msr elr_el2, x9                 /* eret resumes at in_el1 */
+  adrp x9, kstack_top
+  add x9, x9, :lo12:kstack_top
+  msr sp_el1, x9                  /* EL1 stack pointer = kstack_top */
+  eret
+
+in_el1:
+  adrp x9, kstack_top
+  add x9, x9, :lo12:kstack_top
+  mov sp, x9                      /* sp = kstack_top */
+
+  bl kmain
+
+  /* QEMU semihosting exit. SYS_EXIT_EXTENDED (op 0x20) takes x1 =
+   * pointer to [reason, subcode]; ADP_Stopped_ApplicationExit
+   * (0x20026) returns subcode as the host exit code. */
+  cbnz w0, .Lfail                 /* non-zero kmain result: failure exit */
+  mov w0, #0x20                   /* op = SYS_EXIT_EXTENDED */
+  adrp x1, .Lexit_ok
+  add x1, x1, :lo12:.Lexit_ok     /* x1 -> [0x20026, 0] */
+  hlt #0xf000                     /* issue the semihosting request */
+.Lhang_ok:
+  wfe                             /* loop here if the hlt returns */
+  b .Lhang_ok
+.Lfail:
+  mov w0, #0x20                   /* op = SYS_EXIT_EXTENDED */
+  adrp x1, .Lexit_fail
+  add x1, x1, :lo12:.Lexit_fail   /* x1 -> [0x20026, 1] */
+  hlt #0xf000                     /* issue the semihosting request */
+.Lhang_fail:
+  wfe                             /* loop here if the hlt returns */
+  b .Lhang_fail
+
+ .section .rodata, "a"
+ .balign 8
+.Lexit_ok:
+ .quad 0x20026 /* ADP_Stopped_ApplicationExit */
+ .quad 0 /* subcode = host exit 0 */
+.Lexit_fail:
+ .quad 0x20026
+ .quad 1 /* subcode = host exit 1 */
+
+ .section .bss, "aw", %nobits
+ .balign 16
+kstack_bottom:
+ .skip 4096
+kstack_top:
diff --git a/test/link/cases/35_linker_script_kernel/expected b/test/link/cases/35_linker_script_kernel/expected
@@ -0,0 +1 @@
+0
diff --git a/test/link/cases/35_linker_script_kernel/kernel.lds b/test/link/cases/35_linker_script_kernel/kernel.lds
@@ -0,0 +1,29 @@
+ENTRY(_start)
+
+SECTIONS {
+  . = 0x40080000; /* location counter before the first output section */
+
+  .text : ALIGN(8) {
+    *(.text .text.*)
+  }
+
+  .rodata : ALIGN(8) {
+    *(.rodata .rodata.*)
+  }
+
+  .data : ALIGN(8) {
+    *(.data .data.*)
+  }
+
+  .bss : ALIGN(16) {
+    __bss_start = .; /* dot just before the .bss inputs */
+    *(.bss .bss.*)
+    . = ALIGN(., 16); /* round the end of .bss up to 16 bytes */
+  }
+
+  _end = .; /* entry.S uses `_end - _start` as image_size */
+
+  /DISCARD/ : {
+    *(.note.*) *(.comment) *(.eh_frame)
+  }
+}
diff --git a/test/link/cases/35_linker_script_kernel/kernel_image b/test/link/cases/35_linker_script_kernel/kernel_image
diff --git a/test/link/cases/35_linker_script_kernel/linker_script b/test/link/cases/35_linker_script_kernel/linker_script
@@ -0,0 +1 @@
+kernel.lds
diff --git a/test/link/harness/jit_runner.c b/test/link/harness/jit_runner.c
@@ -3,6 +3,7 @@
* Usage:
* jit_runner [--gc-sections] [--use-resolver]
* [--check-absent SYM] [--check-present SYM]
+ * [--linker-script <path>]
* [--archive [--whole-archive] <lib.a>] <in.o> ...
*
* Reads .o (and optionally .a) inputs, calls cfree_link_jit (which runs
@@ -259,6 +260,7 @@ int main(int argc, char** argv) {
* --check-present SYM: after link, verify symbol IS in image. */
const char* check_absent = NULL;
const char* check_present = NULL;
+ const char* script_path = NULL;
CfreeBytesInput objs[64];
CfreeBytesInputArchive archives[16];
@@ -280,6 +282,8 @@ int main(int argc, char** argv) {
check_absent = argv[++i];
} else if (!strcmp(argv[i], "--check-present") && i + 1 < argc) {
check_present = argv[++i];
+ } else if (!strcmp(argv[i], "--linker-script") && i + 1 < argc) {
+ script_path = argv[++i];
} else {
uint8_t* data;
size_t len;
@@ -339,6 +343,26 @@ int main(int argc, char** argv) {
opts.inputs.extern_resolver_user = NULL;
}
+ if (script_path) {
+ uint8_t* sbytes;
+ size_t slen;
+ if (slurp(script_path, &sbytes, &slen)) {
+ fprintf(stderr, "jit-runner: cannot read %s\n", script_path);
+ cfree_compiler_free(c);
+ return 2;
+ }
+ const CfreeLinkScript* script = NULL;
+ int prc = cfree_link_script_parse(c, (const char*)sbytes, slen, &script);
+ free(sbytes);
+ if (prc) {
+ fprintf(stderr, "jit-runner: linker script parse failed: %s\n",
+ script_path);
+ cfree_compiler_free(c);
+ return 1;
+ }
+ opts.inputs.linker_script = script;
+ }
+
CfreeJit* jit = NULL;
int rc = cfree_link_jit(c, &opts, &jit);
for (int i = 0; i < nbufs; i++) free(bufs[i]);
diff --git a/test/link/harness/link_exe_runner.c b/test/link/harness/link_exe_runner.c
@@ -1,7 +1,8 @@
/* link_exe_runner — Path E harness driver.
*
* Usage:
- * link_exe_runner [--gc-sections] [--entry NAME] -o <out.exe>
+ * link_exe_runner [--gc-sections] [--entry NAME] [--linker-script <path>]
+ * -o <out.exe>
* [--archive [--whole-archive] <lib.a>] <in.o> ...
*
* Reads inputs, calls cfree_link_exe, writes the ELF executable.
@@ -98,6 +99,7 @@ static int write_exe(const char* path, const uint8_t* data, size_t len) {
int main(int argc, char** argv) {
const char* out_path = NULL;
const char* entry_name = "_start";
+ const char* script_path = NULL;
int gc_sections = 0;
int next_archive = 0;
int next_whole = 0;
@@ -118,6 +120,8 @@ int main(int argc, char** argv) {
next_archive = 1;
} else if (!strcmp(argv[i], "--entry") && i + 1 < argc) {
entry_name = argv[++i];
+ } else if (!strcmp(argv[i], "--linker-script") && i + 1 < argc) {
+ script_path = argv[++i];
} else if (!strcmp(argv[i], "-o") && i + 1 < argc) {
out_path = argv[++i];
} else {
@@ -177,6 +181,26 @@ int main(int argc, char** argv) {
opts.inputs.entry = entry_name;
opts.gc_sections = gc_sections;
+ if (script_path) {
+ uint8_t* sbytes;
+ size_t slen;
+ if (slurp(script_path, &sbytes, &slen)) {
+ fprintf(stderr, "link-exe-runner: cannot read %s\n", script_path);
+ cfree_compiler_free(c);
+ return 2;
+ }
+ const CfreeLinkScript* script = NULL;
+ int prc = cfree_link_script_parse(c, (const char*)sbytes, slen, &script);
+ free(sbytes);
+ if (prc) {
+ fprintf(stderr, "link-exe-runner: linker script parse failed: %s\n",
+ script_path);
+ cfree_compiler_free(c);
+ return 1;
+ }
+ opts.inputs.linker_script = script;
+ }
+
CfreeWriter* w = cfree_writer_mem(&g_heap);
if (!w) {
cfree_compiler_free(c);