kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit d11c5c90c2371c2d260a161f8f7d61fc30824615
parent 809a8e5e87c71ee3f3213e7e6f1f31eb972b70da
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri,  5 Jun 2026 21:01:51 -0700

Teach ld -lc to use hosted libc

Diffstat:
M doc/plan/TODO.md | 62 --------------------------------------------------------------
M driver/cmd/ld.c | 323 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
M driver/lib/hosted.c | 4 ++--
M src/obj/elf/link.c | 10 +++++-----
M src/obj/elf/link_dyn.c | 20 +++++++++++++++-----
M test/driver/run.sh | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 423 insertions(+), 101 deletions(-)

diff --git a/doc/plan/TODO.md b/doc/plan/TODO.md @@ -193,65 +193,3 @@ program loads at an ASLR base, so captured code addresses don't match link-time addresses without computing the load slide. `-no-pie` should clear `o->pie` (and ideally `-static` without `-pie` should default to non-PIE). Found while making a backtrace demo's addresses line up with `kit addr2line`. - -## `kit cc --sysroot=<musl>` one-shot: vestigial interp on a static image, and no way to force a dynamic libc link - -Two related problems with one-shot `kit cc` against a musl sysroot (a sysroot -that ships both `libc.a` and `libc.so`, the normal case): - -1. **Vestigial `PT_INTERP` + empty `PT_DYNAMIC` on an effectively-static - binary.** `kit cc -fPIE --sysroot=<musl> foo.c -o exe` links `libc.a` - statically — `printf`/`__libc_start_main` come out `T` (defined in the image), - the Dynamic Section is empty (no `DT_NEEDED`) — yet because the output is - ET_DYN (PIE) kit ld stamps its default musl `PT_INTERP` - (`/lib/ld-musl-<arch>.so.1`) and an empty `PT_DYNAMIC`. The result is a static - image wearing a dynamic costume: it references a loader it doesn't need and - carries a pointless dynamic segment. It happens to run where that loader path - exists, but it's malformed and would fail to start on a system without that - exact interp. kit ld should not emit a `PT_INTERP`/`PT_DYNAMIC` when the link - produced no dynamic dependencies. - -2. **No way to get a genuine dynamic libc link through `kit cc`.** The hosted - profile selector `hosted_resolve_linux` (`driver/lib/hosted.c:392-397`) routes - to `hosted_resolve_linux_musl_static` whenever `libc.a` is present — the rule - at line 394 (`has_libc_a && !(has_libc_so6 && has_glibc_nonshared)`) does not - consult `req->static_link`, and it precedes the `musl_dynamic` branch (line - 396), so `hosted_resolve_linux_musl_dynamic` is unreachable for any musl - sysroot that ships `libc.a`. No `kit cc` flag (`-Bdynamic`, absence of - `-static`, etc.) 
flips it. The only way to get a real dynamic exe today - (`printf` as a `U` import + `DT_NEEDED libc.musl-<arch>.so.1`) is to drop to an - explicit two-step `kit cc -c` + `kit ld -pie … Scrt1.o crti.o obj libc.so - libkit_rt.a crtn.o` (what test/libc/musl/run.sh's dynamic lane does). The - static rule should respect a default-dynamic link mode (or at least let - `-Bdynamic`/non-`-static` reach the dynamic branch). Found verifying - `__kit_backtrace` on dynamically-linked binaries (doc/plan/BACKTRACE.md). - -## `kit ld` can't link against a sysroot with just `-lc` (no `--sysroot`, no crt auto-add, `-l` is `.a`-only) - -The ergonomic expectation — point `kit ld` at a sysroot and pass `-lc`, without -hand-listing crt objects and the raw `libc.so` path — does not work. Three gaps, -all reproduced against `build/musl-sysroot`: - -1. **No `--sysroot`.** `kit ld --sysroot=<dir> …` → `ld: unknown flag: - --sysroot=…`. GNU ld supports `--sysroot` (and the `=`-prefix path rewrite); - kit ld only has `-L`. -2. **No crt auto-provision.** `kit ld -L <sysroot>/lib -lc -pie -o exe obj - libkit_rt.a` → `fatal: link: entry symbol '_start' not defined`. kit ld does - not pull the sysroot's start files, so the caller must pass - `crt1.o`/`Scrt1.o` + `crti.o` + `crtn.o` explicitly. (crt selection is - traditionally the cc driver's job, but kit's own one-step `kit cc` can't - produce a dynamic libc link either — see the item above — so there is no - convenient route at all.) -3. **`-l NAME` resolves only `lib<NAME>.a`.** Per `kit ld --help` and observed - behavior, `-lc` finds `libc.a` (static) only; it never considers - `libc.so`/`libc.so.N`. So even once crts are sorted, `-lc` yields a static - libc — a dynamic link still requires handing `libc.so` to kit ld as a - positional input. GNU ld searches `.so` then `.a` (honoring `-Bstatic`/ - `-Bdynamic`); kit ld should do the same so `-lc` can produce a dynamic - dependency. 
- -Net: a dynamic libc link today requires the explicit -`kit ld -pie … Scrt1.o crti.o obj libc.so libkit_rt.a crtn.o` form. Wiring -`--sysroot`, `.so`-aware `-l` resolution, and (optionally) crt auto-add would let -`kit ld --sysroot=<dir> -pie -lc -o exe obj` work as expected. Found verifying -`__kit_backtrace` on dynamically-linked binaries (doc/plan/BACKTRACE.md). diff --git a/driver/cmd/ld.c b/driver/cmd/ld.c @@ -5,6 +5,7 @@ #include <string.h> #include "driver.h" +#include "hosted.h" #include "lib_resolve.h" #include "runtime.h" @@ -20,8 +21,11 @@ * -e symbol entry symbol * -T script.ld linker script (parsed, not raw) * --support-dir DIR kit support root for compiler rt + * --sysroot DIR hosted C runtime/sysroot root * -L dir library search path (-l targets) - * -l name resolves to lib<name>.a via -L + * -l c enable hosted CRT/libc expansion + * -l name resolves via -L (.so preferred unless + * -Bstatic/-static) * -static / -pie / -no-pie target.pic * -shared emit a shared library / dylib * -r / --relocatable emit a relocatable partial-link object @@ -82,6 +86,7 @@ typedef struct LdOptions { int no_default_libs; /* -nostdlib / --no-default-libs */ int pic_explicit; /* -static / -pie / -no-pie / -shared seen */ const char* support_dir; /* --support-dir */ + const char* sysroot; /* --sysroot / KIT_SYSROOT */ uint16_t pe_subsystem; /* KitPeSubsystem */ /* PT_INTERP path. NULL means "let libkit pick the target default * (e.g. /lib/ld-musl-aarch64.so.1)". Set by -dynamic-linker / @@ -105,6 +110,9 @@ typedef struct LdOptions { const char** lib_dirs; /* -L */ uint32_t nlib_dirs; + char** owned_paths; /* sysroot-expanded argv/search paths */ + size_t* owned_path_sizes; + uint32_t nowned_paths; /* Shared-library output state. 
*/ int shared; /* -shared */ @@ -119,6 +127,9 @@ typedef struct LdOptions { int gc_sections; /* --gc-sections / --no-gc-sections */ int strip_debug; /* -S / --strip-debug */ int allow_undefined; /* shared output undefined-symbol policy */ + int static_link; /* -static: hosted libc should pick static profile */ + int wants_hosted_libc; /* -lc: expand crt + libc through hosted resolver */ + DriverHostedPlan hosted; /* --build-id state */ uint8_t build_id_mode; /* KitBuildIdMode */ @@ -182,11 +193,14 @@ void driver_help_ld(void) { " -no-pie Disable PIE\n" " --support-dir DIR kit support root for compiler " "runtime\n" + " --sysroot DIR hosted C runtime/sysroot root\n" " (target is otherwise auto-detected from the first object input)\n" "\n" "LIBRARY RESOLUTION\n" " -L DIR Add library search path\n" - " -l NAME Resolve to lib<NAME>.a via -L\n" + " -l c Add hosted CRT objects and libc\n" + " -l NAME Resolve via -L (.so preferred unless " + "static)\n" "\n" "POSITIONAL ARCHIVE STATE (apply to following .a inputs)\n" " --whole-archive Pull every member of following " @@ -237,7 +251,7 @@ void driver_help_ld(void) { /* ---------- argv-sized scratch arrays ---------- */ static int ld_alloc_arrays(LdOptions* o, int argc) { - size_t bound = (size_t)argc + 16u; + size_t bound = (size_t)argc + 32u; o->argv_bound = bound; o->object_files = driver_alloc_zeroed(o->env, bound * sizeof(*o->object_files)); @@ -245,10 +259,14 @@ static int ld_alloc_arrays(LdOptions* o, int argc) { o->dsos = driver_alloc_zeroed(o->env, bound * sizeof(*o->dsos)); o->order = driver_alloc_zeroed(o->env, bound * sizeof(*o->order)); o->lib_dirs = driver_alloc_zeroed(o->env, bound * sizeof(*o->lib_dirs)); + o->owned_paths = driver_alloc_zeroed(o->env, bound * sizeof(*o->owned_paths)); + o->owned_path_sizes = + driver_alloc_zeroed(o->env, bound * sizeof(*o->owned_path_sizes)); o->rpaths = driver_alloc_zeroed(o->env, bound * sizeof(*o->rpaths)); o->rpath_links = driver_alloc_zeroed(o->env, bound * 
sizeof(*o->rpath_links)); if (!o->object_files || !o->archives || !o->dsos || !o->order || - !o->lib_dirs || !o->rpaths || !o->rpath_links) { + !o->lib_dirs || !o->owned_paths || !o->owned_path_sizes || !o->rpaths || + !o->rpath_links) { driver_errf(LD_TOOL, "out of memory"); return 1; } @@ -265,6 +283,26 @@ static void ld_push_order(LdOptions* o, uint8_t kind, uint32_t index) { slot->index = index; } +static void ld_insert_order(LdOptions* o, uint32_t pos, uint8_t kind, + uint32_t index) { + uint32_t i; + if (pos > o->norder) pos = o->norder; + for (i = o->norder; i > pos; --i) o->order[i] = o->order[i - 1u]; + o->order[pos].kind = kind; + o->order[pos].index = index; + o->norder++; +} + +static void ld_push_object(LdOptions* o, const char* path) { + o->object_files[o->nobject_files++] = path; + ld_push_order(o, KIT_LINK_INPUT_OBJ_BYTES, o->nobject_files - 1u); +} + +static void ld_insert_object(LdOptions* o, const char* path, uint32_t pos) { + o->object_files[o->nobject_files++] = path; + ld_insert_order(o, pos, KIT_LINK_INPUT_OBJ_BYTES, o->nobject_files - 1u); +} + static void ld_push_archive(LdOptions* o, const char* path, int owned, size_t owned_size) { LdArchive* a = &o->archives[o->narchives++]; @@ -330,6 +368,81 @@ static int driver_is_so_filename(const char* path) { return 0; } +static char* ld_join2(DriverEnv* env, const char* a, const char* b, + size_t* out_size) { + size_t alen = driver_strlen(a); + size_t blen = driver_strlen(b); + size_t slash = (alen > 0 && a[alen - 1] != '/') ? 
1u : 0u; + size_t bytes = alen + slash + blen + 1u; + char* out = driver_alloc(env, bytes); + size_t off = 0; + if (!out) return NULL; + if (alen) { + driver_memcpy(out + off, a, alen); + off += alen; + } + if (slash) out[off++] = '/'; + if (blen) { + driver_memcpy(out + off, b, blen); + off += blen; + } + out[off] = '\0'; + if (out_size) *out_size = bytes; + return out; +} + +static int ld_own_path(LdOptions* o, char* path, size_t size, + const char** out) { + if (!path) { + driver_errf(LD_TOOL, "out of memory"); + return 1; + } + if (o->nowned_paths >= o->argv_bound) { + driver_free(o->env, path, size); + driver_errf(LD_TOOL, "too many sysroot-expanded paths"); + return 1; + } + o->owned_paths[o->nowned_paths] = path; + o->owned_path_sizes[o->nowned_paths] = size; + o->nowned_paths++; + if (out) *out = path; + return 0; +} + +static int ld_add_sysroot_libdir(LdOptions* o) { + char* path; + size_t size; + const char* owned; + if (!o->sysroot || !o->sysroot[0]) return 0; + path = ld_join2(o->env, o->sysroot, "lib", &size); + if (ld_own_path(o, path, size, &owned) != 0) return 1; + o->lib_dirs[o->nlib_dirs++] = owned; + return 0; +} + +static int ld_sysroot_rewrite_path(LdOptions* o, const char* path, + const char** out) { + const char* tail; + char* joined; + size_t size; + if (!path || path[0] != '=' || !o->sysroot || !o->sysroot[0]) { + *out = path; + return 0; + } + tail = path + 1; + if (tail[0] == '/') tail++; + joined = ld_join2(o->env, o->sysroot, tail, &size); + return ld_own_path(o, joined, size, out); +} + +static int ld_note_library_request(LdOptions* o, const char* name) { + if (driver_streq(name, "c") && !o->no_default_libs) { + o->wants_hosted_libc = 1; + return 1; + } + return 0; +} + /* ---------- --build-id parsing ---------- */ static int hex_nibble(char c) { @@ -576,14 +689,31 @@ static int ld_parse(int argc, char** argv, LdOptions* o) { /* First pass: detect --ms-link-driver up front so the option can * appear anywhere on the command line and 
still affect earlier - * `/...` tokens. (Matches how `link.exe` treats option order as - * non-positional.) */ + * `/...` tokens. Also capture --sysroot before resolving any -l + * entries; GNU ld treats the sysroot as a global search-prefix + * setting, not positional state. */ for (i = 1; i < argc; ++i) { if (driver_streq(argv[i], "--ms-link-driver")) { o->ms_link_driver = 1; - break; + continue; } + if (driver_streq(argv[i], "--sysroot")) { + if (i + 1 < argc) { + o->sysroot = argv[i + 1]; + i++; + } + continue; + } + if (driver_strneq(argv[i], "--sysroot=", 10)) { + o->sysroot = argv[i] + 10; + continue; + } + } + if (!o->sysroot || !o->sysroot[0]) { + const char* env_sysroot = driver_getenv("KIT_SYSROOT"); + if (env_sysroot && env_sysroot[0]) o->sysroot = env_sysroot; } + if (ld_add_sysroot_libdir(o) != 0) return 1; for (i = 1; i < argc; ++i) { const char* a = argv[i]; @@ -732,26 +862,44 @@ static int ld_parse(int argc, char** argv, LdOptions* o) { o->support_dir = a + 14; continue; } + if (driver_streq(a, "--sysroot")) { + if (++i >= argc) { + driver_errf(LD_TOOL, "--sysroot requires an argument"); + return 1; + } + o->sysroot = argv[i]; + continue; + } + if (driver_strneq(a, "--sysroot=", 10)) { + o->sysroot = a + 10; + continue; + } if (driver_strneq(a, "-L", 2)) { const char* dir = a[2] ? a + 2 : (++i < argc ? 
argv[i] : NULL); + const char* rewritten; if (!dir) { driver_errf(LD_TOOL, "-L requires an argument"); return 1; } - o->lib_dirs[o->nlib_dirs++] = dir; + if (ld_sysroot_rewrite_path(o, dir, &rewritten) != 0) return 1; + o->lib_dirs[o->nlib_dirs++] = rewritten; continue; } if ((val = arg_eq_value(a, "--library-path")) != NULL) { - o->lib_dirs[o->nlib_dirs++] = val; + const char* rewritten; + if (ld_sysroot_rewrite_path(o, val, &rewritten) != 0) return 1; + o->lib_dirs[o->nlib_dirs++] = rewritten; continue; } if (driver_streq(a, "--library-path")) { + const char* rewritten; if (++i >= argc) { driver_errf(LD_TOOL, "--library-path requires an argument"); return 1; } - o->lib_dirs[o->nlib_dirs++] = argv[i]; + if (ld_sysroot_rewrite_path(o, argv[i], &rewritten) != 0) return 1; + o->lib_dirs[o->nlib_dirs++] = rewritten; continue; } @@ -766,6 +914,7 @@ static int ld_parse(int argc, char** argv, LdOptions* o) { driver_errf(LD_TOOL, "-l requires an argument"); return 1; } + if (ld_note_library_request(o, name)) continue; /* -Bstatic forces .a only; everything else (default, * -Bdynamic, --as-needed) prefers .so but falls back to .a. */ mode = (o->cur_link_mode == KIT_LM_STATIC) ? LIB_RESOLVE_STATIC_ONLY @@ -796,6 +945,7 @@ static int ld_parse(int argc, char** argv, LdOptions* o) { LibResolveOS resolve_os = (o->target.os == KIT_OS_WINDOWS) ? LIB_RESOLVE_OS_WINDOWS : LIB_RESOLVE_OS_POSIX; + if (ld_note_library_request(o, val)) continue; if (driver_lib_resolve_for_os(o->env, val, mode, resolve_os, o->lib_dirs, o->nlib_dirs, &resolved, &resolved_size, &kind) != 0) { @@ -820,6 +970,7 @@ static int ld_parse(int argc, char** argv, LdOptions* o) { driver_errf(LD_TOOL, "--library requires an argument"); return 1; } + if (ld_note_library_request(o, argv[i])) continue; mode = (o->cur_link_mode == KIT_LM_STATIC) ? LIB_RESOLVE_STATIC_ONLY : LIB_RESOLVE_DYNAMIC_PREFER; resolve_os = (o->target.os == KIT_OS_WINDOWS) ? 
LIB_RESOLVE_OS_WINDOWS @@ -841,6 +992,8 @@ static int ld_parse(int argc, char** argv, LdOptions* o) { if (driver_streq(a, "-static")) { o->target.pic = KIT_PIC_NONE; + o->static_link = 1; + o->cur_link_mode = KIT_LM_STATIC; o->pic_explicit = 1; continue; } @@ -1025,13 +1178,16 @@ static int ld_parse(int argc, char** argv, LdOptions* o) { return 1; } - if (driver_has_suffix(a, ".a")) { - ld_push_archive(o, a, 0, 0); - } else if (driver_is_so_filename(a)) { - ld_push_dso(o, a, 0, 0); - } else { - o->object_files[o->nobject_files++] = a; - ld_push_order(o, KIT_LINK_INPUT_OBJ_BYTES, o->nobject_files - 1u); + { + const char* path; + if (ld_sysroot_rewrite_path(o, a, &path) != 0) return 1; + if (driver_has_suffix(path, ".a")) { + ld_push_archive(o, path, 0, 0); + } else if (driver_is_so_filename(path)) { + ld_push_dso(o, path, 0, 0); + } else { + ld_push_object(o, path); + } } } @@ -1048,11 +1204,21 @@ static int ld_parse(int argc, char** argv, LdOptions* o) { ld_usage(); return 1; } + if (o->wants_hosted_libc && o->no_default_libs) { + driver_errf(LD_TOOL, + "-lc hosted expansion is disabled by -nostdlib/" + "--no-default-libs"); + return 1; + } if (o->relocatable) { if (o->shared) { driver_errf(LD_TOOL, "-r and -shared are incompatible"); return 1; } + if (o->wants_hosted_libc) { + driver_errf(LD_TOOL, "-lc hosted expansion requires executable output"); + return 1; + } /* Only an explicit -pie conflicts; a default PIE target.pic is simply * overridden by -r (driver_link_pie suppresses pie when relocatable). 
*/ if (o->pie) { @@ -1096,14 +1262,22 @@ static void ld_options_release(LdOptions* o) { driver_free(o->env, (void*)d->path, d->owned_size); } } + for (i = 0; i < o->nowned_paths; ++i) { + if (o->owned_paths[i]) + driver_free(o->env, o->owned_paths[i], o->owned_path_sizes[i]); + } if (o->build_id_bytes) { driver_free(o->env, o->build_id_bytes, o->build_id_alloc); } + driver_hosted_plan_fini(o->env, &o->hosted); driver_free(o->env, o->object_files, bound * sizeof(*o->object_files)); driver_free(o->env, o->archives, bound * sizeof(*o->archives)); driver_free(o->env, o->dsos, bound * sizeof(*o->dsos)); driver_free(o->env, o->order, bound * sizeof(*o->order)); driver_free(o->env, o->lib_dirs, bound * sizeof(*o->lib_dirs)); + driver_free(o->env, o->owned_paths, bound * sizeof(*o->owned_paths)); + driver_free(o->env, o->owned_path_sizes, + bound * sizeof(*o->owned_path_sizes)); driver_free(o->env, o->rpaths, bound * sizeof(*o->rpaths)); driver_free(o->env, o->rpath_links, bound * sizeof(*o->rpath_links)); } @@ -1140,6 +1314,82 @@ static void release_all(LoadedFile* arr, uint32_t n) { for (i = 0; i < n; ++i) release_file(&arr[i]); } +static int ld_append_hosted_input(LdOptions* o, const DriverHostedInput* in, + uint32_t insert_pos, int insert) { + switch ((DriverHostedInputKind)in->kind) { + case DRIVER_HOSTED_INPUT_OBJECT: + if (insert) + ld_insert_object(o, in->path, insert_pos); + else + ld_push_object(o, in->path); + return 0; + case DRIVER_HOSTED_INPUT_ARCHIVE: { + LdArchive* a = &o->archives[o->narchives++]; + a->path = in->path; + a->owned = 0; + a->owned_size = 0; + a->whole_archive = 0; + a->link_mode = KIT_LM_DEFAULT; + a->group_id = 0; + if (insert) + ld_insert_order(o, insert_pos, KIT_LINK_INPUT_ARCHIVE, + o->narchives - 1u); + else + ld_push_order(o, KIT_LINK_INPUT_ARCHIVE, o->narchives - 1u); + return 0; + } + case DRIVER_HOSTED_INPUT_DSO: { + LdDso* d = &o->dsos[o->ndsos++]; + d->path = in->path; + d->owned = 0; + d->owned_size = 0; + if (insert) + 
ld_insert_order(o, insert_pos, KIT_LINK_INPUT_DSO, o->ndsos - 1u); + else + ld_push_order(o, KIT_LINK_INPUT_DSO, o->ndsos - 1u); + return 0; + } + default: + driver_errf(LD_TOOL, "internal error: unknown hosted input kind"); + return 1; + } +} + +static int ld_apply_hosted_before_after(LdOptions* o) { + DriverHostedRequest req; + uint32_t i; + uint32_t insert_pos = 0; + if (!o->wants_hosted_libc || o->shared) return 0; + memset(&req, 0, sizeof req); + req.env = o->env; + req.tool = LD_TOOL; + req.target = o->target; + req.sysroot = o->sysroot; + req.static_link = o->static_link; + req.link_inputs = 1; + if (driver_hosted_resolve(&req, &o->hosted) != 0) return 1; + for (i = 0; i < o->hosted.nbefore; ++i) { + if (ld_append_hosted_input(o, &o->hosted.before[i], insert_pos, 1) != 0) + return 1; + insert_pos++; + } + for (i = 0; i < o->hosted.nafter; ++i) { + if (ld_append_hosted_input(o, &o->hosted.after[i], 0, 0) != 0) return 1; + } + if (!o->interp_path && o->hosted.interp_path) + o->interp_path = o->hosted.interp_path; + return 0; +} + +static int ld_apply_hosted_final(LdOptions* o) { + uint32_t i; + if (!o->hosted.profile_name) return 0; + for (i = 0; i < o->hosted.nfinal; ++i) { + if (ld_append_hosted_input(o, &o->hosted.final[i], 0, 0) != 0) return 1; + } + return 0; +} + /* ---------- link execution ---------- */ /* Run the link. Returns 0 on success, nonzero on error. The caller owns @@ -1163,6 +1413,7 @@ static int ld_run_link(LdOptions* o) { DriverRuntimeSupport runtime = {0}; DriverRuntimeArchive rt_archive = {0}; uint32_t i; + uint32_t initial_nobject_files; int runtime_resolved = 0; int rc = 1; @@ -1171,17 +1422,20 @@ static int ld_run_link(LdOptions* o) { return 1; } - /* Load object files first so the final target is known before deciding - * which compiler-runtime archive to auto-link. 
*/ - if (o->nobject_files) { - obj_lf = driver_alloc_zeroed(o->env, o->nobject_files * sizeof(*obj_lf)); - obj_in = driver_alloc_zeroed(o->env, o->nobject_files * sizeof(*obj_in)); + /* Load the caller's object files first so the final target is known before + * deciding which hosted CRT/libc profile and compiler-runtime archive to + * add. The arrays are sized to the argv bound because hosted expansion may + * append start files after target detection. */ + initial_nobject_files = o->nobject_files; + if (o->argv_bound) { + obj_lf = driver_alloc_zeroed(o->env, o->argv_bound * sizeof(*obj_lf)); + obj_in = driver_alloc_zeroed(o->env, o->argv_bound * sizeof(*obj_in)); if (!obj_lf || !obj_in) { driver_errf(LD_TOOL, "out of memory"); goto out; } } - for (i = 0; i < o->nobject_files; ++i) { + for (i = 0; i < initial_nobject_files; ++i) { const char* path = o->object_files[i]; if (load_file(io, path, &obj_lf[i]) != 0) { driver_errf(LD_TOOL, "failed to read: %.*s", @@ -1202,13 +1456,13 @@ static int ld_run_link(LdOptions* o) { * a freestanding (`*-none-elf`, EI_OSABI=STANDALONE) object means a freestanding * link even if a foreign object (e.g. a clang-assembled startup stub, which * stamps EI_OSABI=SysV and so decodes as Linux) appears first. 
*/ - if (o->nobject_files > 0) { + if (initial_nobject_files > 0) { KitTargetSpec detected; if (kit_detect_target(obj_lf[0].data.data, obj_lf[0].data.size, &detected) == KIT_OK) { uint8_t pic = o->target.pic; uint32_t oi; - for (oi = 1; oi < o->nobject_files; ++oi) { + for (oi = 1; oi < initial_nobject_files; ++oi) { KitTargetSpec t; if (kit_detect_target(obj_lf[oi].data.data, obj_lf[oi].data.size, &t) == KIT_OK) { @@ -1234,6 +1488,8 @@ static int ld_run_link(LdOptions* o) { } } + if (ld_apply_hosted_before_after(o) != 0) goto out; + /* Auto-link kit's compiler runtime for any target that has a variant — * including the freestanding riscv32-none-elf / riscv64-none-elf targets, * whose runtime (and, for rv32, the float-ABI it was detected with) is @@ -1254,6 +1510,19 @@ static int ld_run_link(LdOptions* o) { ld_push_runtime_archive(o, &rt_archive); } + if (ld_apply_hosted_final(o) != 0) goto out; + + for (i = initial_nobject_files; i < o->nobject_files; ++i) { + const char* path = o->object_files[i]; + if (load_file(io, path, &obj_lf[i]) != 0) { + driver_errf(LD_TOOL, "failed to read: %.*s", + KIT_SLICE_ARG(kit_slice_cstr(path))); + goto out; + } + obj_in[i].data = obj_lf[i].data.data; + obj_in[i].len = obj_lf[i].data.size; + } + if (o->narchives) { arch_lf = driver_alloc_zeroed(o->env, o->narchives * sizeof(*arch_lf)); arch_in = driver_alloc_zeroed(o->env, o->narchives * sizeof(*arch_in)); @@ -1461,8 +1730,8 @@ out: release_all(dso_lf, o->ndsos); if (arch_in) driver_free(o->env, arch_in, o->narchives * sizeof(*arch_in)); if (arch_lf) driver_free(o->env, arch_lf, o->narchives * sizeof(*arch_lf)); - if (obj_in) driver_free(o->env, obj_in, o->nobject_files * sizeof(*obj_in)); - if (obj_lf) driver_free(o->env, obj_lf, o->nobject_files * sizeof(*obj_lf)); + if (obj_in) driver_free(o->env, obj_in, o->argv_bound * sizeof(*obj_in)); + if (obj_lf) driver_free(o->env, obj_lf, o->argv_bound * sizeof(*obj_lf)); if (dso_in) driver_free(o->env, dso_in, o->ndsos * 
sizeof(*dso_in)); if (dso_lf) driver_free(o->env, dso_lf, o->ndsos * sizeof(*dso_lf)); return rc; diff --git a/driver/lib/hosted.c b/driver/lib/hosted.c @@ -391,10 +391,10 @@ static int hosted_resolve_linux(const DriverHostedRequest* req, has_glibc_nonshared = hosted_libdir_has(req->env, dirs, "libc_nonshared.a"); if (!req->static_link && has_libc_so6 && has_glibc_nonshared) return hosted_resolve_linux_glibc_dynamic(req, dirs, plan); - if (has_libc_a && !(has_libc_so6 && has_glibc_nonshared)) - return hosted_resolve_linux_musl_static(req, dirs, plan); if (!req->static_link && has_libc_so) return hosted_resolve_linux_musl_dynamic(req, dirs, plan); + if (has_libc_a && !(has_libc_so6 && has_glibc_nonshared)) + return hosted_resolve_linux_musl_static(req, dirs, plan); driver_errf(req->tool, "no supported Linux hosted libc found (searched %u library " "dir(s))", diff --git a/src/obj/elf/link.c b/src/obj/elf/link.c @@ -922,7 +922,8 @@ void link_emit_elf(LinkImage* img, Writer* w) { * * 1 headers PT_LOAD + nsegments PT_LOAD + 1 PT_NOTE (build-id) * + 1 PT_TLS when this image carries any TLS sections. - * + 4 dyn phdrs (PT_PHDR / PT_INTERP / PT_DYNAMIC / PT_GNU_STACK) on PIE. + * + 4 dyn phdrs (PT_PHDR / PT_INTERP / PT_DYNAMIC / PT_GNU_STACK) when PIE + * also has real dynamic-link state. * * Scripted images skip the headers PT_LOAD and PT_NOTE: phdrs are * just the per-segment PT_LOADs. */ @@ -937,7 +938,7 @@ void link_emit_elf(LinkImage* img, Writer* w) { } } } - u32 nphdr_extra_dyn = pie ? 4u : 0u; + u32 nphdr_extra_dyn = (pie && img->dyn) ? 4u : 0u; u32 nphdr_headers = scripted ? 0u : 1u; u32 nphdr_buildid = scripted ? 0u : 1u; u32 nphdr_total = nphdr_headers + img->nsegments + nphdr_buildid + @@ -1359,7 +1360,7 @@ void link_emit_elf(LinkImage* img, Writer* w) { * PT_LOAD. Required by the runtime loader for ET_DYN to know * where its own program headers live. Must appear before the * first PT_LOAD on dynamic exes (musl checks). 
*/ - if (pie) { + if (pie && img->dyn) { phdrs[pi].p_type = PT_PHDR; phdrs[pi].p_flags = PF_R; phdrs[pi].p_offset = sizeof(Ehdr64); @@ -1488,8 +1489,7 @@ void link_emit_elf(LinkImage* img, Writer* w) { * PT_LOAD that's never made writable. */ } else if (pie) { /* dyn was nominally requested but layout_dyn early-out — no - * imports and no DSO inputs. The image still needs a PT_GNU_STACK - * for kernels that demand it; INTERP/DYNAMIC are skipped. */ + * imports and no DSO inputs. INTERP/DYNAMIC loader headers are skipped. */ (void)0; } (void)pi; diff --git a/src/obj/elf/link_dyn.c b/src/obj/elf/link_dyn.c @@ -525,6 +525,7 @@ static u32 count_dynamic_entries(const LinkDynState* dyn) { void layout_dyn(Linker* l, LinkImage* img) { Heap* h = img->heap; LinkDynState* dyn; + LinkDynState dyn_probe; ImportLists imports; ByteBuf dynstr; u64 page; @@ -555,9 +556,22 @@ void layout_dyn(Linker* l, LinkImage* img) { "link: layout_dyn: no ELF arch descriptor"); } + /* Step 1: enumerate imports + DT_NEEDED. A PIE with no imports and no + * DSO inputs is effectively static; keep ET_DYN output but do not stamp + * PT_INTERP/PT_DYNAMIC or an empty .dynamic section. */ + memset(&dyn_probe, 0, sizeof dyn_probe); + collect_imports(l, img, h, &imports); + collect_needed(l, img, &dyn_probe); + if (l->emit_static_exe && imports.nfuncs == 0 && imports.ndatas == 0 && + dyn_probe.nneeded == 0) { + img->pie = 1; + free_imports(h, &imports); + return; + } + dyn = (LinkDynState*)h->alloc(h, sizeof(*dyn), _Alignof(LinkDynState)); if (!dyn) compiler_panic(img->c, SRCLOC_NONE, "link: oom on dyn state"); - memset(dyn, 0, sizeof(*dyn)); + *dyn = dyn_probe; img->dyn = dyn; img->pie = 1; @@ -573,10 +587,6 @@ void layout_dyn(Linker* l, LinkImage* img) { : pool_intern_slice(l->c->global, slice_from_cstr(elf_arch->default_musl_interp)); - /* Step 1: enumerate imports + DT_NEEDED. */ - collect_imports(l, img, h, &imports); - collect_needed(l, img, dyn); - /* Step 2: build .dynstr + .dynsym. 
.dynstr must also carry the * DT_NEEDED soname strings the .dynamic body references; intern * them after the import names so build_dynsym's de-dup also covers diff --git a/test/driver/run.sh b/test/driver/run.sh @@ -360,6 +360,111 @@ else not_ok "ld-no-undefined" "$work/shared-undef-cc.err" fi +# ---- ld -lc expands hosted CRT/libc from --sysroot ---- +mkdir -p "$work/ld-hosted-sr/lib" "$work/ld-hosted-sr/include" +cat > "$work/ld-hosted-main.c" <<'SRC' +int main(void) { return 0; } +SRC +cat > "$work/ld-hosted-crt.c" <<'SRC' +extern int main(void); +void _start(void) { (void)main(); for (;;) {} } +SRC +cat > "$work/ld-hosted-crti.c" <<'SRC' +void __kit_fake_crti(void) {} +SRC +cat > "$work/ld-hosted-crtn.c" <<'SRC' +void __kit_fake_crtn(void) {} +SRC +cat > "$work/ld-hosted-libc.c" <<'SRC' +int libc_marker(void) { return 7; } +SRC +if "$KIT" cc -target x86_64-linux -fPIE -c "$work/ld-hosted-main.c" \ + -o "$work/ld-hosted-main.o" > "$work/ld-hosted-main.out" \ + 2> "$work/ld-hosted-main.err" && + "$KIT" cc -target x86_64-linux -fPIE -c "$work/ld-hosted-crt.c" \ + -o "$work/ld-hosted-sr/lib/Scrt1.o" > "$work/ld-hosted-scrt.out" \ + 2> "$work/ld-hosted-scrt.err" && + "$KIT" cc -target x86_64-linux -fno-PIC -c "$work/ld-hosted-crt.c" \ + -o "$work/ld-hosted-sr/lib/crt1.o" > "$work/ld-hosted-crt1.out" \ + 2> "$work/ld-hosted-crt1.err" && + "$KIT" cc -target x86_64-linux -c "$work/ld-hosted-crti.c" \ + -o "$work/ld-hosted-sr/lib/crti.o" > "$work/ld-hosted-crti.out" \ + 2> "$work/ld-hosted-crti.err" && + "$KIT" cc -target x86_64-linux -c "$work/ld-hosted-crtn.c" \ + -o "$work/ld-hosted-sr/lib/crtn.o" > "$work/ld-hosted-crtn.out" \ + 2> "$work/ld-hosted-crtn.err" && + "$KIT" cc -target x86_64-linux -fPIC -c "$work/ld-hosted-libc.c" \ + -o "$work/ld-hosted-libc-pic.o" > "$work/ld-hosted-libc-pic.out" \ + 2> "$work/ld-hosted-libc-pic.err" && + "$KIT" ld -shared -nostdlib -e libc_marker "$work/ld-hosted-libc-pic.o" \ + -o "$work/ld-hosted-sr/lib/libc.so" > 
"$work/ld-hosted-so.out" \ + 2> "$work/ld-hosted-so.err" && + "$KIT" cc -target x86_64-linux -fno-PIC -c "$work/ld-hosted-libc.c" \ + -o "$work/ld-hosted-libc-static.o" > "$work/ld-hosted-libc-static.out" \ + 2> "$work/ld-hosted-libc-static.err" && + "$KIT" ar rc "$work/ld-hosted-sr/lib/libc.a" \ + "$work/ld-hosted-libc-static.o" > "$work/ld-hosted-ar.out" \ + 2> "$work/ld-hosted-ar.err"; then + if "$KIT" ld --support-dir "$work/ld-rt-support" \ + --sysroot "$work/ld-hosted-sr" -pie -lc \ + "$work/ld-hosted-main.o" -o "$work/ld-hosted" \ + > "$work/ld-hosted.out" 2> "$work/ld-hosted.err" && + "$KIT" objdump -p "$work/ld-hosted" > "$work/ld-hosted-p.out" \ + 2> "$work/ld-hosted-p.err" && + grep -q "interpreter /lib/ld-musl-x86_64.so.1" \ + "$work/ld-hosted-p.out" && + grep -q "NEEDED libc.so" "$work/ld-hosted-p.out"; then + ok "ld-hosted-lc-sysroot" + else + { sed 's/^/ld: /' "$work/ld-hosted.err" 2>/dev/null + sed 's/^/dump: /' "$work/ld-hosted-p.err" 2>/dev/null + sed 's/^/ | /' "$work/ld-hosted-p.out" 2>/dev/null; } \ + > "$work/ld-hosted.diag" + not_ok "ld-hosted-lc-sysroot" "$work/ld-hosted.diag" + fi +else + { sed 's/^/main: /' "$work/ld-hosted-main.err" 2>/dev/null + sed 's/^/scrt: /' "$work/ld-hosted-scrt.err" 2>/dev/null + sed 's/^/crt1: /' "$work/ld-hosted-crt1.err" 2>/dev/null + sed 's/^/crti: /' "$work/ld-hosted-crti.err" 2>/dev/null + sed 's/^/crtn: /' "$work/ld-hosted-crtn.err" 2>/dev/null + sed 's/^/so: /' "$work/ld-hosted-so.err" 2>/dev/null + sed 's/^/ar: /' "$work/ld-hosted-ar.err" 2>/dev/null; } \ + > "$work/ld-hosted-setup.diag" + not_ok "ld-hosted-lc-sysroot" "$work/ld-hosted-setup.diag" +fi + +# ---- ld PIE without dynamic deps has no INTERP/DYNAMIC program headers ---- +cat > "$work/ld-static-pie.c" <<'SRC' +void _start(void) { for (;;) {} } +SRC +if "$KIT" cc -target x86_64-linux -fPIE -c "$work/ld-static-pie.c" \ + -o "$work/ld-static-pie.o" > "$work/ld-static-pie-cc.out" \ + 2> "$work/ld-static-pie-cc.err" && + "$KIT" ld -pie -nostdlib 
"$work/ld-static-pie.o" \ + -o "$work/ld-static-pie" > "$work/ld-static-pie.out" \ + 2> "$work/ld-static-pie.err" && + "$KIT" objdump -p "$work/ld-static-pie" \ + > "$work/ld-static-pie-p.out" 2> "$work/ld-static-pie-p.err"; then + if ! grep -q "interpreter " "$work/ld-static-pie-p.out" && + ! grep -q "^ DYNAMIC" "$work/ld-static-pie-p.out" && + ! grep -q "NEEDED" "$work/ld-static-pie-p.out"; then + ok "ld-static-pie-no-dynamic-headers" + else + sed 's/^/ | /' "$work/ld-static-pie-p.out" \ + > "$work/ld-static-pie.diag" + not_ok "ld-static-pie-no-dynamic-headers" \ + "$work/ld-static-pie.diag" + fi +else + { sed 's/^/cc: /' "$work/ld-static-pie-cc.err" 2>/dev/null + sed 's/^/ld: /' "$work/ld-static-pie.err" 2>/dev/null + sed 's/^/dump: /' "$work/ld-static-pie-p.err" 2>/dev/null; } \ + > "$work/ld-static-pie-setup.diag" + not_ok "ld-static-pie-no-dynamic-headers" \ + "$work/ld-static-pie-setup.diag" +fi + # ---- objdump -x aggregate (sections + symbol table) ---- if "$KIT" objdump -x "$work/main.o" \ > "$work/objdump-x.out" 2> "$work/objdump-x.err" &&