commit 56198fafd1ed82cecb144b87171f9b88a6ad66b0
parent 40c583cf135d77d972c82cb70a94c771d536ecac
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 8 May 2026 17:19:42 -0700
src: pipeline and link updates
Diffstat:
2 files changed, 314 insertions(+), 386 deletions(-)
diff --git a/src/api/pipeline.c b/src/api/pipeline.c
@@ -64,14 +64,6 @@ static _Noreturn void panic_bad_options(Compiler* c, const char* msg)
compiler_panic(c, no_loc(), "bad cfree options: %s", msg);
}
-static const CfreeFileIO* require_file_io(Compiler* c, const char* what)
-{
- if (!c->env || !c->env->file_io) {
- compiler_panic(c, no_loc(), "%s requires env.file_io", what);
- }
- return c->env->file_io;
-}
-
static void apply_pp_options(Pp* pp, const CfreePpOptions* opts)
{
u32 i;
@@ -133,45 +125,79 @@ int cfree_preprocess(CfreeCompiler* c, const CfreePpOptions* pp_opts,
/* One-TU compile against a fresh ObjBuilder. The builder is finalized on
* exit so it is immediately consumable by the linker or an emit_* function.
- * The input bytes must outlive this call. */
+ * The input bytes must outlive this call. Branches on input->lang: C goes
+ * through the preprocessor + C parser + codegen; ASM bypasses pp/cg and
+ * feeds tokens straight to the assembler. */
static void compile_into(Compiler* c, const CfreeCompileOptions* opts,
const CfreeBytesInput* input, ObjBuilder* ob)
{
- Pp* pp = pp_new(c);
Lexer* lex = lex_open_mem(c, input->name,
(const char*)input->data, input->len);
- DeclTable* decls = decl_new(c, ob);
MCEmitter* mc = mc_new(c, ob);
- CGTarget* target = cgtarget_new(c, ob, mc);
- Debug* debug = NULL;
- CG* cg = NULL;
-
- apply_pp_options(pp, &opts->pp);
- pp_push_input(pp, lex); /* PP owns the lexer from here on */
- if (opts->opt_level > 0) {
- target = opt_cgtarget_new(c, target, opts->opt_level);
- }
- if (opts->debug_info) {
- debug = debug_new(c, ob);
+ if (input->lang == CFREE_LANG_ASM) {
+ /* Asm-irrelevant fields on opts (pp, opt_level) are ignored. */
+ parse_asm(c, lex, mc);
+ obj_finalize(ob);
+ mc_free(mc);
+ /* The assembler owns the lexer it was handed; no pp_free release. */
+ return;
}
- cg = cg_new(c, target, debug);
- parse_c(c, pp, decls, cg);
- cgtarget_finalize(target);
- if (debug) {
- debug_emit(debug);
+ {
+ Pp* pp = pp_new(c);
+ DeclTable* decls = decl_new(c, ob);
+ CGTarget* target = cgtarget_new(c, ob, mc);
+ Debug* debug = NULL;
+ CG* cg = NULL;
+
+ apply_pp_options(pp, &opts->pp);
+ pp_push_input(pp, lex); /* PP owns the lexer from here on */
+
+ if (opts->opt_level > 0) {
+ target = opt_cgtarget_new(c, target, opts->opt_level);
+ }
+ if (opts->debug_info) {
+ debug = debug_new(c, ob);
+ }
+ cg = cg_new(c, target, debug);
+
+ parse_c(c, pp, decls, cg);
+ cgtarget_finalize(target);
+ if (debug) {
+ debug_emit(debug);
+ }
+ obj_finalize(ob);
+
+ cg_free(cg);
+ if (debug) {
+ debug_free(debug);
+ }
+ cgtarget_free(target); /* opt_cgtarget cascades to wrapped target */
+ mc_free(mc);
+ decl_free(decls);
+ pp_free(pp); /* releases the pushed lexer */
}
- obj_finalize(ob);
+}
- cg_free(cg);
- if (debug) {
- debug_free(debug);
+/* Suffix-based language inference. See header.
+ *
+ * Looks only at the last path component (the text after the final '/')
+ * and at the last '.' inside it. Returns CFREE_LANG_ASM when what follows
+ * that '.' is exactly the lowercase string "s"; every other case -- NULL
+ * path, no '.', or any other extension -- returns CFREE_LANG_C.
+ * Only '/' is treated as a directory separator.
+ * NOTE(review): an uppercase ".S" suffix is classified as C by this
+ * comparison -- confirm that is the intended policy. */
+CfreeLanguage cfree_language_for_path(const char* path)
+{
+ size_t i, len;
+ if (!path) return CFREE_LANG_C;
+ for (len = 0; path[len]; ++len) {} /* hand-rolled length scan */
+ /* Find the last '.' after the last '/'. */
+ i = len;
+ while (i > 0) {
+ --i;
+ if (path[i] == '/') return CFREE_LANG_C; /* reached the directory part: no extension */
+ if (path[i] == '.') {
+ const char* ext = path + i + 1;
+ if (ext[0] == 's' && ext[1] == '\0') return CFREE_LANG_ASM;
+ return CFREE_LANG_C;
+ }
}
- cgtarget_free(target); /* opt_cgtarget cascades to wrapped target */
- mc_free(mc);
- decl_free(decls);
- pp_free(pp); /* releases the pushed lexer */
+ return CFREE_LANG_C;
}
static void validate_bytes_input(Compiler* c, const CfreeBytesInput* in)
@@ -255,33 +281,33 @@ int cfree_compile_obj_emit(CfreeCompiler* c, const CfreeCompileOptions* opts,
* Link
* ============================================================ */
-static Linker* build_linker(Compiler* c, const CfreeLinkOptions* opts)
+static Linker* build_linker(Compiler* c, const CfreeLinkInputs* in)
{
Linker* linker = link_new(c);
u32 i;
- for (i = 0; i < opts->nobjs; ++i) {
- link_add_obj(linker, opts->objs[i]);
+ for (i = 0; i < in->nobjs; ++i) {
+ link_add_obj(linker, in->objs[i]);
}
- for (i = 0; i < opts->nobj_bytes; ++i) {
- link_add_obj_bytes(linker, opts->obj_bytes[i].name,
- opts->obj_bytes[i].data, opts->obj_bytes[i].len);
+ for (i = 0; i < in->nobj_bytes; ++i) {
+ link_add_obj_bytes(linker, in->obj_bytes[i].name,
+ in->obj_bytes[i].data, in->obj_bytes[i].len);
}
- for (i = 0; i < opts->narchives; ++i) {
- const CfreeBytesInputArchive* a = &opts->archives[i];
+ for (i = 0; i < in->narchives; ++i) {
+ const CfreeBytesInputArchive* a = &in->archives[i];
link_add_archive_bytes(linker, a->input.name,
a->input.data, a->input.len,
- a->flags, a->group_id);
+ a->whole_archive, a->link_mode, a->group_id);
}
- if (opts->linker_script) {
- link_set_script(linker, opts->linker_script);
+ if (in->linker_script) {
+ link_set_script(linker, in->linker_script);
}
- if (opts->entry) {
- link_set_entry(linker, opts->entry);
+ if (in->entry) {
+ link_set_entry(linker, in->entry);
}
- if (opts->extern_resolver) {
- link_set_extern_resolver(linker, opts->extern_resolver,
- opts->extern_resolver_user);
+ if (in->extern_resolver) {
+ link_set_extern_resolver(linker, in->extern_resolver,
+ in->extern_resolver_user);
}
return linker;
}
@@ -302,7 +328,8 @@ int cfree_link_exe(CfreeCompiler* c, const CfreeLinkOptions* opts,
if (!opts || !out) {
panic_bad_options(c, "link_exe args missing");
}
- linker = build_linker(c, opts);
+ linker = build_linker(c, &opts->inputs);
+ link_set_gc_sections(linker, opts->gc_sections);
image = link_resolve(linker); /* deferred-cleanup-registered */
link_emit_image_writer(image, out);
link_image_free(image); /* undefers + frees */
@@ -311,6 +338,32 @@ int cfree_link_exe(CfreeCompiler* c, const CfreeLinkOptions* opts,
return 0;
}
+/* Shared-library link: not yet implemented in src/. The header API and
+ * driver glue are in place ahead of the codegen so callers can wire
+ * -shared / -soname / -rpath end-to-end; this entry currently reports
+ * a diagnostic and fails.
+ *
+ * Every path returns 1. NULL `opts` or `out` raises the
+ * "link_shared args missing" bad-options panic; valid arguments raise the
+ * not-implemented panic. Either panic is expected to land in the setjmp
+ * branch below, which runs the compiler's registered cleanups, restores
+ * the saved panic state and returns 1. Nothing is written to `out`. */
+int cfree_link_shared(CfreeCompiler* c, const CfreeLinkSharedOptions* opts,
+ CfreeWriter* out)
+{
+ PanicSave saved;
+
+ compiler_panic_save(c, &saved);
+ if (setjmp(c->panic)) {
+ compiler_run_cleanups(c);
+ compiler_panic_restore(c, &saved);
+ return 1;
+ }
+ if (!opts || !out) {
+ panic_bad_options(c, "link_shared args missing");
+ }
+ compiler_panic(c, no_loc(),
+ "cfree_link_shared: shared-library codegen is not yet "
+ "implemented in libcfree");
+ /* unreachable: kept so the function still restores the panic state and
+ * returns a value on the fall-through path */
+ compiler_panic_restore(c, &saved);
+ return 1;
+}
+
int cfree_link_jit(CfreeCompiler* c, const CfreeLinkOptions* opts,
CfreeJit** out_jit)
{
@@ -331,7 +384,8 @@ int cfree_link_jit(CfreeCompiler* c, const CfreeLinkOptions* opts,
if (!opts) {
panic_bad_options(c, "link_jit options missing");
}
- linker = build_linker(c, opts);
+ linker = build_linker(c, &opts->inputs);
+ link_set_gc_sections(linker, opts->gc_sections);
image = link_resolve(linker); /* deferred-cleanup-registered */
*out_jit = cfree_jit_from_image(image); /* undefers + transfers ownership */
link_free(linker);
@@ -340,346 +394,80 @@ int cfree_link_jit(CfreeCompiler* c, const CfreeLinkOptions* opts,
}
/* ============================================================
- * Convenience: cfree_run
+ * CfreePipeline (public)
* ============================================================
- *
- * cfree_run owns a single Compiler for the whole composition. Scratch
- * arrays (objs, loaded-bytes tables, staging arrays) are allocated from
- * c->tu and reaped by compiler_fini. File-io loans for path-shaped inputs
- * are tracked through the cleanup stack so a panic anywhere reaps them; on
- * success they are released explicitly. */
-
-typedef struct LoadedBytes {
- const CfreeFileIO* io;
- CfreeBytesInput in;
- CfreeFileData file;
- int loaded;
- CompilerCleanup* cleanup; /* compiler_defer handle */
-} LoadedBytes;
-
-static void loaded_bytes_release_cb(void* arg)
-{
- LoadedBytes* lb = (LoadedBytes*)arg;
- if (lb->loaded && lb->io && lb->io->release) {
- lb->io->release(lb->io->user, &lb->file);
- }
- lb->loaded = 0;
-}
+ * Thin owning wrapper over CfreeCompiler. Holds the heap pointer used at
+ * allocation so cfree_pipeline_free can release the wrapper without
+ * touching the (already-finalized) compiler's env. */
+
+struct CfreePipeline {
+ CfreeCompiler* compiler;
+ Heap* heap;
+};
-static void load_path_bytes(Compiler* c, const char* path, LoadedBytes* out)
+CfreePipeline* cfree_pipeline_new(CfreeTarget target, const CfreeEnv* env)
{
- out->io = require_file_io(c, "file input");
- out->loaded = 0;
- out->cleanup = NULL;
- out->file.data = NULL;
- out->file.size = 0;
- out->file.token = NULL;
- out->in.name = path;
- out->in.data = NULL;
- out->in.len = 0;
-
- /* Defer release before reading so a read failure still cleans up. The
- * callback is a no-op while loaded == 0. */
- out->cleanup = compiler_defer(c, loaded_bytes_release_cb, out);
-
- if (!out->io->read_all(out->io->user, path, &out->file)) {
- compiler_panic(c, no_loc(), "failed to read: %s", path);
+ Heap* h;
+ CfreePipeline* p;
+ CfreeCompiler* c;
+
+ if (!env || !env->heap) return NULL;
+ h = env->heap;
+ p = h->alloc(h, sizeof(*p), _Alignof(CfreePipeline));
+ if (!p) return NULL;
+ c = cfree_compiler_new(target, env);
+ if (!c) {
+ h->free(h, p, sizeof(*p));
+ return NULL;
}
- out->loaded = 1;
- out->in.data = out->file.data;
- out->in.len = out->file.size;
+ p->compiler = c;
+ p->heap = h;
+ return p;
}
-static void release_loaded_bytes(Compiler* c, LoadedBytes* lb)
+void cfree_pipeline_free(CfreePipeline* p)
{
- if (!lb->cleanup) return;
- compiler_undefer(c, lb->cleanup);
- lb->cleanup = NULL;
- loaded_bytes_release_cb(lb);
+ Heap* h;
+ if (!p) return;
+ h = p->heap;
+ cfree_compiler_free(p->compiler);
+ h->free(h, p, sizeof(*p));
}
-static void release_loaded_array(Compiler* c, LoadedBytes* arr, u32 n)
+CfreeCompiler* cfree_pipeline_compiler(CfreePipeline* p)
{
- u32 i;
- if (!arr) return;
- for (i = 0; i < n; ++i) release_loaded_bytes(c, &arr[i]);
+ return p ? p->compiler : NULL;
}
-static u32 total_sources(const CfreeOptions* opts)
+int cfree_pipeline_compile_obj(CfreePipeline* p,
+ const CfreeCompileOptions* opts,
+ const CfreeBytesInput* input,
+ CfreeObjBuilder** out)
{
- return opts->nsource_files + opts->nsource_memory;
+ if (!p) return 1;
+ return cfree_compile_obj(p->compiler, opts, input, out);
}
-static void validate_run_options(Compiler* c, const CfreeOptions* opts)
+int cfree_pipeline_link_exe(CfreePipeline* p, const CfreeLinkOptions* opts,
+ CfreeWriter* out)
{
- u32 i;
- u32 nsrc;
-
- if (!opts) panic_bad_options(c, "options pointer is NULL");
- nsrc = total_sources(opts);
- if (nsrc == 0) {
- panic_bad_options(c, "at least one C source input is required");
- }
- if (opts->opt_level < 0 || opts->opt_level > 2) {
- panic_bad_options(c, "opt_level must be 0, 1, or 2");
- }
- if (!opts->env.heap) {
- panic_bad_options(c, "env.heap is required");
- }
- if (opts->output_kind != CFREE_OUTPUT_OBJ &&
- opts->output_kind != CFREE_OUTPUT_EXE &&
- opts->output_kind != CFREE_OUTPUT_JIT) {
- panic_bad_options(c, "output_kind is invalid");
- }
- if ((opts->output_kind == CFREE_OUTPUT_OBJ ||
- opts->output_kind == CFREE_OUTPUT_EXE) && !opts->output_path) {
- panic_bad_options(c, "output_path is required for file output");
- }
- if ((opts->output_kind == CFREE_OUTPUT_OBJ ||
- opts->output_kind == CFREE_OUTPUT_EXE) &&
- (!opts->env.file_io || !opts->env.file_io->open_writer)) {
- panic_bad_options(c, "env.file_io.open_writer is required for file output");
- }
- if (opts->output_kind == CFREE_OUTPUT_OBJ && nsrc != 1) {
- panic_bad_options(c, "object output accepts exactly one C source input");
- }
- if (opts->output_kind == CFREE_OUTPUT_OBJ &&
- (opts->nobject_files || opts->narchives ||
- opts->linker_script || opts->extern_resolver)) {
- panic_bad_options(c, "link options are not valid for object output");
- }
- if (opts->output_kind == CFREE_OUTPUT_EXE && opts->extern_resolver) {
- panic_bad_options(c, "extern_resolver is JIT-only; not valid for exe output");
- }
- if (opts->output_kind == CFREE_OUTPUT_JIT && !opts->out_jit) {
- panic_bad_options(c, "out_jit is required for JIT output");
- }
-
- if (opts->nsource_files &&
- (!opts->env.file_io || !opts->env.file_io->read_all)) {
- panic_bad_options(c, "env.file_io.read_all is required for source_files");
- }
- for (i = 0; i < opts->nsource_files; ++i) {
- if (!opts->source_files[i]) {
- panic_bad_options(c, "source_files entry is NULL");
- }
- }
- for (i = 0; i < opts->nsource_memory; ++i) {
- const CfreeBytesInput* in = &opts->source_memory[i];
- if (!in->name || (!in->data && in->len != 0)) {
- panic_bad_options(c, "source_memory entry is incomplete");
- }
- }
-
- if ((opts->nobject_files || opts->narchives || opts->linker_script) &&
- (!opts->env.file_io || !opts->env.file_io->read_all)) {
- panic_bad_options(c, "env.file_io.read_all is required for linker file input");
- }
+ if (!p) return 1;
+ return cfree_link_exe(p->compiler, opts, out);
}
-/* Resolves the i-th source (paths first, then memory) into a CfreeBytesInput,
- * lazily loading paths into `loaded` slots. */
-static const CfreeBytesInput* nth_source(const CfreeOptions* opts,
- LoadedBytes* loaded, u32 i)
+int cfree_pipeline_link_shared(CfreePipeline* p,
+ const CfreeLinkSharedOptions* opts,
+ CfreeWriter* out)
{
- if (i < opts->nsource_files) {
- return &loaded[i].in;
- }
- return &opts->source_memory[i - opts->nsource_files];
+ if (!p) return 1;
+ return cfree_link_shared(p->compiler, opts, out);
}
-int cfree_run(const CfreeOptions* opts)
+int cfree_pipeline_link_jit(CfreePipeline* p, const CfreeLinkOptions* opts,
+ CfreeJit** out_jit)
{
- Compiler c_store;
- Compiler* c = &c_store;
- ObjBuilder** objs = NULL;
- LoadedBytes* src_loaded = NULL;
- LoadedBytes* obj_bytes = NULL;
- LoadedBytes* arch_bytes = NULL;
- LoadedBytes script;
- const CfreeLinkScript* parsed_script = NULL;
- CfreeBytesInput* obj_in = NULL;
- CfreeBytesInputArchive* arch_in = NULL;
- CfreeLinkOptions link_opts;
- CfreeCompileOptions co;
- Writer* out_writer = NULL;
- u32 nsrc;
- u32 i;
-
- if (!opts || !opts->env.heap) return 1;
-
- script.loaded = 0;
- script.cleanup = NULL;
- script.io = NULL;
- script.file.data = NULL;
- script.file.size = 0;
- script.file.token = NULL;
-
- compiler_init(c, opts->target, &opts->env);
-
- if (setjmp(c->panic)) {
- if (out_writer) cfree_writer_close(out_writer);
- compiler_run_cleanups(c);
- compiler_fini(c);
- return 1;
- }
-
- validate_run_options(c, opts);
- nsrc = total_sources(opts);
-
- co.opt_level = opts->opt_level;
- co.debug_info = opts->debug_info;
- co.pp = opts->pp;
- co.warnings_are_errors = opts->warnings_are_errors;
- co.max_errors = opts->max_errors;
-
- /* Load source paths (if any) up front so OBJ and EXE/JIT share one path. */
- if (opts->nsource_files) {
- src_loaded = arena_array(c->tu, LoadedBytes, opts->nsource_files);
- for (i = 0; i < opts->nsource_files; ++i) {
- src_loaded[i].loaded = 0;
- src_loaded[i].cleanup = NULL;
- }
- for (i = 0; i < opts->nsource_files; ++i) {
- load_path_bytes(c, opts->source_files[i], &src_loaded[i]);
- }
- }
-
- /* OBJ output: compile single TU and emit to writer. */
- if (opts->output_kind == CFREE_OUTPUT_OBJ) {
- const CfreeBytesInput* the_input = nth_source(opts, src_loaded, 0);
- out_writer = opts->env.file_io->open_writer(opts->env.file_io->user,
- opts->output_path);
- if (!out_writer) {
- compiler_panic(c, no_loc(), "failed to open output file: %s",
- opts->output_path);
- }
- if (cfree_compile_obj_emit(c, &co, the_input, out_writer)) {
- cfree_writer_close(out_writer);
- release_loaded_array(c, src_loaded, opts->nsource_files);
- compiler_fini(c);
- return 1;
- }
- cfree_writer_close(out_writer);
- out_writer = NULL;
- release_loaded_array(c, src_loaded, opts->nsource_files);
- compiler_fini(c);
- return 0;
- }
-
- /* EXE/JIT: compile all sources, then link. */
- objs = arena_array(c->tu, ObjBuilder*, nsrc);
- for (i = 0; i < nsrc; ++i) objs[i] = NULL;
- for (i = 0; i < nsrc; ++i) {
- const CfreeBytesInput* in = nth_source(opts, src_loaded, i);
- if (cfree_compile_obj(c, &co, in, &objs[i])) {
- release_loaded_array(c, src_loaded, opts->nsource_files);
- compiler_fini(c);
- return 1;
- }
- }
-
- if (opts->nobject_files) {
- obj_bytes = arena_array(c->tu, LoadedBytes, opts->nobject_files);
- for (i = 0; i < opts->nobject_files; ++i) {
- obj_bytes[i].loaded = 0;
- obj_bytes[i].cleanup = NULL;
- }
- for (i = 0; i < opts->nobject_files; ++i) {
- load_path_bytes(c, opts->object_files[i], &obj_bytes[i]);
- }
- }
- if (opts->narchives) {
- arch_bytes = arena_array(c->tu, LoadedBytes, opts->narchives);
- for (i = 0; i < opts->narchives; ++i) {
- arch_bytes[i].loaded = 0;
- arch_bytes[i].cleanup = NULL;
- }
- for (i = 0; i < opts->narchives; ++i) {
- load_path_bytes(c, opts->archives[i], &arch_bytes[i]);
- }
- }
- if (opts->linker_script) {
- load_path_bytes(c, opts->linker_script, &script);
- }
-
- /* The linker takes the structured form only; parse the loaded text into
- * CfreeLinkScript via the helper. The parsed script is arena-owned by
- * the Compiler and freed when c->tu is released by compiler_fini, so no
- * explicit cfree_link_script_free is needed here. */
- if (script.loaded &&
- cfree_link_script_parse(c, (const char*)script.file.data,
- script.file.size, &parsed_script)) {
- release_loaded_array(c, src_loaded, opts->nsource_files);
- release_loaded_array(c, obj_bytes, opts->nobject_files);
- release_loaded_array(c, arch_bytes, opts->narchives);
- release_loaded_bytes(c, &script);
- compiler_fini(c);
- return 1;
- }
-
- /* Stage parallel CfreeBytesInput arrays for the linker. */
- if (opts->nobject_files) {
- obj_in = arena_array(c->tu, CfreeBytesInput, opts->nobject_files);
- for (i = 0; i < opts->nobject_files; ++i) obj_in[i] = obj_bytes[i].in;
- }
- if (opts->narchives) {
- arch_in = arena_array(c->tu, CfreeBytesInputArchive, opts->narchives);
- for (i = 0; i < opts->narchives; ++i) {
- arch_in[i].input = arch_bytes[i].in;
- arch_in[i].flags = CFREE_LAF_NONE;
- arch_in[i].group_id = 0;
- }
- }
-
- link_opts.objs = (ObjBuilder* const*)objs;
- link_opts.nobjs = nsrc;
- link_opts.obj_bytes = obj_in;
- link_opts.nobj_bytes = opts->nobject_files;
- link_opts.archives = arch_in;
- link_opts.narchives = opts->narchives;
- link_opts.linker_script = parsed_script;
- link_opts.entry = opts->entry;
- link_opts.extern_resolver = opts->extern_resolver;
- link_opts.extern_resolver_user = opts->extern_resolver_user;
-
- if (opts->output_kind == CFREE_OUTPUT_EXE) {
- out_writer = opts->env.file_io->open_writer(opts->env.file_io->user,
- opts->output_path);
- if (!out_writer) {
- compiler_panic(c, no_loc(), "failed to open output file: %s",
- opts->output_path);
- }
- if (cfree_link_exe(c, &link_opts, out_writer)) {
- cfree_writer_close(out_writer);
- release_loaded_array(c, src_loaded, opts->nsource_files);
- release_loaded_array(c, obj_bytes, opts->nobject_files);
- release_loaded_array(c, arch_bytes, opts->narchives);
- release_loaded_bytes(c, &script);
- compiler_fini(c);
- return 1;
- }
- cfree_writer_close(out_writer);
- out_writer = NULL;
- } else {
- if (cfree_link_jit(c, &link_opts, opts->out_jit)) {
- release_loaded_array(c, src_loaded, opts->nsource_files);
- release_loaded_array(c, obj_bytes, opts->nobject_files);
- release_loaded_array(c, arch_bytes, opts->narchives);
- release_loaded_bytes(c, &script);
- compiler_fini(c);
- return 1;
- }
- }
-
- release_loaded_array(c, src_loaded, opts->nsource_files);
- release_loaded_array(c, obj_bytes, opts->nobject_files);
- release_loaded_array(c, arch_bytes, opts->narchives);
- release_loaded_bytes(c, &script);
- /* objs (ObjBuilders) are owned by the Compiler; arena scratch is freed
- * by compiler_fini. */
- compiler_fini(c);
- return 0;
+ if (!p) return 1;
+ return cfree_link_jit(p->compiler, opts, out_jit);
}
/* ============================================================
@@ -758,6 +546,145 @@ CfreeBinFmt cfree_detect_fmt(const uint8_t* data, size_t len)
return CFREE_BIN_UNKNOWN;
}
+/* ============================================================
+ * Target detection from object headers
+ * ============================================================ */
+
+/* Reset the three target fields that the header probes below do not
+ * derive on their own: little-endian, no PIC, default code model.
+ * arch, obj, os and the pointer fields are left untouched; callers such
+ * as detect_elf overwrite big_endian afterwards when the header says so. */
+static void detect_target_defaults(CfreeTarget* t)
+{
+    t->code_model = CFREE_CM_DEFAULT;
+    t->pic        = CFREE_PIC_NONE;
+    t->big_endian = 0;
+}
+
+/* Record `arch` on the target and derive the pointer size and alignment
+ * from it: 8 bytes for x86-64 / arm64 / rv64, 4 bytes for x86-32 /
+ * arm32 / rv32 / wasm. For an arch in neither list only t->arch is
+ * written and the pointer fields keep whatever they held before. */
+static void detect_set_ptr(CfreeTarget* t, CfreeArchKind arch)
+{
+    int wide = (arch == CFREE_ARCH_X86_64 ||
+                arch == CFREE_ARCH_ARM_64 ||
+                arch == CFREE_ARCH_RV64);
+    int narrow = (arch == CFREE_ARCH_X86_32 ||
+                  arch == CFREE_ARCH_ARM_32 ||
+                  arch == CFREE_ARCH_RV32 ||
+                  arch == CFREE_ARCH_WASM);
+
+    t->arch = arch;
+    if (wide) {
+        t->ptr_size = 8;
+        t->ptr_align = 8;
+    } else if (narrow) {
+        t->ptr_size = 4;
+        t->ptr_align = 4;
+    }
+}
+
+/* Fill `out` from an ELF header: the identification bytes plus e_machine.
+ * Needs at least 20 bytes, since e_machine ends at offset 19.
+ * Returns 0 on success. Returns 1 when the buffer is too short, when the
+ * data-encoding byte is neither 1 (little) nor 2 (big), or when the
+ * machine -- or, for RISC-V, the class byte -- is not one handled here.
+ * On the unsupported-machine failures `out` has already received its
+ * defaults plus big_endian and obj. */
+static int detect_elf(const u8* d, size_t len, CfreeTarget* out)
+{
+    u8 klass, order, osabi;
+    u16 machine;
+    int msb_first;
+
+    if (len < 20) return 1;
+    klass = d[4];
+    order = d[5];
+    osabi = d[7];
+    if (order != 1 && order != 2) return 1;
+    msb_first = (order == 2);
+
+    /* e_machine sits at offsets 18..19 in the file's own byte order. */
+    if (msb_first) {
+        machine = (u16)d[19] | ((u16)d[18] << 8);
+    } else {
+        machine = (u16)d[18] | ((u16)d[19] << 8);
+    }
+
+    detect_target_defaults(out);
+    out->big_endian = msb_first;
+    out->obj = CFREE_OBJ_ELF;
+
+    if (machine == 0x03) {
+        detect_set_ptr(out, CFREE_ARCH_X86_32);
+    } else if (machine == 0x3E) {
+        detect_set_ptr(out, CFREE_ARCH_X86_64);
+    } else if (machine == 0x28) {
+        detect_set_ptr(out, CFREE_ARCH_ARM_32);
+    } else if (machine == 0xB7) {
+        detect_set_ptr(out, CFREE_ARCH_ARM_64);
+    } else if (machine == 0xF3) {
+        /* RISC-V shares one machine code; the class byte picks the width. */
+        if (klass == 1) {
+            detect_set_ptr(out, CFREE_ARCH_RV32);
+        } else if (klass == 2) {
+            detect_set_ptr(out, CFREE_ARCH_RV64);
+        } else {
+            return 1;
+        }
+    } else {
+        return 1;
+    }
+
+    /* OSABI: 0=SYSV (treat as Linux), 3=Linux. Anything else: freestanding. */
+    if (osabi == 0 || osabi == 3) {
+        out->os = CFREE_OS_LINUX;
+    } else {
+        out->os = CFREE_OS_FREESTANDING;
+    }
+    return 0;
+}
+
+/* Fill `out` from the first two bytes of a COFF header, read as a
+ * little-endian 16-bit machine code. The OS is always reported as
+ * Windows. Returns 0 on success, 1 when fewer than 2 bytes are available
+ * or the machine code is not one listed below; in the latter case `out`
+ * has already received its defaults plus obj and os. */
+static int detect_coff(const u8* d, size_t len, CfreeTarget* out)
+{
+    u16 code;
+    CfreeArchKind arch;
+
+    if (len < 2) return 1;
+    code = (u16)d[0] | ((u16)d[1] << 8);
+
+    detect_target_defaults(out);
+    out->os = CFREE_OS_WINDOWS;
+    out->obj = CFREE_OBJ_COFF;
+
+    switch (code) {
+    case 0x014C: arch = CFREE_ARCH_X86_32; break;
+    case 0x01C4: arch = CFREE_ARCH_ARM_32; break;
+    case 0x5032: arch = CFREE_ARCH_RV32;   break;
+    case 0x5064: arch = CFREE_ARCH_RV64;   break;
+    case 0x8664: arch = CFREE_ARCH_X86_64; break;
+    case 0xAA64: arch = CFREE_ARCH_ARM_64; break;
+    default:     return 1;
+    }
+    detect_set_ptr(out, arch);
+    return 0;
+}
+
+/* Fill `out` from a Mach-O header: the magic and cputype, i.e. the first
+ * 8 bytes. The OS is always reported as macOS.
+ *
+ * Returns 0 on success. Returns 1 when fewer than 8 bytes are available,
+ * the magic is not one of the four 32/64-bit thin-file magics, the
+ * cputype's 64-bit ABI flag disagrees with the magic's width, or the
+ * cputype is not one listed below.
+ *
+ * out->big_endian mirrors the header's byte order, matching what
+ * detect_elf reports for ELF inputs. */
+static int detect_macho(const u8* d, size_t len, CfreeTarget* out)
+{
+    u32 magic, cputype;
+    int swap, is64;
+    if (len < 8) return 1;
+    /* The magic is assembled little-endian, so seeing a byte-reversed
+     * constant means the header fields are stored big-endian. */
+    magic = (u32)d[0] | ((u32)d[1] << 8) | ((u32)d[2] << 16) | ((u32)d[3] << 24);
+    switch (magic) {
+    case 0xFEEDFACEu: swap = 0; is64 = 0; break;
+    case 0xFEEDFACFu: swap = 0; is64 = 1; break;
+    case 0xCEFAEDFEu: swap = 1; is64 = 0; break;
+    case 0xCFFAEDFEu: swap = 1; is64 = 1; break;
+    default: return 1;
+    }
+    if (!swap) {
+        cputype = (u32)d[4] | ((u32)d[5] << 8) | ((u32)d[6] << 16) | ((u32)d[7] << 24);
+    } else {
+        cputype = (u32)d[7] | ((u32)d[6] << 8) | ((u32)d[5] << 16) | ((u32)d[4] << 24);
+    }
+    /* CPU_ARCH_ABI64 (0x01000000) marks the 64-bit cputypes. A header
+     * whose magic width disagrees with that flag is malformed: the
+     * 32- and 64-bit header layouts differ, so reject it up front. */
+    if (((cputype & 0x01000000u) != 0u) != (is64 != 0)) return 1;
+
+    detect_target_defaults(out);
+    out->big_endian = swap; /* byte-reversed magic == big-endian header */
+    out->obj = CFREE_OBJ_MACHO;
+    out->os = CFREE_OS_MACOS;
+    /* CPU_TYPE: 7=x86, 0x01000007=x86_64, 12=ARM, 0x0100000C=ARM64. */
+    switch (cputype) {
+    case 0x00000007u: detect_set_ptr(out, CFREE_ARCH_X86_32); break;
+    case 0x01000007u: detect_set_ptr(out, CFREE_ARCH_X86_64); break;
+    case 0x0000000Cu: detect_set_ptr(out, CFREE_ARCH_ARM_32); break;
+    case 0x0100000Cu: detect_set_ptr(out, CFREE_ARCH_ARM_64); break;
+    default: return 1;
+    }
+    return 0;
+}
+
+/* Sniff the container format of `data` with cfree_detect_fmt and fill
+ * `out` with the matching target description. ELF, COFF and Mach-O are
+ * delegated to their header probes; wasm carries no per-file variation
+ * here and is filled in directly (wasm arch, WASM object format, WASI).
+ * Returns 0 on success, 1 when `data` or `out` is NULL, the format is
+ * not recognised, or the per-format probe rejects the header. */
+int cfree_detect_target(const uint8_t* data, size_t len, CfreeTarget* out)
+{
+    CfreeBinFmt fmt;
+
+    if (!data || !out) return 1;
+
+    fmt = cfree_detect_fmt(data, len);
+    if (fmt == CFREE_BIN_ELF)   return detect_elf(data, len, out);
+    if (fmt == CFREE_BIN_COFF)  return detect_coff(data, len, out);
+    if (fmt == CFREE_BIN_MACHO) return detect_macho(data, len, out);
+    if (fmt != CFREE_BIN_WASM)  return 1;
+
+    detect_target_defaults(out);
+    detect_set_ptr(out, CFREE_ARCH_WASM);
+    out->obj = CFREE_OBJ_WASM;
+    out->os = CFREE_OS_WASI;
+    return 0;
+}
+
static ObjBuilder* obj_read_bytes(Compiler* c, const char* name,
const u8* data, size_t len, ObjFmt fmt)
{
@@ -791,25 +718,19 @@ struct CfreeObjSymIter {
ObjSymIter* inner;
};
-CfreeObjFile* cfree_obj_open(const CfreeEnv* env, CfreeTarget target,
+CfreeObjFile* cfree_obj_open(const CfreeEnv* env,
const CfreeBytesInput* input)
{
Heap* h;
CfreeObjFile* f;
- CfreeBinFmt bin;
+ CfreeTarget target;
ObjFmt ofmt;
if (!env || !env->heap || !input) return NULL;
if (!input->data && input->len > 0) return NULL;
- bin = cfree_detect_fmt(input->data, input->len);
- switch (bin) {
- case CFREE_BIN_ELF: ofmt = CFREE_OBJ_ELF; break;
- case CFREE_BIN_COFF: ofmt = CFREE_OBJ_COFF; break;
- case CFREE_BIN_MACHO: ofmt = CFREE_OBJ_MACHO; break;
- case CFREE_BIN_WASM: ofmt = CFREE_OBJ_WASM; break;
- default: return NULL;
- }
+ if (cfree_detect_target(input->data, input->len, &target) != 0) return NULL;
+ ofmt = target.obj;
h = (Heap*)env->heap;
f = (CfreeObjFile*)h->alloc(h, sizeof(*f), _Alignof(CfreeObjFile));
@@ -873,7 +794,7 @@ CfreeObjSecInfo cfree_obj_section(const CfreeObjFile* f, uint32_t idx)
out.kind = sec ? (CfreeSecKind)sec->kind : CFREE_SEC_OTHER;
out.flags = sec ? (uint32_t)sec->flags : 0u;
out.size = sec ? (sec->bss_size ? sec->bss_size : sec->bytes.total) : 0u;
- out.align = sec ? sec->align : 0u;
+ out.align = (sec && sec->align > 1u) ? sec->align : 1u;
return out;
}
diff --git a/src/link/link.h b/src/link/link.h
@@ -102,12 +102,15 @@ void link_free(Linker*);
LinkInputId link_add_obj(Linker*, ObjBuilder*);
LinkInputId link_add_obj_bytes(Linker*, const char* name,
const u8* data, size_t len);
-/* `flags` is a bitmask of CfreeLinkArchFlag. `group_id == 0` means linear
- * single-pass; archives sharing a nonzero `group_id` are scanned cyclically
- * (equivalent to GNU ld --start-group ... --end-group). */
+/* `whole_archive` (nonzero == --whole-archive) and `link_mode`
+ * (CfreeLinkMode: -Bstatic / -Bdynamic / --as-needed positional state) are
+ * orthogonal per-archive flags. `group_id == 0` means linear single-pass;
+ * archives sharing a nonzero `group_id` are scanned cyclically (equivalent
+ * to GNU ld --start-group ... --end-group). */
LinkInputId link_add_archive_bytes(Linker*, const char* name,
const u8* data, size_t len,
- u8 flags, u8 group_id);
+ u8 whole_archive, u8 link_mode,
+ u8 group_id);
void link_set_entry(Linker*, const char* name);
/* Borrowed reference; the script and every sub-object must outlive
@@ -116,6 +119,10 @@ void link_set_entry(Linker*, const char* name);
* cfree_link_script_parse first. */
void link_set_script(Linker*, const CfreeLinkScript*);
void link_set_extern_resolver(Linker*, LinkExternResolver, void* user);
+/* Enable --gc-sections on this link. Roots are: entry symbol, exported
+ * symbols (shared link), and any section flagged KEEP by the linker
+ * script. Unreferenced sections are dropped from the output. */
+void link_set_gc_sections(Linker*, int enable);
/* Symbol resolution and layout are explicit so file linking and JIT share the
* same resolved image. Fatal diagnostics use Compiler.panic.