kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 56198fafd1ed82cecb144b87171f9b88a6ad66b0
parent 40c583cf135d77d972c82cb70a94c771d536ecac
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri,  8 May 2026 17:19:42 -0700

src: pipeline and link updates

Diffstat:
Msrc/api/pipeline.c | 685+++++++++++++++++++++++++++++++++++--------------------------------------------
Msrc/link/link.h | 15+++++++++++----
2 files changed, 314 insertions(+), 386 deletions(-)

diff --git a/src/api/pipeline.c b/src/api/pipeline.c @@ -64,14 +64,6 @@ static _Noreturn void panic_bad_options(Compiler* c, const char* msg) compiler_panic(c, no_loc(), "bad cfree options: %s", msg); } -static const CfreeFileIO* require_file_io(Compiler* c, const char* what) -{ - if (!c->env || !c->env->file_io) { - compiler_panic(c, no_loc(), "%s requires env.file_io", what); - } - return c->env->file_io; -} - static void apply_pp_options(Pp* pp, const CfreePpOptions* opts) { u32 i; @@ -133,45 +125,79 @@ int cfree_preprocess(CfreeCompiler* c, const CfreePpOptions* pp_opts, /* One-TU compile against a fresh ObjBuilder. The builder is finalized on * exit so it is immediately consumable by the linker or an emit_* function. - * The input bytes must outlive this call. */ + * The input bytes must outlive this call. Branches on input->lang: C goes + * through the preprocessor + C parser + codegen; ASM bypasses pp/cg and + * feeds tokens straight to the assembler. */ static void compile_into(Compiler* c, const CfreeCompileOptions* opts, const CfreeBytesInput* input, ObjBuilder* ob) { - Pp* pp = pp_new(c); Lexer* lex = lex_open_mem(c, input->name, (const char*)input->data, input->len); - DeclTable* decls = decl_new(c, ob); MCEmitter* mc = mc_new(c, ob); - CGTarget* target = cgtarget_new(c, ob, mc); - Debug* debug = NULL; - CG* cg = NULL; - - apply_pp_options(pp, &opts->pp); - pp_push_input(pp, lex); /* PP owns the lexer from here on */ - if (opts->opt_level > 0) { - target = opt_cgtarget_new(c, target, opts->opt_level); - } - if (opts->debug_info) { - debug = debug_new(c, ob); + if (input->lang == CFREE_LANG_ASM) { + /* Asm-irrelevant fields on opts (pp, opt_level) are ignored. */ + parse_asm(c, lex, mc); + obj_finalize(ob); + mc_free(mc); + /* The assembler owns the lexer it was handed; no pp_free release. */ + return; } - cg = cg_new(c, target, debug); - parse_c(c, pp, decls, cg); - cgtarget_finalize(target); - if (debug) { - debug_emit(debug); + { + Pp* pp = pp_new(c); + DeclTable* decls = decl_new(c, ob); + CGTarget* target = cgtarget_new(c, ob, mc); + Debug* debug = NULL; + CG* cg = NULL; + + apply_pp_options(pp, &opts->pp); + pp_push_input(pp, lex); /* PP owns the lexer from here on */ + + if (opts->opt_level > 0) { + target = opt_cgtarget_new(c, target, opts->opt_level); + } + if (opts->debug_info) { + debug = debug_new(c, ob); + } + cg = cg_new(c, target, debug); + + parse_c(c, pp, decls, cg); + cgtarget_finalize(target); + if (debug) { + debug_emit(debug); + } + obj_finalize(ob); + + cg_free(cg); + if (debug) { + debug_free(debug); + } + cgtarget_free(target); /* opt_cgtarget cascades to wrapped target */ + mc_free(mc); + decl_free(decls); + pp_free(pp); /* releases the pushed lexer */ } - obj_finalize(ob); +} - cg_free(cg); - if (debug) { - debug_free(debug); +/* Suffix-based language inference. See header. */ +CfreeLanguage cfree_language_for_path(const char* path) +{ + size_t i, len; + if (!path) return CFREE_LANG_C; + for (len = 0; path[len]; ++len) {} + /* Find the last '.' after the last '/'. */ + i = len; + while (i > 0) { + --i; + if (path[i] == '/') return CFREE_LANG_C; + if (path[i] == '.') { + const char* ext = path + i + 1; + if (ext[0] == 's' && ext[1] == '\0') return CFREE_LANG_ASM; + return CFREE_LANG_C; + } } - cgtarget_free(target); /* opt_cgtarget cascades to wrapped target */ - mc_free(mc); - decl_free(decls); - pp_free(pp); /* releases the pushed lexer */ + return CFREE_LANG_C; } static void validate_bytes_input(Compiler* c, const CfreeBytesInput* in) @@ -255,33 +281,33 @@ int cfree_compile_obj_emit(CfreeCompiler* c, const CfreeCompileOptions* opts, * Link * ============================================================ */ -static Linker* build_linker(Compiler* c, const CfreeLinkOptions* opts) +static Linker* build_linker(Compiler* c, const CfreeLinkInputs* in) { Linker* linker = link_new(c); u32 i; - for (i = 0; i < opts->nobjs; ++i) { - link_add_obj(linker, opts->objs[i]); + for (i = 0; i < in->nobjs; ++i) { + link_add_obj(linker, in->objs[i]); } - for (i = 0; i < opts->nobj_bytes; ++i) { - link_add_obj_bytes(linker, opts->obj_bytes[i].name, - opts->obj_bytes[i].data, opts->obj_bytes[i].len); + for (i = 0; i < in->nobj_bytes; ++i) { + link_add_obj_bytes(linker, in->obj_bytes[i].name, + in->obj_bytes[i].data, in->obj_bytes[i].len); } - for (i = 0; i < opts->narchives; ++i) { - const CfreeBytesInputArchive* a = &opts->archives[i]; + for (i = 0; i < in->narchives; ++i) { + const CfreeBytesInputArchive* a = &in->archives[i]; link_add_archive_bytes(linker, a->input.name, a->input.data, a->input.len, - a->flags, a->group_id); + a->whole_archive, a->link_mode, a->group_id); } - if (opts->linker_script) { - link_set_script(linker, opts->linker_script); + if (in->linker_script) { + link_set_script(linker, in->linker_script); } - if (opts->entry) { - link_set_entry(linker, opts->entry); + if (in->entry) { + link_set_entry(linker, in->entry); } - if (opts->extern_resolver) { - link_set_extern_resolver(linker, opts->extern_resolver, - opts->extern_resolver_user); + if (in->extern_resolver) { + link_set_extern_resolver(linker, in->extern_resolver, + in->extern_resolver_user); } return linker; } @@ -302,7 +328,8 @@ int cfree_link_exe(CfreeCompiler* c, const CfreeLinkOptions* opts, if (!opts || !out) { panic_bad_options(c, "link_exe args missing"); } - linker = build_linker(c, opts); + linker = build_linker(c, &opts->inputs); + link_set_gc_sections(linker, opts->gc_sections); image = link_resolve(linker); /* deferred-cleanup-registered */ link_emit_image_writer(image, out); link_image_free(image); /* undefers + frees */ @@ -311,6 +338,32 @@ int cfree_link_exe(CfreeCompiler* c, const CfreeLinkOptions* opts, return 0; } +/* Shared-library link: not yet implemented in src/. The header API and + * driver glue are in place ahead of the codegen so callers can wire + * -shared / -soname / -rpath end-to-end; this entry currently reports + * a diagnostic and fails. */ +int cfree_link_shared(CfreeCompiler* c, const CfreeLinkSharedOptions* opts, + CfreeWriter* out) +{ + PanicSave saved; + + compiler_panic_save(c, &saved); + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + compiler_panic_restore(c, &saved); + return 1; + } + if (!opts || !out) { + panic_bad_options(c, "link_shared args missing"); + } + compiler_panic(c, no_loc(), + "cfree_link_shared: shared-library codegen is not yet " + "implemented in libcfree"); + /* unreachable */ + compiler_panic_restore(c, &saved); + return 1; +} + int cfree_link_jit(CfreeCompiler* c, const CfreeLinkOptions* opts, CfreeJit** out_jit) { @@ -331,7 +384,8 @@ int cfree_link_jit(CfreeCompiler* c, const CfreeLinkOptions* opts, if (!opts) { panic_bad_options(c, "link_jit options missing"); } - linker = build_linker(c, opts); + linker = build_linker(c, &opts->inputs); + link_set_gc_sections(linker, opts->gc_sections); image = link_resolve(linker); /* deferred-cleanup-registered */ *out_jit = cfree_jit_from_image(image); /* undefers + transfers ownership */ link_free(linker); @@ -340,346 +394,80 @@ int cfree_link_jit(CfreeCompiler* c, const CfreeLinkOptions* opts, } /* ============================================================ - * Convenience: cfree_run + * CfreePipeline (public) * ============================================================ - * - * cfree_run owns a single Compiler for the whole composition. Scratch - * arrays (objs, loaded-bytes tables, staging arrays) are allocated from - * c->tu and reaped by compiler_fini. File-io loans for path-shaped inputs - * are tracked through the cleanup stack so a panic anywhere reaps them; on - * success they are released explicitly. */ - -typedef struct LoadedBytes { - const CfreeFileIO* io; - CfreeBytesInput in; - CfreeFileData file; - int loaded; - CompilerCleanup* cleanup; /* compiler_defer handle */ -} LoadedBytes; - -static void loaded_bytes_release_cb(void* arg) -{ - LoadedBytes* lb = (LoadedBytes*)arg; - if (lb->loaded && lb->io && lb->io->release) { - lb->io->release(lb->io->user, &lb->file); - } - lb->loaded = 0; -} + * Thin owning wrapper over CfreeCompiler. Holds the heap pointer used at + * allocation so cfree_pipeline_free can release the wrapper without + * touching the (already-finalized) compiler's env. */ + +struct CfreePipeline { + CfreeCompiler* compiler; + Heap* heap; +}; -static void load_path_bytes(Compiler* c, const char* path, LoadedBytes* out) +CfreePipeline* cfree_pipeline_new(CfreeTarget target, const CfreeEnv* env) { - out->io = require_file_io(c, "file input"); - out->loaded = 0; - out->cleanup = NULL; - out->file.data = NULL; - out->file.size = 0; - out->file.token = NULL; - out->in.name = path; - out->in.data = NULL; - out->in.len = 0; - - /* Defer release before reading so a read failure still cleans up. The - * callback is a no-op while loaded == 0. */ - out->cleanup = compiler_defer(c, loaded_bytes_release_cb, out); - - if (!out->io->read_all(out->io->user, path, &out->file)) { - compiler_panic(c, no_loc(), "failed to read: %s", path); + Heap* h; + CfreePipeline* p; + CfreeCompiler* c; + + if (!env || !env->heap) return NULL; + h = env->heap; + p = h->alloc(h, sizeof(*p), _Alignof(CfreePipeline)); + if (!p) return NULL; + c = cfree_compiler_new(target, env); + if (!c) { + h->free(h, p, sizeof(*p)); + return NULL; } - out->loaded = 1; - out->in.data = out->file.data; - out->in.len = out->file.size; + p->compiler = c; + p->heap = h; + return p; } -static void release_loaded_bytes(Compiler* c, LoadedBytes* lb) +void cfree_pipeline_free(CfreePipeline* p) { - if (!lb->cleanup) return; - compiler_undefer(c, lb->cleanup); - lb->cleanup = NULL; - loaded_bytes_release_cb(lb); + Heap* h; + if (!p) return; + h = p->heap; + cfree_compiler_free(p->compiler); + h->free(h, p, sizeof(*p)); } -static void release_loaded_array(Compiler* c, LoadedBytes* arr, u32 n) +CfreeCompiler* cfree_pipeline_compiler(CfreePipeline* p) { - u32 i; - if (!arr) return; - for (i = 0; i < n; ++i) release_loaded_bytes(c, &arr[i]); + return p ? p->compiler : NULL; } -static u32 total_sources(const CfreeOptions* opts) +int cfree_pipeline_compile_obj(CfreePipeline* p, + const CfreeCompileOptions* opts, + const CfreeBytesInput* input, + CfreeObjBuilder** out) { - return opts->nsource_files + opts->nsource_memory; + if (!p) return 1; + return cfree_compile_obj(p->compiler, opts, input, out); } -static void validate_run_options(Compiler* c, const CfreeOptions* opts) +int cfree_pipeline_link_exe(CfreePipeline* p, const CfreeLinkOptions* opts, + CfreeWriter* out) { - u32 i; - u32 nsrc; - - if (!opts) panic_bad_options(c, "options pointer is NULL"); - nsrc = total_sources(opts); - if (nsrc == 0) { - panic_bad_options(c, "at least one C source input is required"); - } - if (opts->opt_level < 0 || opts->opt_level > 2) { - panic_bad_options(c, "opt_level must be 0, 1, or 2"); - } - if (!opts->env.heap) { - panic_bad_options(c, "env.heap is required"); - } - if (opts->output_kind != CFREE_OUTPUT_OBJ && - opts->output_kind != CFREE_OUTPUT_EXE && - opts->output_kind != CFREE_OUTPUT_JIT) { - panic_bad_options(c, "output_kind is invalid"); - } - if ((opts->output_kind == CFREE_OUTPUT_OBJ || - opts->output_kind == CFREE_OUTPUT_EXE) && !opts->output_path) { - panic_bad_options(c, "output_path is required for file output"); - } - if ((opts->output_kind == CFREE_OUTPUT_OBJ || - opts->output_kind == CFREE_OUTPUT_EXE) && - (!opts->env.file_io || !opts->env.file_io->open_writer)) { - panic_bad_options(c, "env.file_io.open_writer is required for file output"); - } - if (opts->output_kind == CFREE_OUTPUT_OBJ && nsrc != 1) { - panic_bad_options(c, "object output accepts exactly one C source input"); - } - if (opts->output_kind == CFREE_OUTPUT_OBJ && - (opts->nobject_files || opts->narchives || - opts->linker_script || opts->extern_resolver)) { - panic_bad_options(c, "link options are not valid for object output"); - } - if (opts->output_kind == CFREE_OUTPUT_EXE && opts->extern_resolver) { - panic_bad_options(c, "extern_resolver is JIT-only; not valid for exe output"); - } - if (opts->output_kind == CFREE_OUTPUT_JIT && !opts->out_jit) { - panic_bad_options(c, "out_jit is required for JIT output"); - } - - if (opts->nsource_files && - (!opts->env.file_io || !opts->env.file_io->read_all)) { - panic_bad_options(c, "env.file_io.read_all is required for source_files"); - } - for (i = 0; i < opts->nsource_files; ++i) { - if (!opts->source_files[i]) { - panic_bad_options(c, "source_files entry is NULL"); - } - } - for (i = 0; i < opts->nsource_memory; ++i) { - const CfreeBytesInput* in = &opts->source_memory[i]; - if (!in->name || (!in->data && in->len != 0)) { - panic_bad_options(c, "source_memory entry is incomplete"); - } - } - - if ((opts->nobject_files || opts->narchives || opts->linker_script) && - (!opts->env.file_io || !opts->env.file_io->read_all)) { - panic_bad_options(c, "env.file_io.read_all is required for linker file input"); - } + if (!p) return 1; + return cfree_link_exe(p->compiler, opts, out); } -/* Resolves the i-th source (paths first, then memory) into a CfreeBytesInput, - * lazily loading paths into `loaded` slots. */ -static const CfreeBytesInput* nth_source(const CfreeOptions* opts, - LoadedBytes* loaded, u32 i) +int cfree_pipeline_link_shared(CfreePipeline* p, + const CfreeLinkSharedOptions* opts, + CfreeWriter* out) { - if (i < opts->nsource_files) { - return &loaded[i].in; - } - return &opts->source_memory[i - opts->nsource_files]; + if (!p) return 1; + return cfree_link_shared(p->compiler, opts, out); } -int cfree_run(const CfreeOptions* opts) +int cfree_pipeline_link_jit(CfreePipeline* p, const CfreeLinkOptions* opts, + CfreeJit** out_jit) { - Compiler c_store; - Compiler* c = &c_store; - ObjBuilder** objs = NULL; - LoadedBytes* src_loaded = NULL; - LoadedBytes* obj_bytes = NULL; - LoadedBytes* arch_bytes = NULL; - LoadedBytes script; - const CfreeLinkScript* parsed_script = NULL; - CfreeBytesInput* obj_in = NULL; - CfreeBytesInputArchive* arch_in = NULL; - CfreeLinkOptions link_opts; - CfreeCompileOptions co; - Writer* out_writer = NULL; - u32 nsrc; - u32 i; - - if (!opts || !opts->env.heap) return 1; - - script.loaded = 0; - script.cleanup = NULL; - script.io = NULL; - script.file.data = NULL; - script.file.size = 0; - script.file.token = NULL; - - compiler_init(c, opts->target, &opts->env); - - if (setjmp(c->panic)) { - if (out_writer) cfree_writer_close(out_writer); - compiler_run_cleanups(c); - compiler_fini(c); - return 1; - } - - validate_run_options(c, opts); - nsrc = total_sources(opts); - - co.opt_level = opts->opt_level; - co.debug_info = opts->debug_info; - co.pp = opts->pp; - co.warnings_are_errors = opts->warnings_are_errors; - co.max_errors = opts->max_errors; - - /* Load source paths (if any) up front so OBJ and EXE/JIT share one path. */ - if (opts->nsource_files) { - src_loaded = arena_array(c->tu, LoadedBytes, opts->nsource_files); - for (i = 0; i < opts->nsource_files; ++i) { - src_loaded[i].loaded = 0; - src_loaded[i].cleanup = NULL; - } - for (i = 0; i < opts->nsource_files; ++i) { - load_path_bytes(c, opts->source_files[i], &src_loaded[i]); - } - } - - /* OBJ output: compile single TU and emit to writer. */ - if (opts->output_kind == CFREE_OUTPUT_OBJ) { - const CfreeBytesInput* the_input = nth_source(opts, src_loaded, 0); - out_writer = opts->env.file_io->open_writer(opts->env.file_io->user, - opts->output_path); - if (!out_writer) { - compiler_panic(c, no_loc(), "failed to open output file: %s", - opts->output_path); - } - if (cfree_compile_obj_emit(c, &co, the_input, out_writer)) { - cfree_writer_close(out_writer); - release_loaded_array(c, src_loaded, opts->nsource_files); - compiler_fini(c); - return 1; - } - cfree_writer_close(out_writer); - out_writer = NULL; - release_loaded_array(c, src_loaded, opts->nsource_files); - compiler_fini(c); - return 0; - } - - /* EXE/JIT: compile all sources, then link. */ - objs = arena_array(c->tu, ObjBuilder*, nsrc); - for (i = 0; i < nsrc; ++i) objs[i] = NULL; - for (i = 0; i < nsrc; ++i) { - const CfreeBytesInput* in = nth_source(opts, src_loaded, i); - if (cfree_compile_obj(c, &co, in, &objs[i])) { - release_loaded_array(c, src_loaded, opts->nsource_files); - compiler_fini(c); - return 1; - } - } - - if (opts->nobject_files) { - obj_bytes = arena_array(c->tu, LoadedBytes, opts->nobject_files); - for (i = 0; i < opts->nobject_files; ++i) { - obj_bytes[i].loaded = 0; - obj_bytes[i].cleanup = NULL; - } - for (i = 0; i < opts->nobject_files; ++i) { - load_path_bytes(c, opts->object_files[i], &obj_bytes[i]); - } - } - if (opts->narchives) { - arch_bytes = arena_array(c->tu, LoadedBytes, opts->narchives); - for (i = 0; i < opts->narchives; ++i) { - arch_bytes[i].loaded = 0; - arch_bytes[i].cleanup = NULL; - } - for (i = 0; i < opts->narchives; ++i) { - load_path_bytes(c, opts->archives[i], &arch_bytes[i]); - } - } - if (opts->linker_script) { - load_path_bytes(c, opts->linker_script, &script); - } - - /* The linker takes the structured form only; parse the loaded text into - * CfreeLinkScript via the helper. The parsed script is arena-owned by - * the Compiler and freed when c->tu is released by compiler_fini, so no - * explicit cfree_link_script_free is needed here. */ - if (script.loaded && - cfree_link_script_parse(c, (const char*)script.file.data, - script.file.size, &parsed_script)) { - release_loaded_array(c, src_loaded, opts->nsource_files); - release_loaded_array(c, obj_bytes, opts->nobject_files); - release_loaded_array(c, arch_bytes, opts->narchives); - release_loaded_bytes(c, &script); - compiler_fini(c); - return 1; - } - - /* Stage parallel CfreeBytesInput arrays for the linker. */ - if (opts->nobject_files) { - obj_in = arena_array(c->tu, CfreeBytesInput, opts->nobject_files); - for (i = 0; i < opts->nobject_files; ++i) obj_in[i] = obj_bytes[i].in; - } - if (opts->narchives) { - arch_in = arena_array(c->tu, CfreeBytesInputArchive, opts->narchives); - for (i = 0; i < opts->narchives; ++i) { - arch_in[i].input = arch_bytes[i].in; - arch_in[i].flags = CFREE_LAF_NONE; - arch_in[i].group_id = 0; - } - } - - link_opts.objs = (ObjBuilder* const*)objs; - link_opts.nobjs = nsrc; - link_opts.obj_bytes = obj_in; - link_opts.nobj_bytes = opts->nobject_files; - link_opts.archives = arch_in; - link_opts.narchives = opts->narchives; - link_opts.linker_script = parsed_script; - link_opts.entry = opts->entry; - link_opts.extern_resolver = opts->extern_resolver; - link_opts.extern_resolver_user = opts->extern_resolver_user; - - if (opts->output_kind == CFREE_OUTPUT_EXE) { - out_writer = opts->env.file_io->open_writer(opts->env.file_io->user, - opts->output_path); - if (!out_writer) { - compiler_panic(c, no_loc(), "failed to open output file: %s", - opts->output_path); - } - if (cfree_link_exe(c, &link_opts, out_writer)) { - cfree_writer_close(out_writer); - release_loaded_array(c, src_loaded, opts->nsource_files); - release_loaded_array(c, obj_bytes, opts->nobject_files); - release_loaded_array(c, arch_bytes, opts->narchives); - release_loaded_bytes(c, &script); - compiler_fini(c); - return 1; - } - cfree_writer_close(out_writer); - out_writer = NULL; - } else { - if (cfree_link_jit(c, &link_opts, opts->out_jit)) { - release_loaded_array(c, src_loaded, opts->nsource_files); - release_loaded_array(c, obj_bytes, opts->nobject_files); - release_loaded_array(c, arch_bytes, opts->narchives); - release_loaded_bytes(c, &script); - compiler_fini(c); - return 1; - } - } - - release_loaded_array(c, src_loaded, opts->nsource_files); - release_loaded_array(c, obj_bytes, opts->nobject_files); - release_loaded_array(c, arch_bytes, opts->narchives); - release_loaded_bytes(c, &script); - /* objs (ObjBuilders) are owned by the Compiler; arena scratch is freed - * by compiler_fini. */ - compiler_fini(c); - return 0; + if (!p) return 1; + return cfree_link_jit(p->compiler, opts, out_jit); } /* ============================================================ @@ -758,6 +546,145 @@ CfreeBinFmt cfree_detect_fmt(const uint8_t* data, size_t len) return CFREE_BIN_UNKNOWN; } +/* ============================================================ + * Target detection from object headers + * ============================================================ */ + +static void detect_target_defaults(CfreeTarget* t) +{ + t->big_endian = 0; + t->pic = CFREE_PIC_NONE; + t->code_model = CFREE_CM_DEFAULT; +} + +static void detect_set_ptr(CfreeTarget* t, CfreeArchKind arch) +{ + t->arch = arch; + switch (arch) { + case CFREE_ARCH_X86_64: + case CFREE_ARCH_ARM_64: + case CFREE_ARCH_RV64: + t->ptr_size = 8; t->ptr_align = 8; break; + case CFREE_ARCH_X86_32: + case CFREE_ARCH_ARM_32: + case CFREE_ARCH_RV32: + case CFREE_ARCH_WASM: + t->ptr_size = 4; t->ptr_align = 4; break; + } +} + +static int detect_elf(const u8* d, size_t len, CfreeTarget* out) +{ + u8 ei_class, ei_data, ei_osabi; + u16 e_machine; + if (len < 20) return 1; + ei_class = d[4]; + ei_data = d[5]; + ei_osabi = d[7]; + /* e_machine is at offset 18, in the file's endianness. */ + if (ei_data == 1) { /* little */ + e_machine = (u16)d[18] | ((u16)d[19] << 8); + } else if (ei_data == 2) { /* big */ + e_machine = (u16)d[19] | ((u16)d[18] << 8); + } else { + return 1; + } + + detect_target_defaults(out); + out->big_endian = (ei_data == 2); + out->obj = CFREE_OBJ_ELF; + + switch (e_machine) { + case 0x03: detect_set_ptr(out, CFREE_ARCH_X86_32); break; + case 0x3E: detect_set_ptr(out, CFREE_ARCH_X86_64); break; + case 0x28: detect_set_ptr(out, CFREE_ARCH_ARM_32); break; + case 0xB7: detect_set_ptr(out, CFREE_ARCH_ARM_64); break; + case 0xF3: + if (ei_class == 1) detect_set_ptr(out, CFREE_ARCH_RV32); + else if (ei_class == 2) detect_set_ptr(out, CFREE_ARCH_RV64); + else return 1; + break; + default: return 1; + } + + /* OSABI: 0=SYSV (treat as Linux), 3=Linux. Anything else: freestanding. */ + if (ei_osabi == 0 || ei_osabi == 3) out->os = CFREE_OS_LINUX; + else out->os = CFREE_OS_FREESTANDING; + return 0; +} + +static int detect_coff(const u8* d, size_t len, CfreeTarget* out) +{ + u16 machine; + if (len < 2) return 1; + machine = (u16)d[0] | ((u16)d[1] << 8); + detect_target_defaults(out); + out->obj = CFREE_OBJ_COFF; + out->os = CFREE_OS_WINDOWS; + switch (machine) { + case 0x8664: detect_set_ptr(out, CFREE_ARCH_X86_64); break; + case 0x014C: detect_set_ptr(out, CFREE_ARCH_X86_32); break; + case 0xAA64: detect_set_ptr(out, CFREE_ARCH_ARM_64); break; + case 0x01C4: detect_set_ptr(out, CFREE_ARCH_ARM_32); break; + case 0x5032: detect_set_ptr(out, CFREE_ARCH_RV32); break; + case 0x5064: detect_set_ptr(out, CFREE_ARCH_RV64); break; + default: return 1; + } + return 0; +} + +static int detect_macho(const u8* d, size_t len, CfreeTarget* out) +{ + u32 magic, cputype; + int swap, is64; + if (len < 8) return 1; + magic = (u32)d[0] | ((u32)d[1] << 8) | ((u32)d[2] << 16) | ((u32)d[3] << 24); + switch (magic) { + case 0xFEEDFACEu: swap = 0; is64 = 0; break; + case 0xFEEDFACFu: swap = 0; is64 = 1; break; + case 0xCEFAEDFEu: swap = 1; is64 = 0; break; + case 0xCFFAEDFEu: swap = 1; is64 = 1; break; + default: return 1; + } + if (!swap) { + cputype = (u32)d[4] | ((u32)d[5] << 8) | ((u32)d[6] << 16) | ((u32)d[7] << 24); + } else { + cputype = (u32)d[7] | ((u32)d[6] << 8) | ((u32)d[5] << 16) | ((u32)d[4] << 24); + } + detect_target_defaults(out); + out->obj = CFREE_OBJ_MACHO; + out->os = CFREE_OS_MACOS; + /* CPU_TYPE: 7=x86, 0x01000007=x86_64, 12=ARM, 0x0100000C=ARM64. */ + switch (cputype) { + case 0x00000007u: detect_set_ptr(out, CFREE_ARCH_X86_32); break; + case 0x01000007u: detect_set_ptr(out, CFREE_ARCH_X86_64); break; + case 0x0000000Cu: detect_set_ptr(out, CFREE_ARCH_ARM_32); break; + case 0x0100000Cu: detect_set_ptr(out, CFREE_ARCH_ARM_64); break; + default: return 1; + } + (void)is64; + return 0; +} + +int cfree_detect_target(const uint8_t* data, size_t len, CfreeTarget* out) +{ + CfreeBinFmt bin; + if (!data || !out) return 1; + bin = cfree_detect_fmt(data, len); + switch (bin) { + case CFREE_BIN_ELF: return detect_elf (data, len, out); + case CFREE_BIN_COFF: return detect_coff (data, len, out); + case CFREE_BIN_MACHO: return detect_macho(data, len, out); + case CFREE_BIN_WASM: + detect_target_defaults(out); + detect_set_ptr(out, CFREE_ARCH_WASM); + out->obj = CFREE_OBJ_WASM; + out->os = CFREE_OS_WASI; + return 0; + default: return 1; + } +} + static ObjBuilder* obj_read_bytes(Compiler* c, const char* name, const u8* data, size_t len, ObjFmt fmt) { @@ -791,25 +718,19 @@ struct CfreeObjSymIter { ObjSymIter* inner; }; -CfreeObjFile* cfree_obj_open(const CfreeEnv* env, CfreeTarget target, +CfreeObjFile* cfree_obj_open(const CfreeEnv* env, const CfreeBytesInput* input) { Heap* h; CfreeObjFile* f; - CfreeBinFmt bin; + CfreeTarget target; ObjFmt ofmt; if (!env || !env->heap || !input) return NULL; if (!input->data && input->len > 0) return NULL; - bin = cfree_detect_fmt(input->data, input->len); - switch (bin) { - case CFREE_BIN_ELF: ofmt = CFREE_OBJ_ELF; break; - case CFREE_BIN_COFF: ofmt = CFREE_OBJ_COFF; break; - case CFREE_BIN_MACHO: ofmt = CFREE_OBJ_MACHO; break; - case CFREE_BIN_WASM: ofmt = CFREE_OBJ_WASM; break; - default: return NULL; - } + if (cfree_detect_target(input->data, input->len, &target) != 0) return NULL; + ofmt = target.obj; h = (Heap*)env->heap; f = (CfreeObjFile*)h->alloc(h, sizeof(*f), _Alignof(CfreeObjFile)); @@ -873,7 +794,7 @@ CfreeObjSecInfo cfree_obj_section(const CfreeObjFile* f, uint32_t idx) out.kind = sec ? (CfreeSecKind)sec->kind : CFREE_SEC_OTHER; out.flags = sec ? (uint32_t)sec->flags : 0u; out.size = sec ? (sec->bss_size ? sec->bss_size : sec->bytes.total) : 0u; - out.align = sec ? sec->align : 0u; + out.align = (sec && sec->align > 1u) ? sec->align : 1u; return out; } diff --git a/src/link/link.h b/src/link/link.h @@ -102,12 +102,15 @@ void link_free(Linker*); LinkInputId link_add_obj(Linker*, ObjBuilder*); LinkInputId link_add_obj_bytes(Linker*, const char* name, const u8* data, size_t len); -/* `flags` is a bitmask of CfreeLinkArchFlag. `group_id == 0` means linear - * single-pass; archives sharing a nonzero `group_id` are scanned cyclically - * (equivalent to GNU ld --start-group ... --end-group). */ +/* `whole_archive` (nonzero == --whole-archive) and `link_mode` + * (CfreeLinkMode: -Bstatic / -Bdynamic / --as-needed positional state) are + * orthogonal per-archive flags. `group_id == 0` means linear single-pass; + * archives sharing a nonzero `group_id` are scanned cyclically (equivalent + * to GNU ld --start-group ... --end-group). */ LinkInputId link_add_archive_bytes(Linker*, const char* name, const u8* data, size_t len, - u8 flags, u8 group_id); + u8 whole_archive, u8 link_mode, + u8 group_id); void link_set_entry(Linker*, const char* name); /* Borrowed reference; the script and every sub-object must outlive @@ -116,6 +119,10 @@ void link_set_entry(Linker*, const char* name); * cfree_link_script_parse first. */ void link_set_script(Linker*, const CfreeLinkScript*); void link_set_extern_resolver(Linker*, LinkExternResolver, void* user); +/* Enable --gc-sections on this link. Roots are: entry symbol, exported + * symbols (shared link), and any section flagged KEEP by the linker + * script. Unreferenced sections are dropped from the output. */ +void link_set_gc_sections(Linker*, int enable); /* Symbol resolution and layout are explicit so file linking and JIT share the * same resolved image. Fatal diagnostics use Compiler.panic.