kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 0980ffb0ad7001c3752a5c02e8e003f5cef1f36c
parent 73eb8434a99589c32227cd9c6ed549570b81430b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu,  7 May 2026 17:52:21 -0700

driver/pipeline.h,c

Diffstat:
A.gitignore | 1+
AMakefile | 17+++++++++++++++++
Mdoc/DESIGN.md | 139++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
Msrc/abi/abi.h | 4++--
Msrc/arch/arch.h | 8++++----
Msrc/cg/cg.h | 6+++---
Msrc/debug/debug.h | 6+++---
Msrc/decl/decl.h | 2+-
Msrc/driver/driver.h | 2+-
Asrc/driver/pipeline.c | 589+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/driver/pipeline.h | 152+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/lex/lex.h | 13+++++++++++--
Msrc/link/link.h | 70+++++++++++++++++++++++++++++++++++++++++++---------------------------
Msrc/obj/obj.h | 4++--
Msrc/opt/ir.h | 8++++----
Msrc/opt/opt.h | 4++--
Msrc/parse/parse.h | 8++++----
Msrc/pp/pp.h | 9++++++++-
Msrc/type/type.h | 4++--
19 files changed, 945 insertions(+), 101 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1 @@ +build/ diff --git a/Makefile b/Makefile @@ -0,0 +1,17 @@ +CC = clang +SYSROOT = $(shell xcrun --show-sdk-path) +CFLAGS = -std=c11 -Wpedantic -Wall -Wextra -Werror -ffreestanding -Isrc -isysroot $(SYSROOT) + +SRCS = $(shell find src -name '*.c') +OBJS = $(patsubst src/%.c,build/%.o,$(SRCS)) + +.PHONY: all clean + +all: $(OBJS) + +build/%.o: src/%.c + @mkdir -p $(dir $@) + $(CC) $(CFLAGS) -c $< -o $@ + +clean: + rm -rf build diff --git a/doc/DESIGN.md b/doc/DESIGN.md @@ -338,14 +338,26 @@ unsupported-relocation, and layout failures are fatal diagnostics through `Compiler.panic`. Executable emission and JIT mapping consume the same `LinkImage`. File writers -read segment bytes, section placements, final addresses, and relocation records -from the image. JIT maps fresh writable memory, copies the same segment bytes, -applies relocation records at their `write_vaddr` locations, resolves allowed -external symbols through `LinkExternResolver`, changes final permissions, and -looks up exported/entry symbols by resolved `Sym` name. Object-local -`ObjSymId` values never escape as JIT lookup handles. `JitImage` owns the mapped -memory; the caller owns the `LinkImage` unless an API explicitly documents a -transfer. +(`link_emit_image_writer`) read segment bytes, section placements, final +addresses, and relocation records from the image and write to a caller-owned +`Writer*`. JIT (`cfree_jit_from_image`) maps fresh writable memory, copies the +same segment bytes, applies relocation records at their `write_vaddr` +locations, resolves allowed external symbols through `LinkExternResolver`, +changes final permissions, and looks up exported/entry symbols by resolved +`Sym` name. Object-local `ObjSymId` values never escape as JIT lookup handles. +`CfreeJit` is the public owning handle; it takes ownership of the `LinkImage` +on construction and releases both on `cfree_jit_free`. 
+ +`link_resolve` registers the returned `LinkImage` with `compiler_defer`, so a +panic between resolve and consumer (file emit or JIT mapping) reaps the +image. Successful consumers either call `link_image_free` (which undefers +and frees) or transfer ownership via `cfree_jit_from_image` (which undefers +and keeps the image alive for the JIT's lifetime). + +Linker inputs are byte buffers (`link_add_obj_bytes`, `link_add_archive_bytes`) +or already-built `ObjBuilder*` (`link_add_obj`). Path-shaped inputs are a +driver-level concern: the driver calls `c->env->file_io->read_all`, then feeds +the bytes APIs. ### 5.6 `MemAccess` — explicit memory semantics @@ -430,7 +442,7 @@ fields of `Compiler` (`src/core/core.h`) and are passed down to subsystems. | Allocator | Lifetime | Owns | |--------------|------------------------|--------------------------------------------------------| | `Pool global`| Process | Interned strings and interned types. | -| `Heap output`| Output object/exe | Section chunks, reloc tables (survive into linker). | +| `env.heap` | Output object/exe | Section chunks, reloc tables (survive into linker), JIT bookkeeping. | | `Arena tu` | One TU compile | Local symbols, parser scratch, SourceManager tables, ABI caches. | | `Arena scratch` | Reset per function | Value-stack scratch, fixup lists, lookahead buffers. | @@ -448,22 +460,39 @@ Rules: compile/link invocation and are read by diagnostics, dependency output, and DWARF emission. -`Heap output` is a normal heap (typically `heap_libc`). The JIT does not -compile directly into executable memory: `link_jit_image` consumes a resolved -`LinkImage`, mmaps a fresh region, copies laid-out segments in, applies -relocations in-place, and `mprotect`s final permissions. The `Heap` vtable -still exists so the JIT can swap allocators for the *destination* mapping and -so tests can substitute fakes. +`env.heap` is a normal heap (typically `heap_libc`). 
The JIT does not +compile directly into executable memory: `cfree_jit_from_image` consumes a +resolved `LinkImage`, mmaps a fresh region, copies laid-out segments in, +applies relocations in-place, and `mprotect`s final permissions. The `Heap` +vtable still exists so the JIT can swap allocators for the *destination* +mapping and so tests can substitute fakes. ## 7. Error handling -A single `Compiler` carries a `jmp_buf` and a `DiagSink`. Fatal errors call -`compiler_panic`, which emits a diagnostic and `longjmp`s out of the entire -parse/CG pipeline. Drivers establish the `setjmp` boundary at TU granularity. +A single `Compiler` carries a `jmp_buf` and references a host-supplied +`DiagSink` through `CfreeEnv`. Fatal errors call `compiler_panic`, which emits +a diagnostic and `longjmp`s out of the entire parse/CG pipeline. Drivers +establish the `setjmp` boundary at TU or pipeline granularity. + +Layered driver functions (`cfree_compile_obj`, `cfree_link_*`, `cfree_run`) +each install their own boundary. To remain composable, every such function +that runs on a caller-supplied `Compiler` (`cfree_compile_obj`, +`cfree_compile_obj_emit`, `cfree_link_exe`, `cfree_link_jit`) +saves `c->panic` via `compiler_panic_save` on entry and restores it via +`compiler_panic_restore` on every exit path (panic-return after +`compiler_run_cleanups`, and success). `cfree_run` is the exception: it +creates and owns its `Compiler`, so there is no outer boundary to preserve +and it installs its `setjmp` directly. Without save/restore, an inner +`setjmp` clobbers an outer one and any subsequent `compiler_panic` in the +outer caller longjmps into the inner's already-returned stack frame. This means almost no function in `parse`, `cg`, or `arch` returns an error. The -happy path is the only path. Subsystems clean up via arena reset, not by -unwinding allocations one-by-one. +happy path is the only path. Arena scratch is reset rather than unwound +one-by-one. + +Subsystem objects with non-arena resources (file handles, mmaps, child +allocators) self-register a cleanup with `compiler_defer` in their `_new` +and call `compiler_undefer` from their `_free`. 
The pipeline-level +`setjmp` handler runs `compiler_run_cleanups`, which walks the LIFO stack +and releases everything still registered. This keeps `compiler_panic` +correct even when failure happens deep inside a composition that has +allocated several subsystems. What is *not* fatal: warnings, recoverable parse errors that have a sensible recovery point (skip-to-`;`, skip-to-`}`). The parser uses limited internal @@ -757,45 +786,69 @@ equal types thanks to `Pool global`). ## 13. Build composition -A typical `cc` invocation composes the pipeline like this: +The driver-facing API is layered (`src/driver/pipeline.h`). Most consumers +should not hand-compose the pipeline; they should call one of: + +- `cfree_compile_obj(c, opts, input, &ob)` — one TU → in-memory `ObjBuilder*` + for chaining into the linker. +- `cfree_compile_obj_emit(c, opts, input, writer)` — one TU → encoded `.o` + bytes via the caller's `Writer*` (cc -c). +- `cfree_link_exe(c, link_opts, writer)` — link → executable bytes. +- `cfree_link_jit(c, link_opts, &jit)` — link → owning `CfreeJit*`. +- `cfree_run(opts)` — convenience composition for the multi-input case. + +Data contracts at each boundary: + +- `compile_obj → link`: `ObjBuilder*` is the cross-API currency. The + returned builder is finalized; do not write further. Lifetime is tied to + the `Compiler`; it must remain alive until link is done. +- `compile_obj_emit → file`: `Writer*`. The `ObjBuilder` is consumed and + released inside the call. On nonzero return the Writer may contain + partial output and should not be consumed. +- `link → exe`: `Writer*`. No path appears in the core API. Same partial- + output caveat on nonzero return. +- `link → jit`: `CfreeJit*` owns its `LinkImage` and mapped pages; lookups + are by `Sym` (interned name) — `ObjSymId` never escapes. 
+ +Each layered function (`cfree_compile_obj`, `cfree_compile_obj_emit`, +`cfree_link_exe`, `cfree_link_jit`) saves and restores `Compiler.panic` +around its own `setjmp`, so they are safely callable from inside another +active panic boundary (for example from `cfree_run`). Library resolution +(`-lfoo` against `-L` paths) is the CLI driver's job; archives reaching +`CfreeOptions` must already be concrete paths. + +Path-shaped helpers (`cfree -c file.c -o file.o`, `ld a.o b.o`, etc.) live +in driver-level adapters. They call `c->env->file_io->read_all` to obtain +byte buffers, then feed the byte/Writer APIs above. The freestanding core +never takes paths. + +The internal one-TU sequence used by `cfree_compile_obj` looks like: ```c -Compiler c_store; -Compiler* c = &c_store; -compiler_init(c, target); /* creates SourceManager, ABI, allocators */ -Pp* pp = pp_new(c); -ObjBuilder* ob = obj_new(c); -DeclTable* decls = decl_new(c, ob); -MCEmitter* mc = mc_new(c, ob); -CGTarget* a = cgtarget_new(c, ob, mc); +ObjBuilder* ob = obj_new(c); +Pp* pp = pp_new(c); /* reads c->env->file_io */ +DeclTable* decls = decl_new(c, ob); +MCEmitter* mc = mc_new(c, ob); +CGTarget* a = cgtarget_new(c, ob, mc); if (opt_level >= 1) a = opt_cgtarget_new(c, a, opt_level); Debug* d = dbg ? debug_new(c, ob) : NULL; CG* g = cg_new(c, a, d); -pp_push_input(pp, lex_open(c, input_path)); +pp_push_input(pp, lex_open_mem(c, name, src, len)); /* borrows src */ parse_c(c, pp, decls, g); cgtarget_finalize(a); /* IPO + lowering at -O2; no-op otherwise */ if (d) debug_emit(d); obj_finalize(ob); -Writer* w = writer_file(output_path); -emit_elf(c, ob, w); -writer_close(w); ``` Order is load-bearing: `cgtarget_finalize` flushes lowered code, `debug_emit` appends `.debug_*` sections, `obj_finalize` freezes the read-side view, and -only then may file emitters consume the builder. +only then may file emitters or the linker consume the builder. 
-JIT swaps the final emit for: - -```c -Linker* l = link_new(c); -link_add_obj(l, ob); -LinkImage* img = link_resolve(l); -JitImage* jit = link_jit_image(img); -entry = jit_image_lookup(jit, entry_sym); -``` +Each subsystem `_new` registers a cleanup with `compiler_defer` and the +matching `_free` pops it via `compiler_undefer` (§7), so a panic anywhere +in the sequence above unwinds correctly through `compiler_run_cleanups`. ## 14. Open questions diff --git a/src/abi/abi.h b/src/abi/abi.h @@ -1,8 +1,8 @@ #ifndef CFREE_ABI_H #define CFREE_ABI_H -#include "../core/core.h" -#include "../type/type.h" +#include "core/core.h" +#include "type/type.h" /* TargetABI is the single authority for target-dependent C layout and calling * convention decisions. Type remains structural and ABI-neutral; all sizes, diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -1,10 +1,10 @@ #ifndef CFREE_ARCH_H #define CFREE_ARCH_H -#include "../core/core.h" -#include "../type/type.h" -#include "../abi/abi.h" -#include "../obj/obj.h" +#include "core/core.h" +#include "type/type.h" +#include "abi/abi.h" +#include "obj/obj.h" /* Reg is wide enough for opt_cgtarget to hand out unbounded virtual registers * (one per defined value). Target backends use only a small subset. */ diff --git a/src/cg/cg.h b/src/cg/cg.h @@ -1,9 +1,9 @@ #ifndef CFREE_CG_H #define CFREE_CG_H -#include "../arch/arch.h" -#include "../decl/decl.h" -#include "../type/type.h" +#include "arch/arch.h" +#include "decl/decl.h" +#include "type/type.h" typedef struct CG CG; typedef struct Debug Debug; diff --git a/src/debug/debug.h b/src/debug/debug.h @@ -1,9 +1,9 @@ #ifndef CFREE_DEBUG_H #define CFREE_DEBUG_H -#include "../core/core.h" -#include "../type/type.h" -#include "../arch/arch.h" +#include "core/core.h" +#include "type/type.h" +#include "arch/arch.h" /* DWARF debug info. 
The producer side (CG, CGTarget/MCEmitter, opt) feeds events here as * compilation runs; the consumer side writes .debug_* sections into the same diff --git a/src/decl/decl.h b/src/decl/decl.h @@ -1,7 +1,7 @@ #ifndef CFREE_DECL_H #define CFREE_DECL_H -#include "../arch/arch.h" +#include "arch/arch.h" /* C declaration semantics. This layer is deliberately above ObjBuilder: * ObjBuilder stores object-format facts, while DeclTable owns C linkage, diff --git a/src/driver/driver.h b/src/driver/driver.h @@ -1,7 +1,7 @@ #ifndef CFREE_DRIVER_H #define CFREE_DRIVER_H -#include "../core/core.h" +#include "core/core.h" typedef enum Tool { TOOL_CC, diff --git a/src/driver/pipeline.c b/src/driver/pipeline.c @@ -0,0 +1,589 @@ +#include "driver/pipeline.h" + +#include "arch/arch.h" +#include "cg/cg.h" +#include "core/arena.h" +#include "debug/debug.h" +#include "decl/decl.h" +#include "lex/lex.h" +#include "obj/obj.h" +#include "opt/opt.h" +#include "parse/parse.h" +#include "pp/pp.h" + +/* ============================================================ + * Helpers + * ============================================================ */ + +static SrcLoc no_loc(void) +{ + SrcLoc loc; + loc.file_id = 0; + loc.line = 0; + loc.col = 0; + return loc; +} + +static _Noreturn void panic_bad_options(Compiler* c, const char* msg) +{ + compiler_panic(c, no_loc(), "bad cfree options: %s", msg); +} + +static const CfreeFileIO* require_file_io(Compiler* c, const char* what) +{ + if (!c->env || !c->env->file_io) { + compiler_panic(c, no_loc(), "%s requires env.file_io", what); + } + return c->env->file_io; +} + +static void apply_pp_options(Pp* pp, const CfreePpOptions* opts) +{ + u32 i; + + for (i = 0; i < opts->ninclude_dirs; ++i) { + pp_add_include_dir(pp, opts->include_dirs[i], 0); + } + for (i = 0; i < opts->nsystem_include_dirs; ++i) { + pp_add_include_dir(pp, opts->system_include_dirs[i], 1); + } + for (i = 0; i < opts->ndefines; ++i) { + const char* body = opts->defines[i].body ? 
opts->defines[i].body : "1"; + pp_define(pp, opts->defines[i].name, body); + } + for (i = 0; i < opts->nundefines; ++i) { + pp_undef(pp, opts->undefines[i]); + } +} + +/* ============================================================ + * Compile one TU + * ============================================================ */ + +/* One-TU compile against a fresh ObjBuilder. The builder is finalized on + * exit so it is immediately consumable by the linker or an emit_* function. + * The input bytes must outlive this call. */ +static void compile_into(Compiler* c, const CfreeCompileOptions* opts, + const CfreeBytesInput* input, ObjBuilder* ob) +{ + Pp* pp = pp_new(c); + Lexer* lex = lex_open_mem(c, input->name, + (const char*)input->data, input->len); + DeclTable* decls = decl_new(c, ob); + MCEmitter* mc = mc_new(c, ob); + CGTarget* target = cgtarget_new(c, ob, mc); + Debug* debug = NULL; + CG* cg = NULL; + + apply_pp_options(pp, &opts->pp); + pp_push_input(pp, lex); /* PP owns the lexer from here on */ + + if (opts->opt_level > 0) { + target = opt_cgtarget_new(c, target, opts->opt_level); + } + if (opts->debug_info) { + debug = debug_new(c, ob); + } + cg = cg_new(c, target, debug); + + parse_c(c, pp, decls, cg); + cgtarget_finalize(target); + if (debug) { + debug_emit(debug); + } + obj_finalize(ob); + + cg_free(cg); + if (debug) { + debug_free(debug); + } + cgtarget_free(target); /* opt_cgtarget cascades to wrapped target */ + mc_free(mc); + decl_free(decls); + pp_free(pp); /* releases the pushed lexer */ +} + +static void validate_bytes_input(Compiler* c, const CfreeBytesInput* in) +{ + if (!in->name) panic_bad_options(c, "input name is NULL"); + if (!in->data && in->len != 0) { + panic_bad_options(c, "input data is NULL but len > 0"); + } +} + +int cfree_compile_obj(Compiler* c, const CfreeCompileOptions* opts, + const CfreeBytesInput* input, ObjBuilder** out) +{ + PanicSave saved; + ObjBuilder* ob; + + if (!out) { + return 1; + } + *out = NULL; + compiler_panic_save(c, 
&saved); + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + compiler_panic_restore(c, &saved); + return 1; + } + if (!opts || !input) { + panic_bad_options(c, "compile options or input is NULL"); + } + validate_bytes_input(c, input); + ob = obj_new(c); + compile_into(c, opts, input, ob); + *out = ob; + compiler_panic_restore(c, &saved); + return 0; +} + +static void emit_object_bytes(Compiler* c, ObjBuilder* ob, Writer* w) +{ + switch (c->target.obj) { + case OBJ_ELF: + emit_elf(c, ob, w); + break; + case OBJ_COFF: + emit_coff(c, ob, w); + break; + case OBJ_MACHO: + emit_macho(c, ob, w); + break; + case OBJ_WASM: + emit_wasm(c, ob, w); + break; + } +} + +int cfree_compile_obj_emit(Compiler* c, const CfreeCompileOptions* opts, + const CfreeBytesInput* input, Writer* out) +{ + PanicSave saved; + ObjBuilder* ob; + + compiler_panic_save(c, &saved); + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + compiler_panic_restore(c, &saved); + return 1; + } + if (!opts || !input || !out) { + panic_bad_options(c, "compile_emit args missing"); + } + validate_bytes_input(c, input); + ob = obj_new(c); + compile_into(c, opts, input, ob); + emit_object_bytes(c, ob, out); + obj_free(ob); + compiler_panic_restore(c, &saved); + return 0; +} + +/* ============================================================ + * Link + * ============================================================ */ + +static Linker* build_linker(Compiler* c, const CfreeLinkOptions* opts) +{ + Linker* linker = link_new(c); + u32 i; + + for (i = 0; i < opts->nobjs; ++i) { + link_add_obj(linker, opts->objs[i]); + } + for (i = 0; i < opts->nobj_bytes; ++i) { + link_add_obj_bytes(linker, opts->obj_bytes[i].name, + opts->obj_bytes[i].data, opts->obj_bytes[i].len); + } + for (i = 0; i < opts->narchives; ++i) { + link_add_archive_bytes(linker, opts->archives[i].name, + opts->archives[i].data, opts->archives[i].len); + } + if (opts->linker_script_text) { + link_set_script_text(linker, opts->linker_script_text, + 
opts->linker_script_len); + } + if (opts->entry) { + link_set_entry(linker, opts->entry); + } + if (opts->extern_resolver) { + link_set_extern_resolver(linker, opts->extern_resolver, + opts->extern_resolver_user); + } + return linker; +} + +int cfree_link_exe(Compiler* c, const CfreeLinkOptions* opts, Writer* out) +{ + PanicSave saved; + Linker* linker; + LinkImage* image; + + compiler_panic_save(c, &saved); + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + compiler_panic_restore(c, &saved); + return 1; + } + if (!opts || !out) { + panic_bad_options(c, "link_exe args missing"); + } + linker = build_linker(c, opts); + image = link_resolve(linker); /* deferred-cleanup-registered */ + link_emit_image_writer(image, out); + link_image_free(image); /* undefers + frees */ + link_free(linker); + compiler_panic_restore(c, &saved); + return 0; +} + +int cfree_link_jit(Compiler* c, const CfreeLinkOptions* opts, + CfreeJit** out_jit) +{ + PanicSave saved; + Linker* linker; + LinkImage* image; + + if (!out_jit) { + return 1; + } + *out_jit = NULL; + compiler_panic_save(c, &saved); + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + compiler_panic_restore(c, &saved); + return 1; + } + if (!opts) { + panic_bad_options(c, "link_jit options missing"); + } + linker = build_linker(c, opts); + image = link_resolve(linker); /* deferred-cleanup-registered */ + *out_jit = cfree_jit_from_image(image); /* undefers + transfers ownership */ + link_free(linker); + compiler_panic_restore(c, &saved); + return 0; +} + +/* ============================================================ + * Convenience: cfree_run + * ============================================================ + * + * cfree_run owns a single Compiler for the whole composition. Scratch + * arrays (objs, loaded-bytes tables, staging arrays) are allocated from + * c->tu and reaped by compiler_fini. 
File-io loans for path-shaped inputs + * are tracked through the cleanup stack so a panic anywhere reaps them; on + * success they are released explicitly. */ + +typedef struct LoadedBytes { + const CfreeFileIO* io; + CfreeBytesInput in; + CfreeFileData file; + int loaded; + CompilerCleanup* cleanup; /* compiler_defer handle */ +} LoadedBytes; + +static void loaded_bytes_release_cb(void* arg) +{ + LoadedBytes* lb = (LoadedBytes*)arg; + if (lb->loaded && lb->io && lb->io->release) { + lb->io->release(lb->io->user, &lb->file); + } + lb->loaded = 0; +} + +static void load_path_bytes(Compiler* c, const char* path, LoadedBytes* out) +{ + out->io = require_file_io(c, "file input"); + out->loaded = 0; + out->cleanup = NULL; + out->file.data = NULL; + out->file.size = 0; + out->file.token = NULL; + out->in.name = path; + out->in.data = NULL; + out->in.len = 0; + + /* Defer release before reading so a read failure still cleans up. The + * callback is a no-op while loaded == 0. */ + out->cleanup = compiler_defer(c, loaded_bytes_release_cb, out); + + if (!out->io->read_all(out->io->user, path, &out->file)) { + compiler_panic(c, no_loc(), "failed to read: %s", path); + } + out->loaded = 1; + out->in.data = out->file.data; + out->in.len = out->file.size; +} + +static void release_loaded_bytes(Compiler* c, LoadedBytes* lb) +{ + if (!lb->cleanup) return; + compiler_undefer(c, lb->cleanup); + lb->cleanup = NULL; + loaded_bytes_release_cb(lb); +} + +static void release_loaded_array(Compiler* c, LoadedBytes* arr, u32 n) +{ + u32 i; + if (!arr) return; + for (i = 0; i < n; ++i) release_loaded_bytes(c, &arr[i]); +} + +static u32 total_sources(const CfreeOptions* opts) +{ + return opts->nsource_files + opts->nsource_memory; +} + +static void validate_run_options(Compiler* c, const CfreeOptions* opts) +{ + u32 i; + u32 nsrc; + + if (!opts) panic_bad_options(c, "options pointer is NULL"); + nsrc = total_sources(opts); + if (nsrc == 0) { + panic_bad_options(c, "at least one C source input 
is required"); + } + if (opts->opt_level < 0 || opts->opt_level > 2) { + panic_bad_options(c, "opt_level must be 0, 1, or 2"); + } + if (!opts->env.heap) { + panic_bad_options(c, "env.heap is required"); + } + if (opts->output_kind != CFREE_OUTPUT_OBJ && + opts->output_kind != CFREE_OUTPUT_EXE && + opts->output_kind != CFREE_OUTPUT_JIT) { + panic_bad_options(c, "output_kind is invalid"); + } + if ((opts->output_kind == CFREE_OUTPUT_OBJ || + opts->output_kind == CFREE_OUTPUT_EXE) && !opts->output_path) { + panic_bad_options(c, "output_path is required for file output"); + } + if ((opts->output_kind == CFREE_OUTPUT_OBJ || + opts->output_kind == CFREE_OUTPUT_EXE) && + (!opts->env.file_io || !opts->env.file_io->open_writer)) { + panic_bad_options(c, "env.file_io.open_writer is required for file output"); + } + if (opts->output_kind == CFREE_OUTPUT_OBJ && nsrc != 1) { + panic_bad_options(c, "object output accepts exactly one C source input"); + } + if (opts->output_kind == CFREE_OUTPUT_OBJ && + (opts->nobject_files || opts->narchives || + opts->linker_script || opts->extern_resolver)) { + panic_bad_options(c, "link options are not valid for object output"); + } + if (opts->output_kind == CFREE_OUTPUT_EXE && opts->extern_resolver) { + panic_bad_options(c, "extern_resolver is JIT-only; not valid for exe output"); + } + if (opts->output_kind == CFREE_OUTPUT_JIT && !opts->out_jit) { + panic_bad_options(c, "out_jit is required for JIT output"); + } + + if (opts->nsource_files && + (!opts->env.file_io || !opts->env.file_io->read_all)) { + panic_bad_options(c, "env.file_io.read_all is required for source_files"); + } + for (i = 0; i < opts->nsource_files; ++i) { + if (!opts->source_files[i]) { + panic_bad_options(c, "source_files entry is NULL"); + } + } + for (i = 0; i < opts->nsource_memory; ++i) { + const CfreeBytesInput* in = &opts->source_memory[i]; + if (!in->name || (!in->data && in->len != 0)) { + panic_bad_options(c, "source_memory entry is incomplete"); + } + } + + 
if ((opts->nobject_files || opts->narchives || opts->linker_script) && + (!opts->env.file_io || !opts->env.file_io->read_all)) { + panic_bad_options(c, "env.file_io.read_all is required for linker file input"); + } +} + +/* Resolves the i-th source (paths first, then memory) into a CfreeBytesInput. + * Path sources must already have been loaded into `loaded` by the caller; + * this function only indexes and performs no I/O. */ +static const CfreeBytesInput* nth_source(const CfreeOptions* opts, + LoadedBytes* loaded, u32 i) +{ + if (i < opts->nsource_files) { + return &loaded[i].in; + } + return &opts->source_memory[i - opts->nsource_files]; +} + +int cfree_run(const CfreeOptions* opts) +{ + Compiler c_store; + Compiler* c = &c_store; + ObjBuilder** objs = NULL; + LoadedBytes* src_loaded = NULL; + LoadedBytes* obj_bytes = NULL; + LoadedBytes* arch_bytes = NULL; + LoadedBytes script; + CfreeBytesInput* obj_in = NULL; + CfreeBytesInput* arch_in = NULL; + CfreeLinkOptions link_opts; + CfreeCompileOptions co; + Writer* out_writer = NULL; + u32 nsrc; + u32 i; + + if (!opts || !opts->env.heap) return 1; + + script.loaded = 0; + script.cleanup = NULL; + script.io = NULL; + script.file.data = NULL; + script.file.size = 0; + script.file.token = NULL; + + compiler_init(c, opts->target, &opts->env); + + if (setjmp(c->panic)) { + if (out_writer) writer_close(out_writer); + compiler_run_cleanups(c); + compiler_fini(c); + return 1; + } + + validate_run_options(c, opts); + nsrc = total_sources(opts); + + co.opt_level = opts->opt_level; + co.debug_info = opts->debug_info; + co.pp = opts->pp; + + /* Load source paths (if any) up front so OBJ and EXE/JIT share one path. */ + if (opts->nsource_files) { + src_loaded = arena_array(c->tu, LoadedBytes, opts->nsource_files); + for (i = 0; i < opts->nsource_files; ++i) { + src_loaded[i].loaded = 0; + src_loaded[i].cleanup = NULL; + } + for (i = 0; i < opts->nsource_files; ++i) { + load_path_bytes(c, opts->source_files[i], &src_loaded[i]); + } + } + + /* OBJ output: compile single TU and emit to writer. 
*/ + if (opts->output_kind == CFREE_OUTPUT_OBJ) { + const CfreeBytesInput* the_input = nth_source(opts, src_loaded, 0); + out_writer = opts->env.file_io->open_writer(opts->env.file_io->user, + opts->output_path); + if (!out_writer) { + compiler_panic(c, no_loc(), "failed to open output file: %s", + opts->output_path); + } + if (cfree_compile_obj_emit(c, &co, the_input, out_writer)) { + writer_close(out_writer); + release_loaded_array(c, src_loaded, opts->nsource_files); + compiler_fini(c); + return 1; + } + writer_close(out_writer); + out_writer = NULL; + release_loaded_array(c, src_loaded, opts->nsource_files); + compiler_fini(c); + return 0; + } + + /* EXE/JIT: compile all sources, then link. */ + objs = arena_array(c->tu, ObjBuilder*, nsrc); + for (i = 0; i < nsrc; ++i) objs[i] = NULL; + for (i = 0; i < nsrc; ++i) { + const CfreeBytesInput* in = nth_source(opts, src_loaded, i); + if (cfree_compile_obj(c, &co, in, &objs[i])) { + release_loaded_array(c, src_loaded, opts->nsource_files); + compiler_fini(c); + return 1; + } + } + + if (opts->nobject_files) { + obj_bytes = arena_array(c->tu, LoadedBytes, opts->nobject_files); + for (i = 0; i < opts->nobject_files; ++i) { + obj_bytes[i].loaded = 0; + obj_bytes[i].cleanup = NULL; + } + for (i = 0; i < opts->nobject_files; ++i) { + load_path_bytes(c, opts->object_files[i], &obj_bytes[i]); + } + } + if (opts->narchives) { + arch_bytes = arena_array(c->tu, LoadedBytes, opts->narchives); + for (i = 0; i < opts->narchives; ++i) { + arch_bytes[i].loaded = 0; + arch_bytes[i].cleanup = NULL; + } + for (i = 0; i < opts->narchives; ++i) { + load_path_bytes(c, opts->archives[i], &arch_bytes[i]); + } + } + if (opts->linker_script) { + load_path_bytes(c, opts->linker_script, &script); + } + + /* Stage parallel CfreeBytesInput arrays for the linker. 
*/ + if (opts->nobject_files) { + obj_in = arena_array(c->tu, CfreeBytesInput, opts->nobject_files); + for (i = 0; i < opts->nobject_files; ++i) obj_in[i] = obj_bytes[i].in; + } + if (opts->narchives) { + arch_in = arena_array(c->tu, CfreeBytesInput, opts->narchives); + for (i = 0; i < opts->narchives; ++i) arch_in[i] = arch_bytes[i].in; + } + + link_opts.objs = (ObjBuilder* const*)objs; + link_opts.nobjs = nsrc; + link_opts.obj_bytes = obj_in; + link_opts.nobj_bytes = opts->nobject_files; + link_opts.archives = arch_in; + link_opts.narchives = opts->narchives; + link_opts.linker_script_text = script.loaded ? (const char*)script.file.data : NULL; + link_opts.linker_script_len = script.loaded ? script.file.size : 0; + link_opts.entry = opts->entry; + link_opts.extern_resolver = opts->extern_resolver; + link_opts.extern_resolver_user = opts->extern_resolver_user; + + if (opts->output_kind == CFREE_OUTPUT_EXE) { + out_writer = opts->env.file_io->open_writer(opts->env.file_io->user, + opts->output_path); + if (!out_writer) { + compiler_panic(c, no_loc(), "failed to open output file: %s", + opts->output_path); + } + if (cfree_link_exe(c, &link_opts, out_writer)) { + writer_close(out_writer); + release_loaded_array(c, src_loaded, opts->nsource_files); + release_loaded_array(c, obj_bytes, opts->nobject_files); + release_loaded_array(c, arch_bytes, opts->narchives); + release_loaded_bytes(c, &script); + compiler_fini(c); + return 1; + } + writer_close(out_writer); + out_writer = NULL; + } else { + if (cfree_link_jit(c, &link_opts, opts->out_jit)) { + release_loaded_array(c, src_loaded, opts->nsource_files); + release_loaded_array(c, obj_bytes, opts->nobject_files); + release_loaded_array(c, arch_bytes, opts->narchives); + release_loaded_bytes(c, &script); + compiler_fini(c); + return 1; + } + } + + release_loaded_array(c, src_loaded, opts->nsource_files); + release_loaded_array(c, obj_bytes, opts->nobject_files); + release_loaded_array(c, arch_bytes, opts->narchives); + 
release_loaded_bytes(c, &script); + /* objs (ObjBuilders) are owned by the Compiler; arena scratch is freed + * by compiler_fini. */ + compiler_fini(c); + return 0; +} diff --git a/src/driver/pipeline.h b/src/driver/pipeline.h @@ -0,0 +1,152 @@ +#ifndef CFREE_PIPELINE_H +#define CFREE_PIPELINE_H + +#include "core/core.h" +#include "link/link.h" +#include "obj/obj.h" + +/* Layered driver-facing API. Four core operations + one convenience: + * + * cfree_compile_obj one C TU -> in-memory ObjBuilder (chains into link) + * cfree_compile_obj_emit one C TU -> Writer (cc -c) + * cfree_link_exe link inputs -> Writer (ld) + * cfree_link_jit link inputs -> owning CfreeJit handle + * cfree_run convenience: compile N inputs, then emit obj or link/jit + * + * The freestanding core takes only byte buffers and Writers — never paths. + * Path-shaped helpers live in driver-level adapters (and in cfree_run, which + * is itself a driver convenience), and feed the byte/Writer APIs after + * consulting Compiler.env->file_io. + * + * Errors are reported through Compiler.panic. Each top-level function in + * this header saves and restores Compiler.panic around its own setjmp, so + * these functions are safely nestable: a caller that has already installed + * a panic handler keeps it across calls into this API. On panic the inner + * function runs compiler_run_cleanups, restores the caller's jmp_buf, and + * returns nonzero. */ + +typedef struct CfreeDefine { + const char* name; + const char* body; /* NULL means "1" */ +} CfreeDefine; + +/* Generic byte-buffer input. Used for C source TUs, encoded objects, and + * archives. `name` is a diagnostic label (typically a path or pseudo-path); + * the linker interns it on entry. `data` may be any byte-shaped content. */ +typedef struct CfreeBytesInput { + const char* name; + const u8* data; + size_t len; +} CfreeBytesInput; + +/* Preprocessor configuration shared by compile_* and the convenience run. 
*/ +typedef struct CfreePpOptions { + const char* const* include_dirs; + u32 ninclude_dirs; + const char* const* system_include_dirs; + u32 nsystem_include_dirs; + const CfreeDefine* defines; + u32 ndefines; + const char* const* undefines; + u32 nundefines; +} CfreePpOptions; + +/* Per-TU compile knobs. */ +typedef struct CfreeCompileOptions { + int opt_level; /* 0 direct, 1 minimal, 2 full */ + int debug_info; + CfreePpOptions pp; +} CfreeCompileOptions; + +/* Compile one C input (memory bytes). + * + * cfree_compile_obj returns an ObjBuilder owned by Compiler. The builder is + * already finalized; do not write to it further. Pass it to link_add_obj or + * an emit_* function. It must be alive until the linker has consumed it + * (link_resolve). The Compiler must outlive the returned builder. + * + * cfree_compile_obj_emit writes the encoded object to `out` and frees its + * temporary builder before returning. The Writer is not closed. On nonzero + * return the Writer may contain partial output and should not be consumed. + * + * Returns 0 on success, nonzero if the compiler panicked. */ +int cfree_compile_obj(Compiler*, const CfreeCompileOptions*, + const CfreeBytesInput* input, ObjBuilder** out); +int cfree_compile_obj_emit(Compiler*, const CfreeCompileOptions*, + const CfreeBytesInput* input, Writer* out); + +typedef struct CfreeLinkOptions { + ObjBuilder* const* objs; /* fresh-compiled, by reference */ + u32 nobjs; + const CfreeBytesInput* obj_bytes; + u32 nobj_bytes; + const CfreeBytesInput* archives; + u32 narchives; + const char* linker_script_text; /* NULL = no script. + * Non-NULL: linker_script_len + * must match the buffer. */ + size_t linker_script_len; + Sym entry; /* 0 = format/target default */ + LinkExternResolver extern_resolver; + void* extern_resolver_user; +} CfreeLinkOptions; + +/* All bytes inputs (obj_bytes, archives) must remain alive until the + * matching cfree_link_* call returns. */ + +/* Link to executable. 
Writer is not closed by the call. On nonzero return + * the Writer may contain partial output and should not be consumed. */ +int cfree_link_exe(Compiler*, const CfreeLinkOptions*, Writer* out); + +/* Link as JIT. On success, *out_jit owns its LinkImage and mapped pages and + * must be released with cfree_jit_free. */ +int cfree_link_jit(Compiler*, const CfreeLinkOptions*, CfreeJit** out_jit); + +/* ----- Convenience: compose compile + link/jit for the common case. ----- */ + +typedef enum CfreeOutputKind { + CFREE_OUTPUT_OBJ, /* exactly one source input total */ + CFREE_OUTPUT_EXE, + CFREE_OUTPUT_JIT, +} CfreeOutputKind; + +typedef struct CfreeOptions { + Target target; + CfreeEnv env; + CfreeOutputKind output_kind; + + int opt_level; + int debug_info; + + const char* output_path; /* OBJ/EXE: required, used via env.file_io */ + + /* C source inputs. Path-shaped sources are read via env.file_io; memory + * sources are passed directly. The combined sequence (paths first, then + * memory) is compiled in order. For OBJ output exactly one total source + * is required. */ + const char* const* source_files; + u32 nsource_files; + const CfreeBytesInput* source_memory; + u32 nsource_memory; + + CfreePpOptions pp; + + /* Path-shaped link inputs: driver reads via env.file_io. Library + * resolution (-lfoo against -L paths) is the CLI driver's job; by the + * time options reach cfree_run, archives must be concrete paths. 
*/ + const char* const* object_files; + u32 nobject_files; + const char* const* archives; + u32 narchives; + const char* linker_script; /* path; driver reads via env.file_io */ + + Sym entry; + LinkExternResolver extern_resolver; + void* extern_resolver_user; + + CfreeJit** out_jit; /* JIT only: caller owns on success */ +} CfreeOptions; + +int cfree_run(const CfreeOptions*); + +#endif diff --git a/src/lex/lex.h b/src/lex/lex.h @@ -1,7 +1,7 @@ #ifndef CFREE_LEX_H #define CFREE_LEX_H -#include "../core/core.h" +#include "core/core.h" typedef enum TokKind { TOK_EOF = 0, @@ -89,7 +89,16 @@ typedef struct Tok { typedef struct Lexer Lexer; -Lexer* lex_open(Compiler*, const char* path); +/* lex_open_mem borrows (src, len). The lexer does not copy source bytes; + * tokens carry SrcLoc + Sym spellings into the global pool, but diagnostics + * and the preprocessor's directive scanner read from the borrowed buffer. + * + * Ownership: a Lexer that has been handed to pp_push_input is owned by PP + * thereafter — PP closes it on EOF-pop or in pp_free. Callers only call + * lex_close on a Lexer they have not pushed (e.g. standalone .s assembly). + * + * The borrowed (src, len) buffer must outlive the Lexer, which for a pushed + * Lexer means outliving pp_free. 
*/ Lexer* lex_open_mem(Compiler*, const char* name, const char* src, size_t len); void lex_close(Lexer*); diff --git a/src/link/link.h b/src/link/link.h @@ -1,15 +1,16 @@ #ifndef CFREE_LINK_H #define CFREE_LINK_H -#include "../obj/obj.h" +#include "obj/obj.h" typedef struct Linker Linker; typedef struct LinkImage LinkImage; +typedef struct CfreeJit CfreeJit; typedef enum LinkInputKind { LINK_INPUT_OBJ, - LINK_INPUT_OBJ_FILE, - LINK_INPUT_ARCHIVE, + LINK_INPUT_OBJ_BYTES, + LINK_INPUT_ARCHIVE_BYTES, } LinkInputKind; typedef u32 LinkInputId; @@ -29,7 +30,7 @@ typedef struct LinkInput { u8 kind; /* LinkInputKind */ u8 pad[3]; ObjBuilder* obj; /* for LINK_INPUT_OBJ, otherwise NULL until read */ - Sym path; /* for file/archive inputs */ + Sym name; /* diagnostic name for bytes inputs */ } LinkInput; typedef struct LinkSymbol { @@ -86,25 +87,34 @@ typedef struct LinkRelocApply { typedef void* (*LinkExternResolver)(void* user, Sym name); -typedef struct JitImage { - LinkImage* image; - void* base; - size_t size; -} JitImage; - Linker* link_new(Compiler*); void link_free(Linker*); -LinkInputId link_add_obj(Linker*, ObjBuilder*); /* fresh-compiled */ -LinkInputId link_add_obj_file(Linker*, const char* path); /* read .o from disk */ -LinkInputId link_add_archive(Linker*, const char* path); /* .a / static archive */ -void link_add_lib_search_path(Linker*, const char* dir); +/* Inputs are byte-buffer-shaped. Path-based adapters live in the driver + * (see driver/driver.h) and use Compiler.env->file_io to read bytes before + * calling these. All bytes inputs must remain alive until link_resolve + * returns; ObjBuilder inputs must remain alive until link_image_free. + * + * `name` is an unowned diagnostic string; the linker interns it on entry + * (callers do not need to pre-intern). 
*/ +LinkInputId link_add_obj(Linker*, ObjBuilder*); +LinkInputId link_add_obj_bytes(Linker*, const char* name, + const u8* data, size_t len); +LinkInputId link_add_archive_bytes(Linker*, const char* name, + const u8* data, size_t len); + void link_set_entry(Linker*, Sym name); -void link_set_script(Linker*, const char* path); +void link_set_script_text(Linker*, const char* text, size_t len); void link_set_extern_resolver(Linker*, LinkExternResolver, void* user); /* Symbol resolution and layout are explicit so file linking and JIT share the - * same resolved image. Fatal diagnostics use Compiler.panic. */ + * same resolved image. Fatal diagnostics use Compiler.panic. + * + * link_resolve registers the returned LinkImage with compiler_defer so a + * panic between resolve and consumer (emit_writer / jit_from_image) reaps + * it. Successful consumers either call link_image_free (which undefers and + * frees) or transfer ownership via cfree_jit_from_image (which undefers and + * keeps the image alive for the JIT's lifetime). */ LinkImage* link_resolve(Linker*); void link_image_free(LinkImage*); const LinkSymbol* link_symbol(LinkImage*, LinkSymId); @@ -117,16 +127,22 @@ const LinkSection* link_section_get(LinkImage*, LinkSectionId id); u32 link_reloc_apply_count(LinkImage*); const LinkRelocApply* link_reloc_apply_get(LinkImage*, u32 id); -/* Writes an executable in the format implied by Compiler.target. */ -void link_emit_exe(Linker*, const char* out_path); -void link_emit_image(LinkImage*, const char* out_path); - -/* JIT: maps sections into memory, applies relocations, returns the address of - * the entry symbol (or any named symbol via link_jit_lookup). */ -void* link_jit(Linker*); -JitImage* link_jit_image(LinkImage*); -void* link_jit_lookup(Linker*, Sym name); -void* jit_image_lookup(JitImage*, Sym name); -void jit_image_free(JitImage*); +/* Writes an executable in the format implied by Compiler.target into the + * caller-provided Writer. 
Path-based emit lives in the driver. */ +void link_emit_image_writer(LinkImage*, Writer*); + +/* JIT: maps the image into executable memory and returns an owning handle. + * The returned CfreeJit takes ownership of the LinkImage (undefers it from + * the cleanup stack registered by link_resolve); on cfree_jit_free both the + * JIT mapping and the LinkImage are released. Symbol lookup is by resolved + * Sym name (object-local ObjSymIds never escape as JIT handles). The Sym + * form is for callers that have already interned the name; the const char* + * form interns lazily. */ +typedef void (*CfreeJitFn)(void); + +CfreeJit* cfree_jit_from_image(LinkImage*); +void cfree_jit_free(CfreeJit*); +CfreeJitFn cfree_jit_lookup(CfreeJit*, const char* name); +CfreeJitFn cfree_jit_lookup_sym(CfreeJit*, Sym name); #endif diff --git a/src/obj/obj.h b/src/obj/obj.h @@ -1,8 +1,8 @@ #ifndef CFREE_OBJ_H #define CFREE_OBJ_H -#include "../core/core.h" -#include "../core/buf.h" +#include "core/core.h" +#include "core/buf.h" typedef enum SecKind { SEC_TEXT, diff --git a/src/opt/ir.h b/src/opt/ir.h @@ -1,10 +1,10 @@ #ifndef CFREE_IR_H #define CFREE_IR_H -#include "../core/core.h" -#include "../core/arena.h" -#include "../arch/arch.h" -#include "../type/type.h" +#include "core/core.h" +#include "core/arena.h" +#include "arch/arch.h" +#include "type/type.h" typedef u32 Val; #define VAL_NONE 0u diff --git a/src/opt/opt.h b/src/opt/opt.h @@ -1,8 +1,8 @@ #ifndef CFREE_OPT_H #define CFREE_OPT_H -#include "../arch/arch.h" -#include "ir.h" +#include "arch/arch.h" +#include "opt/ir.h" /* opt_cgtarget: a CGTarget wrapper that records each function as IR. * diff --git a/src/parse/parse.h b/src/parse/parse.h @@ -1,10 +1,10 @@ #ifndef CFREE_PARSE_H #define CFREE_PARSE_H -#include "../pp/pp.h" -#include "../decl/decl.h" -#include "../cg/cg.h" -#include "../arch/arch.h" +#include "pp/pp.h" +#include "decl/decl.h" +#include "cg/cg.h" +#include "arch/arch.h" /* C11 frontend. 
Reads tokens from `pp`, records C declarations in DeclTable, * and drives `cg` for executable code. */ diff --git a/src/pp/pp.h b/src/pp/pp.h @@ -1,10 +1,12 @@ #ifndef CFREE_PP_H #define CFREE_PP_H -#include "../lex/lex.h" +#include "lex/lex.h" typedef struct Pp Pp; +/* PP reads file_io from c->env for include search. If include search is + * configured but c->env->file_io is missing, include resolution panics. */ Pp* pp_new(Compiler*); void pp_free(Pp*); @@ -12,6 +14,11 @@ void pp_add_include_dir(Pp*, const char* dir, int system); void pp_define(Pp*, const char* name, const char* body); /* -D */ void pp_undef(Pp*, const char* name); /* -U */ +/* Pushes a Lexer onto the include stack. PP takes ownership of the Lexer: + * it is closed when the input hits EOF and is popped, or in pp_free if it + * is still on the stack. Callers must not call lex_close on a pushed + * Lexer. The borrowed source buffer (see lex_open_mem) must outlive + * pp_free. */ void pp_push_input(Pp*, Lexer*); void pp_add_include_edge(Pp*, u32 includer_file_id, u32 included_file_id, SrcLoc include_loc, int system); diff --git a/src/type/type.h b/src/type/type.h @@ -1,8 +1,8 @@ #ifndef CFREE_TYPE_H #define CFREE_TYPE_H -#include "../core/core.h" -#include "../core/pool.h" +#include "core/core.h" +#include "core/pool.h" typedef enum TypeKind { TY_VOID,