commit 6455a33b8198a8add5db3df9dad14da492eac738
parent c9baaf8f7d59e4bed9f7dc7d280dc692945c5d86
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 11 May 2026 11:03:33 -0700
run: fix cfree run JIT runner — pipeline lifetime, dlsym mangling, synthetic argv[0]
`cfree run` segfaulted on every successful JIT because the pipeline (and
its Compiler) was freed inside run_compile_and_jit before
cfree_jit_lookup ran in driver_run; lookup dereferences jit->c. Lifted
pipeline ownership up to driver_run to mirror driver/dbg.c.
Also: zero-source input (.o/.a only) no longer reports a spurious "out
of memory" from a 0-byte driver_alloc_zeroed; the dlsym fallback strips
Mach-O's leading underscore so JIT'd code can resolve libc names; and
JIT'd main now receives a synthetic argv[0] from the first input so it
behaves like a hosted program. doc/JIT.md collects the remaining JIT
gaps (Mach-O Path-J reloc-apply, inspector follow-ups, tests) in one
place.
Diffstat:
| A | doc/JIT.md | | | 114 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | driver/env.c | | | 10 | +++++++++- |
| M | driver/run.c | | | 71 | +++++++++++++++++++++++++++++++++++++++++++++++++++-------------------- |
3 files changed, 174 insertions(+), 21 deletions(-)
diff --git a/doc/JIT.md b/doc/JIT.md
@@ -0,0 +1,114 @@
+# JIT — known limitations and TODOs
+
+Status of the in-process JIT path: `cfree_link_jit` (`src/link/link_jit.c`),
+its driver front-end `cfree run` (`driver/run.c`), and the debugger session
+that rides on top (`cfree dbg`, `driver/dbg.c`). Adjacent docs that cover
+specific slices stay authoritative; entries here cross-reference instead of
+duplicating.
+
+Companion docs:
+- `doc/DESIGN.md` §5.5 — `LinkImage` / `CfreeJit` ownership and lifetime.
+- `doc/MACHO.md` §3 — Mach-O Path-J reloc-apply gaps (the longest list).
+- `doc/DBG.md` §12 — JIT debugger checklist (session, view, REPL).
+- `doc/EMU.md` §6 — per-block JIT on a growing `LinkImage` (separate scheme).
+
+## Driver — `cfree run`
+
+- [x] Pipeline outlives the JIT — `cfree_jit_lookup` dereferences
+ `jit->c`, so freeing the pipeline (and its Compiler) before lookup
+ segfaults. Ownership lifted to `driver_run` to mirror
+ `driver/dbg.c`.
+- [x] Zero-source input case (`.o`/`.a` only) no longer spuriously
+ reports "out of memory" from a 0-byte `driver_alloc_zeroed`.
+- [x] `driver_dlsym_resolver` retries with the leading `_` stripped so
+ Mach-O-mangled C names (`_strlen`) resolve through
+ `dlsym(RTLD_DEFAULT)`. (Call-site reachability is a separate
+ issue — see §"Reloc-apply gaps" below.)
+- [x] Synthetic `argv[0]` — JIT'd `main` now receives `argv[0]` set to
+ the first input's display name (source path, `<stdin>`, `.o`, or
+ `.a`); user args from `--` start at `argv[1]`. Without `--` the
+ program sees `argc==1`.
+- [ ] `-O2` crashes on the multi-file inline-asm demo with `Bus error`.
+ Likely an optimizer bug surfaced through `IR_ASM_BLOCK` replay —
+ reduce and file against `src/opt/opt.c` (the recorder/replay seam
+ from `INLINEASM.md` track B), not the JIT.
+- [ ] Regression test: scripted `test/run/` harness diffing exit codes
+ and stdout for `.c`, stdin, `.o`, `.a`, multi-file, and `-e`
+ entry cases. No coverage today.
+
+## Reloc-apply gaps
+
+The JIT shares resolver/layout with the file linker but has its own
+in-process apply path. The Mach-O J-path issues are listed in
+`doc/MACHO.md` §3; the corresponding ELF JIT path is green on the same
+inputs.
+
+- [ ] **Cross-TU data via ADRP/ADD/LDR.** (`doc/MACHO.md` §3.1.)
+ `ARM64_RELOC_GOT_LOAD_PAGE21` / `PAGEOFF12` patches the *value*
+ instead of the *address* on the JIT path. Internal-GOT slots are
+ seeded by dyld chained-fixup REBASEs in the exe path; the JIT has
+ no dyld and must seed them in-process. Cases: `11_data_cross_tu`,
+ `14_weak_present`, `17_common_coalesce`, `34_ifunc_addr_taken`.
+- [ ] **Weak-undef out of ±4 GiB.** (`doc/MACHO.md` §3.2.) JIT maps
+ code far from the synthetic weak-undef sentinel; ELF JIT colocates
+ `.got` with `.text` and avoids this. Fix: colocate the sentinel,
+ or rewrite ADRP into an absolute MOV/LDR when out of range. Case:
+ `16_weak_undef`.
+- [ ] **IFUNC under Mach-O JIT.** (`doc/MACHO.md` §3.3.) Mach-O has no
+ `__mod_init_func` equivalent for iplt synthesis. Either exclude
+ from `j_targets` or emulate the ELF iplt scheme inside the JIT
+ mapping. Cases: `32_ifunc`, `33_ifunc_in_init`.
+- [ ] **Extern resolver / far-call.** (`doc/MACHO.md` §3.4.) Resolver
+ returns a host pointer (e.g. libc); reloc-apply tries to encode a
+ PC-relative ADRP/ADD or CALL26. Today `cfree run` cannot call any
+ libc function from JIT'd code — fails with
+ `link: CALL26 out of range (need ±128MiB)`. Options:
+ - Route resolver-supplied symbols through an internal-GOT slot
+ inside the JIT mapping (matches the exe shape), or
+ - Emit a per-import trampoline inside JIT memory (PLT-style:
+ `ADRP+LDR+BR Xn`) and redirect CALL26/JUMP26 at it.
+ Case: `28_extern_resolver`. Workaround in the meantime: use
+ inline `asm volatile(... svc ...)` for syscalls from JIT'd code.
+
+## Inspector / debugger surface
+
+`cfree_jit_view` and the symbol-walk inspector entries
+(`cfree_jit_sym_iter_*`, `cfree_jit_addr_to_sym`,
+`cfree_jit_image_contains`, `cfree_jit_image_arch`,
+`cfree_jit_compiler`) landed with commit `1b5a596` and PC translation is
+wired up. Remaining items are listed in `doc/DBG.md` §12; the JIT-facing
+ones to keep an eye on:
+
+- [ ] `cfree_jit_view` — multi-input handling. v1 returns NULL when more
+ than one `CfreeObjBuilder` was linked (the cross-CU offset
+ adjustment for concatenated debug sections is not done).
+ `src/link/link_jit.c:490`.
+- [ ] Windows host adapter for the JIT debugger (vectored exception
+ handlers + `SetThreadContext` instead of POSIX signals).
+ `doc/DBG.md` §host-adapter.
+- [ ] x64 / rv64 displaced-step (`arch_x64.c` INT3 + RIP-relative fixups,
+ `arch_rv64.c` EBREAK + AUIPC/JAL/branch fixups). aarch64 only
+ today.
+
+## Memory mapping / executable allocator
+
+- [ ] Cross-host `CfreeExecMem` audit. Today Apple silicon goes through
+ dual-mapping (`g_jit_dual_map`) and other POSIX hosts fall back to
+ `mprotect` RW↔RX. Document the contract and the failure mode when
+ `env->execmem` is unset (currently `compiler_panic`).
+- [ ] Page-size: JIT defaults to `0x4000` when the host adapter reports
+ `page_size = 0`. Either require the adapter to fill it, or query
+ `sysconf(_SC_PAGESIZE)` in `driver/env.c`.
+
+## Tests
+
+Coverage today is `make test-link CFREE_TEST_OBJ=macho` (Path J), which
+prints raw `Segmentation fault: 11` lines from the harness wrapper with
+no J-specific markers. Items below would catch the failure modes
+explicitly.
+
+- [ ] J-path markers in the link-test reporter so the four
+ `doc/MACHO.md` §3 groups are distinguishable from generic SIGSEGV.
+- [ ] `test/run/` smoke suite for `cfree run` (see Driver above).
+- [ ] `test/smoke/dbg_hello` — scripted REPL diff against a JIT'd
+ source. `doc/DBG.md` §tests.
diff --git a/driver/env.c b/driver/env.c
@@ -1169,8 +1169,16 @@ int driver_read_stdin(DriverEnv* e, uint8_t** out_data, size_t* out_size) {
}
void* driver_dlsym_resolver(void* user, const char* name) {
+ void* p;
(void)user;
- return dlsym(RTLD_DEFAULT, name);
+ if (!name) return NULL;
+ p = dlsym(RTLD_DEFAULT, name);
+ /* On Mach-O hosts the linker hands us C names with a leading underscore
+ * (obj_format_c_mangle), but dlsym(RTLD_DEFAULT) expects the
+ * source-level name. Retry with the prefix stripped so JITed code can
+ * resolve libc symbols by their C name. */
+ if (!p && name[0] == '_' && name[1] != '\0') p = dlsym(RTLD_DEFAULT, name + 1);
+ return p;
}
int driver_read_line(char* buf, size_t cap) {
diff --git a/driver/run.c b/driver/run.c
@@ -97,9 +97,12 @@ void driver_help_run(void) {
"\n"
"ARGV PASSTHROUGH\n"
" -- End of `cfree run` options. Tokens after `--` are\n"
- " passed verbatim to the JITed program's main(argc,\n"
- " argv). Without `--` the program receives an empty\n"
- " argv.\n"
+ " passed to the JITed program's main(argc, argv)\n"
+ " starting at argv[1]. argv[0] is synthesized from\n"
+ " the first input (path, `<stdin>`, .o, or .a) so\n"
+ " JITed code can index argv[0] like a hosted\n"
+ " program. Without `--` the program receives\n"
+ " argc==1 with argv[0] set and argv[1]==NULL.\n"
"\n"
"GETTING HELP\n"
" -h, --help Show this help and exit\n"
@@ -217,6 +220,10 @@ static int run_parse(int argc, char** argv, RunOptions* o) {
if (run_alloc_arrays(o, argc) != 0) return 1;
o->target = driver_host_target();
+ /* Reserve argv[0] for a synthetic program name filled in below. User
+ * args after `--` start at argv[1]. */
+ o->prog_argc = 1;
+
for (i = 1; i < argc; ++i) {
const char* a = argv[i];
@@ -328,6 +335,17 @@ static int run_parse(int argc, char** argv, RunOptions* o) {
return 1;
}
if (!o->entry) o->entry = "main";
+
+ /* Synthetic argv[0]. Hosted programs conventionally read argv[0] as
+ * the program name; under `cfree run` there is no executable path, so
+ * use the first input's display name. Preference matches the input
+ * scan in run_compile_and_jit (sources before memory before objects
+ * before archives) so it lines up with whatever shows up first in
+ * diagnostics. */
+ if (o->nsources) o->prog_argv[0] = (char*)o->sources[0];
+ else if (o->nsource_memory) o->prog_argv[0] = (char*)o->source_memory[0].name;
+ else if (o->nobject_files) o->prog_argv[0] = (char*)o->object_files[0];
+ else o->prog_argv[0] = (char*)o->archives[0];
return 0;
}
@@ -353,14 +371,15 @@ static void run_fill_compile_opts(const RunOptions* o,
copts->max_errors = o->max_errors;
}
-/* Compile every C source through a pipeline, load .o/.a inputs, and JIT-link.
- * On success *out_jit owns the JIT image; caller releases via cfree_jit_free.
+/* Compile every C source through the caller-owned pipeline, load .o/.a
+ * inputs, and JIT-link. On success *out_jit owns the JIT image; caller
+ * releases via cfree_jit_free. The pipeline must outlive the JIT — its
+ * Compiler backs jit->c, which cfree_jit_lookup dereferences.
*/
static int run_compile_and_jit(DriverEnv* env, const RunOptions* o,
- CfreeJit** out_jit) {
+ CfreePipeline* pipe, CfreeJit** out_jit) {
CfreeEnv cenv = driver_env_to_cfree(env);
const CfreeFileIO* io = cenv.file_io;
- CfreePipeline* pipe = NULL;
DriverLoad* src_lf = NULL;
DriverLoad* obj_lf = NULL;
DriverLoad* arch_lf = NULL;
@@ -379,11 +398,13 @@ static int run_compile_and_jit(DriverEnv* env, const RunOptions* o,
return 1;
}
- src_in = driver_alloc_zeroed(env, nsrc * sizeof(*src_in));
- objs = driver_alloc_zeroed(env, nsrc * sizeof(*objs));
- if (!src_in || !objs) {
- driver_errf(RUN_TOOL, "out of memory");
- goto out;
+ if (nsrc) {
+ src_in = driver_alloc_zeroed(env, nsrc * sizeof(*src_in));
+ objs = driver_alloc_zeroed(env, nsrc * sizeof(*objs));
+ if (!src_in || !objs) {
+ driver_errf(RUN_TOOL, "out of memory");
+ goto out;
+ }
}
if (o->nsources) {
src_lf = driver_alloc_zeroed(env, o->nsources * sizeof(*src_lf));
@@ -432,12 +453,6 @@ static int run_compile_and_jit(DriverEnv* env, const RunOptions* o,
arch_in[i].group_id = 0;
}
- pipe = driver_pipeline_new(o->target, &cenv);
- if (!pipe) {
- driver_errf(RUN_TOOL, "failed to initialize compiler");
- goto out;
- }
-
run_fill_compile_opts(o, &copts);
for (i = 0; i < nsrc; ++i) {
if (cfree_pipeline_compile_obj(pipe, &copts, &src_in[i], &objs[i]) != 0)
@@ -461,7 +476,6 @@ static int run_compile_and_jit(DriverEnv* env, const RunOptions* o,
rc = cfree_pipeline_link_jit(pipe, &link_opts, out_jit);
out:
- if (pipe) driver_pipeline_free(pipe);
if (arch_lf) {
for (i = 0; i < o->narchives; ++i) driver_release_bytes(io, &arch_lf[i]);
}
@@ -486,6 +500,8 @@ typedef int (*MainFn)(int, char**);
int driver_run(int argc, char** argv) {
DriverEnv env;
RunOptions ro = {0};
+ CfreeEnv cenv;
+ CfreePipeline* pipe = NULL;
CfreeJit* jit = NULL;
void* sym;
MainFn entry_fn;
@@ -505,8 +521,21 @@ int driver_run(int argc, char** argv) {
return 2;
}
- rc = run_compile_and_jit(&env, &ro, &jit);
+ /* Pipeline owns the Compiler that backs the JIT image — keep it alive
+ * across cfree_jit_lookup and the entry call, free after cfree_jit_free.
+ */
+ cenv = driver_env_to_cfree(&env);
+ pipe = driver_pipeline_new(ro.target, &cenv);
+ if (!pipe) {
+ driver_errf(RUN_TOOL, "failed to initialize compiler");
+ run_options_release(&ro);
+ driver_env_fini(&env);
+ return 1;
+ }
+
+ rc = run_compile_and_jit(&env, &ro, pipe, &jit);
if (rc != 0) {
+ driver_pipeline_free(pipe);
run_options_release(&ro);
driver_env_fini(&env);
return rc;
@@ -516,6 +545,7 @@ int driver_run(int argc, char** argv) {
if (!sym) {
driver_errf(RUN_TOOL, "entry symbol not found: %s", ro.entry);
cfree_jit_free(jit);
+ driver_pipeline_free(pipe);
run_options_release(&ro);
driver_env_fini(&env);
return 1;
@@ -537,6 +567,7 @@ int driver_run(int argc, char** argv) {
rc = entry_fn((int)ro.prog_argc, ro.prog_argv);
cfree_jit_free(jit);
+ driver_pipeline_free(pipe);
run_options_release(&ro);
driver_env_fini(&env);
return rc;