commit 21aa437c9addefcc191d61f2b9c39f2bccc6efcc
parent 96608783797124dd00d823fc222bddf3ffa0891e
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 2 Jun 2026 15:18:25 -0700
driver run: shebang support
Diffstat:
7 files changed, 133 insertions(+), 0 deletions(-)
diff --git a/doc/DRIVER.md b/doc/DRIVER.md
@@ -107,6 +107,20 @@ tool reaches into compiler internals.
argv. `cc` and `run` overlap heavily on input shape and the preprocessor flag
family — that overlap is exactly what `driver/lib/` factors out.
+`run` doubles as a `#!` script interpreter so a C file can be made executable
+and run directly. The kernel's shebang mechanism appends the script path *and*
+the user's arguments after the interpreter's flags, with no way to inject a
+`--` between them, so `run --script FILE` names the sole source and routes every
+later token to the program's argv (an implicit `--` after `FILE`). `--script`
+implies `-lc` (scripts are usually hosted; under the JIT that only enables libc
+headers/macros — symbols still resolve at run time via host `dlsym`), so a libc
+sysroot must still be available for `#include` resolution (on macOS, pass
+`--sysroot`). The portable shebang is `#!/usr/bin/env -S kit run --script` (the
+`env -S` split is required because Linux passes everything after the
+interpreter as one argument); compile flags go before `--script`, e.g.
+`kit run -g --script`. A leading `#!` line on the *primary* source file is
+skipped before lexing by an explicit `lex_skip_shebang` call (byte 0 only) in
+both the C frontend and `cc -E`, so the shebang is never mistaken for a `#`
+directive; includes and paste buffers are untouched.
+
## Cross-tool helpers (lib/)
These hold the logic that more than one tool needs, so the CLI shells stay thin
diff --git a/driver/cmd/run.c b/driver/cmd/run.c
@@ -304,6 +304,19 @@ void driver_help_run(void) {
" JITed code can index argv[0] like a hosted\n"
" program. Without `--` the program receives\n"
" argc==1 with argv[0] set and argv[1]==NULL.\n"
+ " --script FILE Run FILE as the sole source, passing every\n"
+ " later token to the program as argv (an "
+ "implicit\n"
+ " `--` after FILE). Implies -lc (hosted libc),\n"
+ " since scripts are usually hosted; on macOS that\n"
+ " still needs --sysroot for header resolution.\n"
+ " Intended for `#!` script use:\n"
+ " #!/usr/bin/env -S kit run --script\n"
+ " Make the .c file executable and run it "
+ "directly;\n"
+ " the kernel appends the path + the user's args.\n"
+ " Add compile flags before --script, e.g.\n"
+ " #!/usr/bin/env -S kit run -g --script\n"
"\n"
"GETTING HELP\n"
" -h, --help Show this help and exit\n"
@@ -312,6 +325,7 @@ void driver_help_run(void) {
" kit run hello.c\n"
" kit run -O2 -DNDEBUG main.c util.c\n"
" kit run main.c -- arg1 arg2\n"
+ " kit run --script script.c arg1 arg2 (as a #! interpreter)\n"
"\n"
"EXIT CODES\n"
" Returns the exit code of the JITed entry, or 1 on internal "
@@ -431,6 +445,28 @@ static int run_parse(int argc, char** argv, RunOptions* o) {
after_dash_dash = 1;
continue;
}
+ /* `--script FILE`: shebang entry point. The kernel's `#!` mechanism
+ * appends the script path and the user's arguments after our flags, with
+ * no way to inject a `--` between them. `--script` names the sole source
+ * file (the next argv element, supplied by the kernel) and routes every
+ * later token — flag-shaped or not — to the program's argv, exactly like
+ * an implicit `--` after the script. See driver_help_run / DRIVER.md. */
+ if (driver_streq(a, "--script")) {
+ if (++i >= argc) {
+ driver_errf(RUN_TOOL, "--script requires a source-file argument");
+ return 1;
+ }
+ /* Scripts are overwhelmingly hosted programs, so default `--script` to
+ * hosted libc — under the JIT that only enables libc headers/macros
+ * (symbols resolve at run time via host dlsym), so the only added cost
+ * is needing a sysroot for #include resolution. An earlier explicit
+ * -lc is a harmless no-op; this just spares every shebang line from
+ * repeating it. */
+ o->wants_hosted_libc = 1;
+ if (run_classify_positional(o, argv[i]) != 0) return 1;
+ after_dash_dash = 1;
+ continue;
+ }
{
int r =
diff --git a/lang/c/c.c b/lang/c/c.c
@@ -83,6 +83,7 @@ static KitStatus c_frontend_compile(KitFrontendState* frontend,
if (!pool) compiler_panic(c, c_no_loc(), "C compiler out of memory");
kit_frontend_metrics_scope_begin(c, "compile.c.lex_open");
lex = lex_open_mem(c, input->name.s, bytes->s, bytes->len);
+ if (lex) lex_skip_shebang(lex);
kit_frontend_metrics_scope_end(c, "compile.c.lex_open");
kit_frontend_metrics_scope_begin(c, "compile.c.pp_new");
pp = pp_new(c);
diff --git a/lang/cpp/cpp.c b/lang/cpp/cpp.c
@@ -68,6 +68,7 @@ static KitStatus cpp_preprocess_body(KitCompiler* c, void* user) {
}
lex = lex_open_mem(c, r->name.s, input->s, input->len);
+ if (lex) lex_skip_shebang(lex);
pp = pp_new(c);
if (!lex || !pp)
compiler_panic(c, cpp_no_loc(), "C preprocessor out of memory");
diff --git a/lang/cpp/lex/lex.c b/lang/cpp/lex/lex.c
@@ -152,6 +152,21 @@ void lex_close(Lexer* l) {
l->heap->free(l->heap, l, sizeof(*l));
}
+/* Step over a leading script-interpreter ("shebang") line.
+ *
+ * An executable C file launched through `kit run` keeps its `#!...` line as
+ * line 1, and without this step that `#` would be lexed as a directive
+ * introducer. The skip happens only when the lexer is still at byte 0 and the
+ * buffer starts with the two bytes `#!`; a NULL lexer, an already-advanced
+ * lexer, or any other leading bytes leave the state unchanged.
+ *
+ * On return `pos` rests on the line's terminating '\n' (or at `len` when the
+ * buffer has no newline), so the newline itself is still lexed — yielding its
+ * TOK_NEWLINE — and the shebang keeps counting as line 1.
+ *
+ * This function cannot tell a primary source from an include or paste buffer;
+ * callers must invoke it only on a freshly opened primary source. */
+void lex_skip_shebang(Lexer* l) {
+  if (!l) return;
+  if (l->pos != 0 || l->len < 2) return;
+  if (l->src[0] != '#') return;
+  if (l->src[1] != '!') return;
+  /* Advance to, but not past, the end-of-line byte. */
+  for (; l->pos < l->len; l->pos++) {
+    if (l->src[l->pos] == '\n') break;
+  }
+}
+
SrcLoc lex_loc(const Lexer* l) { return lex_here(l); }
u32 lex_file_id(const Lexer* l) { return l->file_id; }
const LitInfo* lex_lit(const Lexer* l, LitId id) {
diff --git a/lang/cpp/lex/lex.h b/lang/cpp/lex/lex.h
@@ -118,6 +118,11 @@ typedef struct Lexer Lexer;
Lexer* lex_open_mem(Compiler*, const char* name, const char* src, size_t len);
void lex_close(Lexer*);
+/* Skip a leading `#!` script-interpreter ("shebang") line so an executable
+ * C file run via `kit run` lexes cleanly. Call only on a freshly-opened
+ * primary source lexer, before any token is pulled. It is a no-op when the
+ * lexer is NULL, has already advanced past byte 0, or the buffer does not
+ * start with `#!`. It does not check what kind of buffer it was given, so
+ * keeping it off includes and paste buffers is the caller's job. The line's
+ * trailing newline is not consumed. */
+void lex_skip_shebang(Lexer*);
+
/* Streaming. Returns TOK_EOF repeatedly at end of input. */
Tok lex_next(Lexer*);
SrcLoc lex_loc(const Lexer*);
diff --git a/test/driver/run.sh b/test/driver/run.sh
@@ -409,6 +409,67 @@ else
not_ok "run-source-archive-demand" "$work/run-setup.diag"
fi
+# ---- run --script: #! shebang interpreter, argv passthrough, implicit -lc ----
+# Make a .c file executable with a `#!` line and run it directly. The kernel
+# launches the interpreter and appends the script path + the user's args, so
+# `--script` names the sole source and routes everything after it to the
+# program's argv. `--script` implies -lc; under the JIT that only needs a libc
+# sysroot for #include resolution, so probe for a usable one and skip if none.
+shebang_sysroot=""
+if command -v xcrun >/dev/null 2>&1; then
+ shebang_sysroot="$(xcrun --show-sdk-path 2>/dev/null || true)"
+fi
+shebang_sysroot="${KIT_TEST_SYSROOT:-$shebang_sysroot}"
+
+cat > "$work/shebang-probe.c" <<'SRC'
+#include <stdio.h>
+int main(void) { return 0; }
+SRC
+shebang_ok=0
+if [ -n "$shebang_sysroot" ] &&
+ "$KIT" run --sysroot "$shebang_sysroot" --script "$work/shebang-probe.c" \
+ > "$work/shebang-probe.out" 2> "$work/shebang-probe.err"; then
+ shebang_ok=1
+fi
+
+if [ "$shebang_ok" -eq 1 ]; then
+ cat > "$work/greet.c" <<SHEBANG
+#!/usr/bin/env -S $KIT run --sysroot $shebang_sysroot --script
+#include <stdio.h>
+#include <stdlib.h>
+int main(int argc, char** argv) {
+ if (argc < 2) { fprintf(stderr, "usage: greet N\n"); return 2; }
+ printf("greet:%d\n", atoi(argv[1]) + 1);
+ return 0;
+}
+SHEBANG
+ chmod +x "$work/greet.c"
+
+ # Execute the C file directly. The arg "41" reaches the program (not
+ # `kit run`); -lc is implied so <stdio.h>/<stdlib.h> resolve.
+ if "$work/greet.c" 41 > "$work/greet.out" 2> "$work/greet.err"; then
+ contains "run-shebang-arg" "$work/greet.out" "greet:42"
+ else
+ not_ok "run-shebang-arg" "$work/greet.err"
+ fi
+
+ # A flag-shaped program arg after the script must pass through to the
+ # program, not be parsed as a `kit run` option. atoi("-5")+1 = -4.
+ "$work/greet.c" -5 > "$work/greet-flag.out" 2> "$work/greet-flag.err"
+ greet_flag_rc=$?
+ if [ "$greet_flag_rc" -eq 0 ] && grep -q "greet:-4" "$work/greet-flag.out"; then
+ ok "run-shebang-flaglike-arg"
+ else
+ { printf 'rc=%s\n' "$greet_flag_rc"
+ sed 's/^/out: /' "$work/greet-flag.out"
+ sed 's/^/err: /' "$work/greet-flag.err"; } > "$work/greet-flag.diag"
+ not_ok "run-shebang-flaglike-arg" "$work/greet-flag.diag"
+ fi
+else
+ skip_test "run-shebang-arg" "no usable libc sysroot (set KIT_TEST_SYSROOT)"
+ skip_test "run-shebang-flaglike-arg" "no usable libc sysroot (set KIT_TEST_SYSROOT)"
+fi
+
# ---- archive link order is enforced (def after ref vs ref after def) ----
cat > "$work/order-main.c" <<'SRC'
int foo(void);