kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 21aa437c9addefcc191d61f2b9c39f2bccc6efcc
parent 96608783797124dd00d823fc222bddf3ffa0891e
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue,  2 Jun 2026 15:18:25 -0700

driver run: shebang support

Diffstat:
M doc/DRIVER.md | 14 ++++++++++++++
M driver/cmd/run.c | 36 ++++++++++++++++++++++++++++++++++++
M lang/c/c.c | 1 +
M lang/cpp/cpp.c | 1 +
M lang/cpp/lex/lex.c | 15 +++++++++++++++
M lang/cpp/lex/lex.h | 5 +++++
M test/driver/run.sh | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 133 insertions(+), 0 deletions(-)

diff --git a/doc/DRIVER.md b/doc/DRIVER.md @@ -107,6 +107,20 @@ tool reaches into compiler internals. argv. `cc` and `run` overlap heavily on input shape and the preprocessor flag family — that overlap is exactly what `driver/lib/` factors out. +`run` doubles as a `#!` script interpreter so a C file can be made executable +and run directly. The kernel's shebang mechanism appends the script path *and* +the user's arguments after the interpreter's flags, with no way to inject a +`--` between them, so `run --script FILE` names the sole source and routes every +later token to the program's argv (an implicit `--` after `FILE`). `--script` +implies `-lc` (scripts are usually hosted; under the JIT that only enables libc +headers/macros — symbols still resolve at run time via host `dlsym`). The +portable shebang is `#!/usr/bin/env -S kit run --script` (the `env -S` split is +required because Linux passes everything after the interpreter as one argument). +A leading `#!` line on the *primary* source file is recognized and skipped by +the lexer (`lex_skip_shebang`, byte 0 only) for both the C frontend and `cc -E`, +so the shebang is never mistaken for a `#` directive; includes and paste buffers +are untouched. + ## Cross-tool helpers (lib/) These hold the logic that more than one tool needs, so the CLI shells stay thin diff --git a/driver/cmd/run.c b/driver/cmd/run.c @@ -304,6 +304,19 @@ void driver_help_run(void) { " JITed code can index argv[0] like a hosted\n" " program. Without `--` the program receives\n" " argc==1 with argv[0] set and argv[1]==NULL.\n" + " --script FILE Run FILE as the sole source, passing every\n" + " later token to the program as argv (an " + "implicit\n" + " `--` after FILE). 
Implies -lc (hosted libc),\n" + " since scripts are usually hosted; on macOS that\n" + " still needs --sysroot for header resolution.\n" + " Intended for `#!` script use:\n" + " #!/usr/bin/env -S kit run --script\n" + " Make the .c file executable and run it " + "directly;\n" + " the kernel appends the path + the user's args.\n" + " Add compile flags before --script, e.g.\n" + " #!/usr/bin/env -S kit run -g --script\n" "\n" "GETTING HELP\n" " -h, --help Show this help and exit\n" @@ -312,6 +325,7 @@ void driver_help_run(void) { " kit run hello.c\n" " kit run -O2 -DNDEBUG main.c util.c\n" " kit run main.c -- arg1 arg2\n" + " kit run --script script.c arg1 arg2 (as a #! interpreter)\n" "\n" "EXIT CODES\n" " Returns the exit code of the JITed entry, or 1 on internal " @@ -431,6 +445,28 @@ static int run_parse(int argc, char** argv, RunOptions* o) { after_dash_dash = 1; continue; } + /* `--script FILE`: shebang entry point. The kernel's `#!` mechanism + * appends the script path and the user's arguments after our flags, with + * no way to inject a `--` between them. `--script` names the sole source + * file (the next argv element, supplied by the kernel) and routes every + * later token — flag-shaped or not — to the program's argv, exactly like + * an implicit `--` after the script. See driver_help_run / DRIVER.md. */ + if (driver_streq(a, "--script")) { + if (++i >= argc) { + driver_errf(RUN_TOOL, "--script requires a source-file argument"); + return 1; + } + /* Scripts are overwhelmingly hosted programs, so default `--script` to + * hosted libc — under the JIT that only enables libc headers/macros + * (symbols resolve at run time via host dlsym), so the only added cost + * is needing a sysroot for #include resolution. An earlier explicit + * -lc is a harmless no-op; this just spares every shebang line from + * repeating it. 
*/ + o->wants_hosted_libc = 1; + if (run_classify_positional(o, argv[i]) != 0) return 1; + after_dash_dash = 1; + continue; + } { int r = diff --git a/lang/c/c.c b/lang/c/c.c @@ -83,6 +83,7 @@ static KitStatus c_frontend_compile(KitFrontendState* frontend, if (!pool) compiler_panic(c, c_no_loc(), "C compiler out of memory"); kit_frontend_metrics_scope_begin(c, "compile.c.lex_open"); lex = lex_open_mem(c, input->name.s, bytes->s, bytes->len); + if (lex) lex_skip_shebang(lex); kit_frontend_metrics_scope_end(c, "compile.c.lex_open"); kit_frontend_metrics_scope_begin(c, "compile.c.pp_new"); pp = pp_new(c); diff --git a/lang/cpp/cpp.c b/lang/cpp/cpp.c @@ -68,6 +68,7 @@ static KitStatus cpp_preprocess_body(KitCompiler* c, void* user) { } lex = lex_open_mem(c, r->name.s, input->s, input->len); + if (lex) lex_skip_shebang(lex); pp = pp_new(c); if (!lex || !pp) compiler_panic(c, cpp_no_loc(), "C preprocessor out of memory"); diff --git a/lang/cpp/lex/lex.c b/lang/cpp/lex/lex.c @@ -152,6 +152,21 @@ void lex_close(Lexer* l) { l->heap->free(l->heap, l, sizeof(*l)); } +/* Skip a script "shebang" line: a `#!` at the very start of the source. + * The kernel-level `#!/path interpreter` mechanism (used to make a C file + * executable via `kit run`) leaves the interpreter line as the first line of + * the file, which is not valid C — `#!` would otherwise be lexed as a `#` + * directive introducer. We only recognize it at byte 0, so a `#!` anywhere + * else is left untouched. The line's trailing newline is left in place so the + * lexer emits its TOK_NEWLINE and line numbering stays accurate (the shebang + * remains line 1). No-op unless the buffer begins with the two bytes `#!`. + * Apply only to a primary source file, never to includes/paste buffers. 
*/ +void lex_skip_shebang(Lexer* l) { + if (!l || l->pos != 0) return; + if (l->len < 2 || l->src[0] != '#' || l->src[1] != '!') return; + while (l->pos < l->len && l->src[l->pos] != '\n') l->pos++; +} + SrcLoc lex_loc(const Lexer* l) { return lex_here(l); } u32 lex_file_id(const Lexer* l) { return l->file_id; } const LitInfo* lex_lit(const Lexer* l, LitId id) { diff --git a/lang/cpp/lex/lex.h b/lang/cpp/lex/lex.h @@ -118,6 +118,11 @@ typedef struct Lexer Lexer; Lexer* lex_open_mem(Compiler*, const char* name, const char* src, size_t len); void lex_close(Lexer*); +/* Skip a leading `#!` script-interpreter ("shebang") line so an executable + * C file run via `kit run` lexes cleanly. Call only on a freshly-opened + * primary source lexer, before any token is pulled; no-op otherwise. */ +void lex_skip_shebang(Lexer*); + /* Streaming. Returns TOK_EOF repeatedly at end of input. */ Tok lex_next(Lexer*); SrcLoc lex_loc(const Lexer*); diff --git a/test/driver/run.sh b/test/driver/run.sh @@ -409,6 +409,67 @@ else not_ok "run-source-archive-demand" "$work/run-setup.diag" fi +# ---- run --script: #! shebang interpreter, argv passthrough, implicit -lc ---- +# Make a .c file executable with a `#!` line and run it directly. The kernel +# launches the interpreter and appends the script path + the user's args, so +# `--script` names the sole source and routes everything after it to the +# program's argv. `--script` implies -lc; under the JIT that only needs a libc +# sysroot for #include resolution, so probe for a usable one and skip if none. 
+shebang_sysroot="" +if command -v xcrun >/dev/null 2>&1; then + shebang_sysroot="$(xcrun --show-sdk-path 2>/dev/null || true)" +fi +shebang_sysroot="${KIT_TEST_SYSROOT:-$shebang_sysroot}" + +cat > "$work/shebang-probe.c" <<'SRC' +#include <stdio.h> +int main(void) { return 0; } +SRC +shebang_ok=0 +if [ -n "$shebang_sysroot" ] && + "$KIT" run --sysroot "$shebang_sysroot" --script "$work/shebang-probe.c" \ + > "$work/shebang-probe.out" 2> "$work/shebang-probe.err"; then + shebang_ok=1 +fi + +if [ "$shebang_ok" -eq 1 ]; then + cat > "$work/greet.c" <<SHEBANG +#!/usr/bin/env -S $KIT run --sysroot $shebang_sysroot --script +#include <stdio.h> +#include <stdlib.h> +int main(int argc, char** argv) { + if (argc < 2) { fprintf(stderr, "usage: greet N\n"); return 2; } + printf("greet:%d\n", atoi(argv[1]) + 1); + return 0; +} +SHEBANG + chmod +x "$work/greet.c" + + # Execute the C file directly. The arg "41" reaches the program (not + # `kit run`); -lc is implied so <stdio.h>/<stdlib.h> resolve. + if "$work/greet.c" 41 > "$work/greet.out" 2> "$work/greet.err"; then + contains "run-shebang-arg" "$work/greet.out" "greet:42" + else + not_ok "run-shebang-arg" "$work/greet.err" + fi + + # A flag-shaped program arg after the script must pass through to the + # program, not be parsed as a `kit run` option. atoi("-5")+1 = -4. + "$work/greet.c" -5 > "$work/greet-flag.out" 2> "$work/greet-flag.err" + greet_flag_rc=$? 
+ if [ "$greet_flag_rc" -eq 0 ] && grep -q "greet:-4" "$work/greet-flag.out"; then + ok "run-shebang-flaglike-arg" + else + { printf 'rc=%s\n' "$greet_flag_rc" + sed 's/^/out: /' "$work/greet-flag.out" + sed 's/^/err: /' "$work/greet-flag.err"; } > "$work/greet-flag.diag" + not_ok "run-shebang-flaglike-arg" "$work/greet-flag.diag" + fi +else + skip_test "run-shebang-arg" "no usable libc sysroot (set KIT_TEST_SYSROOT)" + skip_test "run-shebang-flaglike-arg" "no usable libc sysroot (set KIT_TEST_SYSROOT)" +fi + # ---- archive link order is enforced (def after ref vs ref after def) ---- cat > "$work/order-main.c" <<'SRC' int foo(void);