boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit a5249ddd82cee9881f5db480ce7773c660453804
parent 66ab01573550a19083f93d3e6588272df7cadc7f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 26 Apr 2026 02:21:26 -0700

cc: phase-1 milestone — int main(int argc, char **argv) compiles e2e

Three fixes that take the compiler from "modules pass their own tests"
to "compiles a real C program through to a working ELF":

1. cc-main wired up.

   cc/main.scm now slurps argv[2], lex+pp+parse+cg, writes argv[3].
   No -D / -o flags yet — those land when we push past phase 1.

2. cg-fn-begin params contract aligned with CC-CONTRACTS §3.1.

   cg's earlier implementation took a list of sym records and returned
   a list of sym records. The contract — and parse-fn-body — expected
   `(name . ctype)` pairs in, and an `(name . sym)` alist out. The
   mismatch only surfaced at integration time (cc-cg fixtures that
   exercised params were constructing syms directly to satisfy cg's
   actual shape). All four fixtures with params now use the contract
   shape; cg-fn-begin updated to match.

3. cg-finish emits trailing `:ELF_end`.

   The ELF.hex2 header references `:ELF_end` to compute file size and
   ph_memsz boundaries; without it hex2-0 errors out and produces a
   truncated binary. All cg .expected files updated.

Plus two scheme1 capacity bumps, both required by the integrated
compiler:

- READBUF_CAP_BYTES 64K -> 256K (combined cc.scm is ~150K).
  Also fixed the BSS init table that was hardcoding the heap offset
  at +0x10000 — it now uses %READBUF_CAP_BYTES so the heap moves
  with the readbuf cap, instead of being trampled by readbuf overflow.

- SYMTAB_CAP_SLOTS 1024 -> 8192. The integrated cc has ~366 explicit
  defines plus ~100 auto-generated record accessors plus interned
  literals, far more than the original 1024-slot table.

Phase-1 validation on aarch64:

  ./compile tests/cc-e2e/00-return-argc.c | sh boot-build-p1pp.sh ...
  ./build/cc-e2e/00-return-argc                 -> exit 1   (argc=1)
  ./build/cc-e2e/00-return-argc a b c           -> exit 4   (argc=4)
  ./build/cc-e2e/00-return-argc a b c d e       -> exit 6   (argc=6)

Suites: scheme1 (94), cc-util (14), cc-lex (16), cc-pp (22),
cc-cg (15), cc-parse (15) all pass on aarch64.

Diffstat:
Mcc/cg.scm | 18++++++++++++------
Mcc/main.scm | 54++++++++++++++++++++++++++++++++++++++++++------------
Mscheme1/scheme1.P1pp | 6+++---
Mtests/cc-cg/00-fn-empty.expected | 1+
Mtests/cc-cg/01-return-imm.expected | 1+
Mtests/cc-cg/02-one-param.expected | 1+
Mtests/cc-cg/02-one-param.scm | 7++++---
Mtests/cc-cg/03-two-params.expected | 1+
Mtests/cc-cg/03-two-params.scm | 9+++++----
Mtests/cc-cg/04-binop-add.expected | 1+
Mtests/cc-cg/05-load-binop-store.expected | 1+
Mtests/cc-cg/05-load-binop-store.scm | 7++++---
Mtests/cc-cg/06-if.expected | 1+
Mtests/cc-cg/07-ifelse.expected | 1+
Mtests/cc-cg/08-while-break-continue.expected | 1+
Mtests/cc-cg/09-call.expected | 1+
Mtests/cc-cg/10-string.expected | 1+
Mtests/cc-cg/11-global-var.expected | 1+
Mtests/cc-cg/12-entry-stub.expected | 1+
Mtests/cc-cg/13-call-5args.expected | 1+
Mtests/cc-cg/14-take-addr.expected | 1+
Mtests/cc-cg/14-take-addr.scm | 7++++---
22 files changed, 89 insertions(+), 34 deletions(-)

diff --git a/cc/cg.scm b/cc/cg.scm @@ -128,9 +128,12 @@ "%call(&cc__main)\n" "})\n")))) (buf-push! (cg-text cg) stub)) + ;; Every P1pp translation unit must end with :ELF_end so the ELF + ;; header can compute file-size and ph_memsz boundaries. (bv-cat (list (buf-flush (cg-text cg)) (buf-flush (cg-data cg)) - (buf-flush (cg-bss cg))))) + (buf-flush (cg-bss cg)) + ":ELF_end\n"))) (define (cg-fn-begin cg name params return-type) (cg-fn-buf-set! cg (make-buf)) @@ -144,13 +147,17 @@ (%cg-fn-set! cg '%indirect-slots '()) (let ((ret-slot (cg-alloc-slot cg 8 8))) (%cg-fn-set! cg '%fn-ret-slot ret-slot)) + ;; params per CC-CONTRACTS §3.1: list of (name-bv . ctype). We + ;; return an alist (name-bv . sym) the parser binds into scope. (let walk ((ps params) (idx 0) (out '())) (cond ((null? ps) (reverse out)) (else - (let* ((p (car ps)) - (ty (sym-type p)) - (off (cg-alloc-slot cg 8 8))) + (let* ((p (car ps)) + (nm (car p)) + (ty (cdr p)) + (off (cg-alloc-slot cg 8 8)) + (psym (%sym nm 'param #f ty off))) (cond ((< idx 4) (let ((ar (%reg-by-idx idx))) @@ -161,8 +168,7 @@ (buf-push! (cg-prologue-buf cg) (bv-cat (list "%ldarg(t0, " (%n (- idx 4)) ")\n" "%st(t0, sp, " (%cg-slot-expr cg off) ")\n"))))) - (walk (cdr ps) (+ idx 1) - (cons (%sym (sym-name p) 'param #f ty off) out))))))) + (walk (cdr ps) (+ idx 1) (cons (cons nm psym) out))))))) (define (cg-fn-end cg) (let* ((name (%cg-fn-get cg '%fn-name)) diff --git a/cc/main.scm b/cc/main.scm @@ -1,19 +1,49 @@ -;; cc/main.scm — driver. argv handling, file I/O, ties phases together. +;; cc/main.scm — driver. Argv, file I/O, ties phases together. ;; ;; Realization of docs/CC-INTERNALS.md §main.scm. -;; -;; Owner: <unassigned> ;; -------------------------------------------------------------------- -;; cc-main : argv (list of bv) -> exit-status (fixnum) -;; argv[0] is the program name (per scheme1's `argv` primitive). -;; Recognized CLI: <input.flat.c> -o <output.P1pp> [-D NAME[=VAL]] ... +;; CLI: cc <input.c> <output.P1pp> +;; (-o flag and -D flags are deferred — phase-1 runner doesn't need them.) +;; +;; scheme1 passes (argv) as a list of bvs; argv[0] is "scheme1", argv[1] +;; is the catm'd compiler source path, argv[2..] are the user-facing +;; positional args. cc-main strips the first two. ;; -------------------------------------------------------------------- -(define (cc-main argv) - (error "TBD: cc-main")) -;; -------------------------------------------------------------------- -;; Top-level invocation. main.scm is the last file in the catm order, -;; so execution begins here once the runtime has finished loading. -;; -------------------------------------------------------------------- +(define (%cc-slurp path) + (let ((r (open-input path))) + (cond ((not (car r)) + (die #f "cannot open input" path))) + (let* ((p (cdr r)) + (rd (read-all p))) + (close p) + (cond ((not (car rd)) (die #f "read failed" path))) + (cdr rd)))) + +(define (%cc-write path bv) + (let ((r (open-output path))) + (cond ((not (car r)) + (die #f "cannot open output" path))) + (let ((p (cdr r))) + (write-bv-fd (port-fd p) bv) + (close p) + 0))) + +(define (cc-main av) + (let ((args (cdr (cdr av)))) + (cond + ((or (null? args) (null? (cdr args))) + (die #f "usage: cc <input.c> <output.P1pp>"))) + (let* ((in-path (car args)) + (out-path (car (cdr args))) + (src (%cc-slurp in-path)) + (toks (lex-tokenize src in-path)) + (expanded (pp-expand toks '())) + (cg (cg-init)) + (ps (make-pstate expanded cg))) + (parse-translation-unit ps) + (%cc-write out-path (cg-finish cg)) + 0))) + (sys-exit (cc-main (argv))) diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp @@ -64,7 +64,7 @@ # slot. The offsets are emitted directly in bss_init_tbl via $(). %macro SYMTAB_CAP_SLOTS() -1024 +8192 %endm %macro READBUF_CAP_BYTES() @@ -5137,8 +5137,8 @@ # 8-byte offset constant. p1_main walks this once at startup. :bss_init_tbl &readbuf_buf_ptr %(0) $(0) -&heap_buf_ptr %(0) $(0x10000) -&symtab_buf_ptr %(0) $(0x1010000) +&heap_buf_ptr %(0) $(%READBUF_CAP_BYTES) +&symtab_buf_ptr %(0) $((+ %READBUF_CAP_BYTES %HEAP_CAP_BYTES)) :bss_init_tbl_end # ========================================================================= diff --git a/tests/cc-cg/00-fn-empty.expected b/tests/cc-cg/00-fn-empty.expected @@ -12,3 +12,4 @@ %fn(p1_main, 16, { %call(&cc__main) }) +:ELF_end diff --git a/tests/cc-cg/01-return-imm.expected b/tests/cc-cg/01-return-imm.expected @@ -12,3 +12,4 @@ %fn(p1_main, 16, { %call(&cc__main) }) +:ELF_end diff --git a/tests/cc-cg/02-one-param.expected b/tests/cc-cg/02-one-param.expected @@ -15,3 +15,4 @@ %fn(p1_main, 16, { %call(&cc__main) }) +:ELF_end diff --git a/tests/cc-cg/02-one-param.scm b/tests/cc-cg/02-one-param.scm @@ -3,9 +3,10 @@ ;; Exercises cg-fn-begin's param spill path and cg-push-sym → cg-load. (let* ((cg (cg-init)) - (argc (%sym "argc" 'param #f %t-i32 #f)) - (params (cg-fn-begin cg "main" (list argc) %t-i32)) - (argc* (car params))) + (params (cg-fn-begin cg "main" + (list (cons "argc" %t-i32)) + %t-i32)) + (argc* (cdr (car params)))) (cg-push-sym cg argc*) ; lval frame (cg-load cg) ; rval i32 (cg-return cg) diff --git a/tests/cc-cg/03-two-params.expected b/tests/cc-cg/03-two-params.expected @@ -16,3 +16,4 @@ %fn(p1_main, 16, { %call(&cc__main) }) +:ELF_end diff --git a/tests/cc-cg/03-two-params.scm b/tests/cc-cg/03-two-params.scm @@ -3,11 +3,12 @@ ;; Both params spill, but only argc is loaded for return. (let* ((cg (cg-init)) - (argc (%sym "argc" 'param #f %t-i32 #f)) (cpp (%ctype 'ptr 8 8 (%ctype 'ptr 8 8 %t-i8))) - (argv (%sym "argv" 'param #f cpp #f)) - (params (cg-fn-begin cg "main" (list argc argv) %t-i32)) - (argc* (car params))) + (params (cg-fn-begin cg "main" + (list (cons "argc" %t-i32) + (cons "argv" cpp)) + %t-i32)) + (argc* (cdr (car params)))) (cg-push-sym cg argc*) (cg-load cg) (cg-return cg) diff --git a/tests/cc-cg/04-binop-add.expected b/tests/cc-cg/04-binop-add.expected @@ -16,3 +16,4 @@ %fn(p1_main, 16, { %call(&cc__main) }) +:ELF_end diff --git a/tests/cc-cg/05-load-binop-store.expected b/tests/cc-cg/05-load-binop-store.expected @@ -24,3 +24,4 @@ %fn(p1_main, 16, { %call(&cc__main) }) +:ELF_end diff --git a/tests/cc-cg/05-load-binop-store.scm b/tests/cc-cg/05-load-binop-store.scm @@ -3,9 +3,10 @@ ;; Exercises load → binop → assign → load → return on the same lval. (let* ((cg (cg-init)) - (x (%sym "x" 'param #f %t-i32 #f)) - (params (cg-fn-begin cg "main" (list x) %t-i32)) - (x* (car params))) + (params (cg-fn-begin cg "main" + (list (cons "x" %t-i32)) + %t-i32)) + (x* (cdr (car params)))) ;; x = x + 5 (cg-push-sym cg x*) ; lval (lhs) (cg-push-sym cg x*) ; lval diff --git a/tests/cc-cg/06-if.expected b/tests/cc-cg/06-if.expected @@ -18,3 +18,4 @@ %fn(p1_main, 16, { %call(&cc__main) }) +:ELF_end diff --git a/tests/cc-cg/07-ifelse.expected b/tests/cc-cg/07-ifelse.expected @@ -19,3 +19,4 @@ %fn(p1_main, 16, { %call(&cc__main) }) +:ELF_end diff --git a/tests/cc-cg/08-while-break-continue.expected b/tests/cc-cg/08-while-break-continue.expected @@ -18,3 +18,4 @@ %fn(p1_main, 16, { %call(&cc__main) }) +:ELF_end diff --git a/tests/cc-cg/09-call.expected b/tests/cc-cg/09-call.expected @@ -15,3 +15,4 @@ %fn(p1_main, 16, { %call(&cc__main) }) +:ELF_end diff --git a/tests/cc-cg/10-string.expected b/tests/cc-cg/10-string.expected @@ -20,3 +20,4 @@ :cc__str_1 "world" !(0) +:ELF_end diff --git a/tests/cc-cg/11-global-var.expected b/tests/cc-cg/11-global-var.expected @@ -21,3 +21,4 @@ !(0) !(0) !(0) +:ELF_end diff --git a/tests/cc-cg/12-entry-stub.expected b/tests/cc-cg/12-entry-stub.expected @@ -2,3 +2,4 @@ %fn(p1_main, 16, { %call(&cc__main) }) +:ELF_end diff --git a/tests/cc-cg/13-call-5args.expected b/tests/cc-cg/13-call-5args.expected @@ -20,3 +20,4 @@ %fn(p1_main, 16, { %call(&cc__main) }) +:ELF_end diff --git a/tests/cc-cg/14-take-addr.expected b/tests/cc-cg/14-take-addr.expected @@ -21,3 +21,4 @@ %fn(p1_main, 16, { %call(&cc__main) }) +:ELF_end diff --git a/tests/cc-cg/14-take-addr.scm b/tests/cc-cg/14-take-addr.scm @@ -4,9 +4,10 @@ ;; just push x's lval, take-addr, push-deref, load, return.) (let* ((cg (cg-init)) - (x (%sym "x" 'param #f %t-i32 #f)) - (params (cg-fn-begin cg "main" (list x) %t-i32)) - (x* (car params))) + (params (cg-fn-begin cg "main" + (list (cons "x" %t-i32)) + %t-i32)) + (x* (cdr (car params)))) (cg-push-sym cg x*) ; lval frame (cg-take-addr cg) ; rval ptr-to-int (cg-push-deref cg) ; lval int (through pointer)