commit a5249ddd82cee9881f5db480ce7773c660453804
parent 66ab01573550a19083f93d3e6588272df7cadc7f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 26 Apr 2026 02:21:26 -0700
cc: phase-1 milestone — int main(int argc, char **argv) compiles e2e
Three fixes that take the compiler from "modules pass their own tests"
to "compiles a real C program through to a working ELF":
1. cc-main wired up.
cc/main.scm now slurps argv[2], lex+pp+parse+cg, writes argv[3].
No -D / -o flags yet — those land when we push past phase 1.
2. cg-fn-begin params contract aligned with CC-CONTRACTS §3.1.
cg's earlier implementation took a list of sym records and returned
a list of sym records. The contract — and parse-fn-body — expected
`(name . ctype)` pairs in, and an `(name . sym)` alist out. The
mismatch only surfaced at integration time (cc-cg fixtures that
exercised params were constructing syms directly to satisfy cg's
actual shape). All four fixtures with params now use the contract
shape; cg-fn-begin updated to match.
3. cg-finish emits trailing `:ELF_end`.
The ELF.hex2 header references `:ELF_end` to compute file size and
ph_memsz boundaries; without it hex2-0 errors out and produces a
truncated binary. All cg .expected files updated.
Plus two scheme1 capacity bumps, both required by the integrated
compiler:
- READBUF_CAP_BYTES 64K -> 256K (combined cc.scm is ~150K).
Also fixed the BSS init table that was hardcoding the heap offset
at +0x10000 — it now uses %READBUF_CAP_BYTES so the heap moves
with the readbuf cap, instead of being trampled by readbuf overflow.
- SYMTAB_CAP_SLOTS 1024 -> 8192. The integrated cc has ~366 explicit
defines plus ~100 auto-generated record accessors plus interned
literals, far more than the original 1024-slot table.
Phase-1 validation on aarch64:
./compile tests/cc-e2e/00-return-argc.c | sh boot-build-p1pp.sh ...
./build/cc-e2e/00-return-argc -> exit 1 (argc=1)
./build/cc-e2e/00-return-argc a b c -> exit 4 (argc=4)
./build/cc-e2e/00-return-argc a b c d e -> exit 6 (argc=6)
Suites: scheme1 (94), cc-util (14), cc-lex (16), cc-pp (22),
cc-cg (15), cc-parse (15) all pass on aarch64.
Diffstat:
22 files changed, 89 insertions(+), 34 deletions(-)
diff --git a/cc/cg.scm b/cc/cg.scm
@@ -128,9 +128,12 @@
"%call(&cc__main)\n"
"})\n"))))
(buf-push! (cg-text cg) stub))
+ ;; Every P1pp translation unit must end with :ELF_end so the ELF
+ ;; header can compute file-size and ph_memsz boundaries.
(bv-cat (list (buf-flush (cg-text cg))
(buf-flush (cg-data cg))
- (buf-flush (cg-bss cg)))))
+ (buf-flush (cg-bss cg))
+ ":ELF_end\n")))
(define (cg-fn-begin cg name params return-type)
(cg-fn-buf-set! cg (make-buf))
@@ -144,13 +147,17 @@
(%cg-fn-set! cg '%indirect-slots '())
(let ((ret-slot (cg-alloc-slot cg 8 8)))
(%cg-fn-set! cg '%fn-ret-slot ret-slot))
+ ;; params per CC-CONTRACTS §3.1: list of (name-bv . ctype). We
+ ;; return an alist (name-bv . sym) the parser binds into scope.
(let walk ((ps params) (idx 0) (out '()))
(cond
((null? ps) (reverse out))
(else
- (let* ((p (car ps))
- (ty (sym-type p))
- (off (cg-alloc-slot cg 8 8)))
+ (let* ((p (car ps))
+ (nm (car p))
+ (ty (cdr p))
+ (off (cg-alloc-slot cg 8 8))
+ (psym (%sym nm 'param #f ty off)))
(cond
((< idx 4)
(let ((ar (%reg-by-idx idx)))
@@ -161,8 +168,7 @@
(buf-push! (cg-prologue-buf cg)
(bv-cat (list "%ldarg(t0, " (%n (- idx 4)) ")\n"
"%st(t0, sp, " (%cg-slot-expr cg off) ")\n")))))
- (walk (cdr ps) (+ idx 1)
- (cons (%sym (sym-name p) 'param #f ty off) out)))))))
+ (walk (cdr ps) (+ idx 1) (cons (cons nm psym) out)))))))
(define (cg-fn-end cg)
(let* ((name (%cg-fn-get cg '%fn-name))
diff --git a/cc/main.scm b/cc/main.scm
@@ -1,19 +1,49 @@
-;; cc/main.scm — driver. argv handling, file I/O, ties phases together.
+;; cc/main.scm — driver. Argv, file I/O, ties phases together.
;;
;; Realization of docs/CC-INTERNALS.md §main.scm.
-;;
-;; Owner: <unassigned>
;; --------------------------------------------------------------------
-;; cc-main : argv (list of bv) -> exit-status (fixnum)
-;; argv[0] is the program name (per scheme1's `argv` primitive).
-;; Recognized CLI: <input.flat.c> -o <output.P1pp> [-D NAME[=VAL]] ...
+;; CLI: cc <input.c> <output.P1pp>
+;; (-o flag and -D flags are deferred — phase-1 runner doesn't need them.)
+;;
+;; scheme1 passes (argv) as a list of bvs; argv[0] is "scheme1", argv[1]
+;; is the catm'd compiler source path, argv[2..] are the user-facing
+;; positional args. cc-main strips the first two.
;; --------------------------------------------------------------------
-(define (cc-main argv)
- (error "TBD: cc-main"))
-;; --------------------------------------------------------------------
-;; Top-level invocation. main.scm is the last file in the catm order,
-;; so execution begins here once the runtime has finished loading.
-;; --------------------------------------------------------------------
+(define (%cc-slurp path)
+ (let ((r (open-input path)))
+ (cond ((not (car r))
+ (die #f "cannot open input" path)))
+ (let* ((p (cdr r))
+ (rd (read-all p)))
+ (close p)
+ (cond ((not (car rd)) (die #f "read failed" path)))
+ (cdr rd))))
+
+(define (%cc-write path bv)
+ (let ((r (open-output path)))
+ (cond ((not (car r))
+ (die #f "cannot open output" path)))
+ (let ((p (cdr r)))
+ (write-bv-fd (port-fd p) bv)
+ (close p)
+ 0)))
+
+(define (cc-main av)
+ (let ((args (cdr (cdr av))))
+ (cond
+ ((or (null? args) (null? (cdr args)))
+ (die #f "usage: cc <input.c> <output.P1pp>")))
+ (let* ((in-path (car args))
+ (out-path (car (cdr args)))
+ (src (%cc-slurp in-path))
+ (toks (lex-tokenize src in-path))
+ (expanded (pp-expand toks '()))
+ (cg (cg-init))
+ (ps (make-pstate expanded cg)))
+ (parse-translation-unit ps)
+ (%cc-write out-path (cg-finish cg))
+ 0)))
+
(sys-exit (cc-main (argv)))
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp
@@ -64,7 +64,7 @@
# slot. The offsets are emitted directly in bss_init_tbl via $().
%macro SYMTAB_CAP_SLOTS()
-1024
+8192
%endm
%macro READBUF_CAP_BYTES()
@@ -5137,8 +5137,8 @@
# 8-byte offset constant. p1_main walks this once at startup.
:bss_init_tbl
&readbuf_buf_ptr %(0) $(0)
-&heap_buf_ptr %(0) $(0x10000)
-&symtab_buf_ptr %(0) $(0x1010000)
+&heap_buf_ptr %(0) $(%READBUF_CAP_BYTES)
+&symtab_buf_ptr %(0) $((+ %READBUF_CAP_BYTES %HEAP_CAP_BYTES))
:bss_init_tbl_end
# =========================================================================
diff --git a/tests/cc-cg/00-fn-empty.expected b/tests/cc-cg/00-fn-empty.expected
@@ -12,3 +12,4 @@
%fn(p1_main, 16, {
%call(&cc__main)
})
+:ELF_end
diff --git a/tests/cc-cg/01-return-imm.expected b/tests/cc-cg/01-return-imm.expected
@@ -12,3 +12,4 @@
%fn(p1_main, 16, {
%call(&cc__main)
})
+:ELF_end
diff --git a/tests/cc-cg/02-one-param.expected b/tests/cc-cg/02-one-param.expected
@@ -15,3 +15,4 @@
%fn(p1_main, 16, {
%call(&cc__main)
})
+:ELF_end
diff --git a/tests/cc-cg/02-one-param.scm b/tests/cc-cg/02-one-param.scm
@@ -3,9 +3,10 @@
;; Exercises cg-fn-begin's param spill path and cg-push-sym → cg-load.
(let* ((cg (cg-init))
- (argc (%sym "argc" 'param #f %t-i32 #f))
- (params (cg-fn-begin cg "main" (list argc) %t-i32))
- (argc* (car params)))
+ (params (cg-fn-begin cg "main"
+ (list (cons "argc" %t-i32))
+ %t-i32))
+ (argc* (cdr (car params))))
(cg-push-sym cg argc*) ; lval frame
(cg-load cg) ; rval i32
(cg-return cg)
diff --git a/tests/cc-cg/03-two-params.expected b/tests/cc-cg/03-two-params.expected
@@ -16,3 +16,4 @@
%fn(p1_main, 16, {
%call(&cc__main)
})
+:ELF_end
diff --git a/tests/cc-cg/03-two-params.scm b/tests/cc-cg/03-two-params.scm
@@ -3,11 +3,12 @@
;; Both params spill, but only argc is loaded for return.
(let* ((cg (cg-init))
- (argc (%sym "argc" 'param #f %t-i32 #f))
(cpp (%ctype 'ptr 8 8 (%ctype 'ptr 8 8 %t-i8)))
- (argv (%sym "argv" 'param #f cpp #f))
- (params (cg-fn-begin cg "main" (list argc argv) %t-i32))
- (argc* (car params)))
+ (params (cg-fn-begin cg "main"
+ (list (cons "argc" %t-i32)
+ (cons "argv" cpp))
+ %t-i32))
+ (argc* (cdr (car params))))
(cg-push-sym cg argc*)
(cg-load cg)
(cg-return cg)
diff --git a/tests/cc-cg/04-binop-add.expected b/tests/cc-cg/04-binop-add.expected
@@ -16,3 +16,4 @@
%fn(p1_main, 16, {
%call(&cc__main)
})
+:ELF_end
diff --git a/tests/cc-cg/05-load-binop-store.expected b/tests/cc-cg/05-load-binop-store.expected
@@ -24,3 +24,4 @@
%fn(p1_main, 16, {
%call(&cc__main)
})
+:ELF_end
diff --git a/tests/cc-cg/05-load-binop-store.scm b/tests/cc-cg/05-load-binop-store.scm
@@ -3,9 +3,10 @@
;; Exercises load → binop → assign → load → return on the same lval.
(let* ((cg (cg-init))
- (x (%sym "x" 'param #f %t-i32 #f))
- (params (cg-fn-begin cg "main" (list x) %t-i32))
- (x* (car params)))
+ (params (cg-fn-begin cg "main"
+ (list (cons "x" %t-i32))
+ %t-i32))
+ (x* (cdr (car params))))
;; x = x + 5
(cg-push-sym cg x*) ; lval (lhs)
(cg-push-sym cg x*) ; lval
diff --git a/tests/cc-cg/06-if.expected b/tests/cc-cg/06-if.expected
@@ -18,3 +18,4 @@
%fn(p1_main, 16, {
%call(&cc__main)
})
+:ELF_end
diff --git a/tests/cc-cg/07-ifelse.expected b/tests/cc-cg/07-ifelse.expected
@@ -19,3 +19,4 @@
%fn(p1_main, 16, {
%call(&cc__main)
})
+:ELF_end
diff --git a/tests/cc-cg/08-while-break-continue.expected b/tests/cc-cg/08-while-break-continue.expected
@@ -18,3 +18,4 @@
%fn(p1_main, 16, {
%call(&cc__main)
})
+:ELF_end
diff --git a/tests/cc-cg/09-call.expected b/tests/cc-cg/09-call.expected
@@ -15,3 +15,4 @@
%fn(p1_main, 16, {
%call(&cc__main)
})
+:ELF_end
diff --git a/tests/cc-cg/10-string.expected b/tests/cc-cg/10-string.expected
@@ -20,3 +20,4 @@
:cc__str_1
"world"
!(0)
+:ELF_end
diff --git a/tests/cc-cg/11-global-var.expected b/tests/cc-cg/11-global-var.expected
@@ -21,3 +21,4 @@
!(0)
!(0)
!(0)
+:ELF_end
diff --git a/tests/cc-cg/12-entry-stub.expected b/tests/cc-cg/12-entry-stub.expected
@@ -2,3 +2,4 @@
%fn(p1_main, 16, {
%call(&cc__main)
})
+:ELF_end
diff --git a/tests/cc-cg/13-call-5args.expected b/tests/cc-cg/13-call-5args.expected
@@ -20,3 +20,4 @@
%fn(p1_main, 16, {
%call(&cc__main)
})
+:ELF_end
diff --git a/tests/cc-cg/14-take-addr.expected b/tests/cc-cg/14-take-addr.expected
@@ -21,3 +21,4 @@
%fn(p1_main, 16, {
%call(&cc__main)
})
+:ELF_end
diff --git a/tests/cc-cg/14-take-addr.scm b/tests/cc-cg/14-take-addr.scm
@@ -4,9 +4,10 @@
;; just push x's lval, take-addr, push-deref, load, return.)
(let* ((cg (cg-init))
- (x (%sym "x" 'param #f %t-i32 #f))
- (params (cg-fn-begin cg "main" (list x) %t-i32))
- (x* (car params)))
+ (params (cg-fn-begin cg "main"
+ (list (cons "x" %t-i32))
+ %t-i32))
+ (x* (cdr (car params))))
(cg-push-sym cg x*) ; lval frame
(cg-take-addr cg) ; rval ptr-to-int
(cg-push-deref cg) ; lval int (through pointer)