boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 0045e54d16cd90c3eae0448e3760c7aaeec8fc17
parent c0af21b7d2e5511016c8989f404945b0d697b8ba
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 26 Apr 2026 22:25:25 -0700

cc/cg: structured init for cg-emit-global; scalar global init (§E.1)

Reshape cg-emit-global's init argument from "bv or #f" to "list of
pieces or #f". Each piece is either a bytevector (raw bytes, emitted as
N×!(byte) entries in .data) or a (label-ref . <label-bv>) pair (emitted
as `&<label> %(0)` for an 8-byte ptr slot). #f remains the zero-init
sentinel.

This lets §E.2-§E.7 + §L.3 build initializer values compositionally
without each callsite reinventing the data-section grammar.

Existing call sites all pass #f today, so the signature change is
backwards-compatible.

Diffstat:
M cc/cg.scm | 44 +++++++++++++++++++++++++++++++++++++++-----
M docs/CC-INTERNALS.md | 5 ++++-
A tests/cc-cg/49-init-scalar-global.expected-exit | 1 +
A tests/cc-cg/49-init-scalar-global.scm | 19 +++++++++++++++++++
4 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/cc/cg.scm b/cc/cg.scm @@ -793,16 +793,50 @@ ;; -------------------------------------------------------------------- ;; Globals and data ;; -------------------------------------------------------------------- -(define (cg-emit-global cg sym init-bv-or-false) +;; cg-emit-global: emit a global symbol into either .data (initialized) +;; or .bss (zero-init). +;; +;; init can be: +;; #f — zero-init in .bss (size from sym's ctype). +;; (piece ...) — initialized in .data; pieces concatenated. +;; +;; Each piece is either: +;; <bytevector> — raw bytes; emitted as N×!(byte) entries. +;; (label-ref . <label-bv>) — 8-byte pointer slot containing &label; +;; emitted as `&<label> %(0)` (4B label ref + +;; 4B zero pad). +(define (%cg-init-piece->bv piece) + (cond + ((bytevector? piece) + (let ((n (bytevector-length piece))) + (let loop ((i 0) (acc '())) + (cond + ((= i n) (bv-cat (reverse acc))) + (else + (loop (+ i 1) + (cons (bv-cat (list "!(" + (number->string + (bytevector-u8-ref piece i) 10) + ")\n")) + acc))))))) + ((and (pair? piece) (eq? (car piece) 'label-ref)) + (bv-cat (list "&" (cdr piece) " %(0)\n"))) + (else (die #f "cg-emit-global: bad init piece" piece)))) + +(define (cg-emit-global cg sym init) (let* ((nm (sym-name sym)) (lbl (%cg-mangle-global nm)) (sz (ctype-size (sym-type sym))) (size (if (< sz 0) 8 sz))) (cond - (init-bv-or-false - (buf-push! (cg-data cg) - (bv-cat (list "\n:" lbl "\n" - "\"" init-bv-or-false "\"\n")))) + (init + (buf-push! (cg-data cg) (bv-cat (list "\n:" lbl "\n"))) + (let walk ((ps init)) + (cond + ((null? ps) 0) + (else + (buf-push! (cg-data cg) (%cg-init-piece->bv (car ps))) + (walk (cdr ps)))))) (else (buf-push! (cg-bss cg) (bv-cat (list "\n:" lbl "\n" diff --git a/docs/CC-INTERNALS.md b/docs/CC-INTERNALS.md @@ -555,7 +555,10 @@ beat seven.) 
### Globals and data ```scheme -(cg-emit-global cg sym init-bv-or-#f) ; init-bv: bytes for .data, or #f for .bss +(cg-emit-global cg sym init) ; init = #f (zero-init in .bss) + ; | (piece ...) in .data +;; piece := <bytevector> — raw bytes +;; | (label-ref . <label-bv>) — 8-byte slot holding &label (cg-emit-extern cg sym) ; declare without defining (cg-intern-string cg bv-content) -> bv-label ; idempotent; used internally by cg-push-string ``` diff --git a/tests/cc-cg/49-init-scalar-global.expected-exit b/tests/cc-cg/49-init-scalar-global.expected-exit @@ -0,0 +1 @@ +42 diff --git a/tests/cc-cg/49-init-scalar-global.scm b/tests/cc-cg/49-init-scalar-global.scm @@ -0,0 +1,19 @@ +;; tests/cc-cg/49-init-scalar-global.scm — emit a global int with a +;; constant initializer; main returns its value. §E.1 of CC-PUNCHLIST. +;; +;; Models: int g = 42; int main(void) { return g; } +;; Runtime: exits 42. + +(let* ((cg (cg-init)) + (g (%sym "g" 'var 'static %t-i32 #f)) + ;; structured init: a single 4-byte LE bytevector piece for 42. + (bv4 (make-bytevector 4 0)) + (_ (bytevector-u8-set! bv4 0 42)) + (init (list bv4))) + (cg-emit-global cg g init) + (cg-fn-begin cg "main" '() %t-i32) + (cg-push-sym cg g) + (cg-load cg) + (cg-return cg) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg)))