boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 698bd1c311dd8004ba2d3216a15dc95a3af0e63d
parent 90d1259e597202f115a57b9922a3d0049b8e157e
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 27 Apr 2026 13:38:04 -0700

cc: union field offsets + sizeof no-emit (Stream D)

- parse-struct-fields takes kind; unions keep all fields at offset 0
  while struct still bumps the running offset. Fixes 082.
- Add cg-snapshot / cg-rewind primitives (vstack, fn-buf offset,
  frame-hi, max-outgoing). Sizeof's value-expression arms snapshot,
  parse, read the operand type, and rewind so side effects (e.g.
  sizeof(x++)) are not retained. Fixes 087.
- New cc-cg fixture 72-cg-snapshot-rewind locks in the snapshot/rewind
  contract directly.

Diffstat:
Mcc/cc.scm | 102+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Atests/cc-cg/72-cg-snapshot-rewind.expected-exit | 1+
Atests/cc-cg/72-cg-snapshot-rewind.scm | 46++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 118 insertions(+), 31 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -2659,6 +2659,35 @@ (define (cg-depth cg) (length (cg-vstack cg))) +;; -------------------------------------------------------------------- +;; Snapshot / rewind — discard any vstack pushes and fn-buf bytes +;; emitted between snapshot and rewind. Used by sizeof to parse its +;; operand for type information without retaining its side effects +;; (CC.md §Expressions: sizeof's operand is not evaluated). Internal- +;; only; the parser is the sole expected caller. +;; +;; vstack captures the head of the cons-list (immutable structurally). +;; fn-buf is restored by resetting buf-offset; the underlying storage +;; bytes past the new offset become garbage that the next buf-push! +;; will overwrite (buf-push! always copies into [offset, offset+len)). +;; frame-hi and max-outgoing are also restored so cg-alloc-slot calls +;; inside the rewound region don't leak frame bytes. +;; -------------------------------------------------------------------- +(define (cg-snapshot cg) + (cond + ((not (cg-in-fn? cg)) + (die #f "cg-snapshot: not in fn"))) + (list (cg-vstack cg) + (buf-offset (cg-fn-buf cg)) + (cg-frame-hi cg) + (cg-max-outgoing cg))) + +(define (cg-rewind cg tag) + (cg-vstack-set! cg (car tag)) + (buf-offset-set! (cg-fn-buf cg) (cadr tag)) + (cg-frame-hi-set! cg (caddr tag)) + (cg-max-outgoing-set! cg (cadddr tag))) + ;; Duplicate the top vstack entry. For lvals this is safe — the slot ;; (or label, or indirect-marked frame) backing the lval keeps existing ;; until the function ends. For rvals it duplicates the descriptor of @@ -3621,7 +3650,7 @@ (else (let ((c (%ctype kind -1 -1 (list (or tag #f) #f '())))) (if tag (tag-bind! ps tag c)) c)))) - (fields (parse-struct-fields ps))) + (fields (parse-struct-fields ps kind))) (expect-punct ps 'rbrace) (complete-agg! ct kind tag fields) ct)) (tag (let ((ex (tag-lookup ps tag))) @@ -3631,28 +3660,30 @@ (tag-bind! ps tag c) c))))) (else (die (tok-loc (peek ps)) "anon agg"))))) -(define (parse-struct-fields ps) - (let loop ((acc '()) (off 0)) - (cond - ((at-punct? ps 'rbrace) (reverse acc)) - (else - (let ((spec (parse-decl-spec ps))) - (let dl ((acc2 acc) (o2 off)) - (let* ((p (parse-declarator ps (cdr spec))) - (nm (car p)) (ty (cdr p)) - (al (max (ctype-align ty) 1)) - (sz (ctype-size ty)) - (oa (align-up o2 al))) - (cond - ((at-punct? ps 'comma) - (advance ps) - (dl (cons (list nm ty oa) acc2) - (+ oa (max sz 0)))) - ((at-punct? ps 'semi) - (advance ps) - (loop (cons (list nm ty oa) acc2) - (+ oa (max sz 0)))) - (else (die (tok-loc (peek ps)) "field")))))))))) +(define (parse-struct-fields ps kind) + ;; For unions, every field stays at offset 0; complete-agg! takes the + ;; max of field sizes for the union's overall size. + (let ((struct? (eq? kind 'struct))) + (let loop ((acc '()) (off 0)) + (cond + ((at-punct? ps 'rbrace) (reverse acc)) + (else + (let ((spec (parse-decl-spec ps))) + (let dl ((acc2 acc) (o2 off)) + (let* ((p (parse-declarator ps (cdr spec))) + (nm (car p)) (ty (cdr p)) + (al (max (ctype-align ty) 1)) + (sz (ctype-size ty)) + (oa (if struct? (align-up o2 al) 0)) + (next (if struct? (+ oa (max sz 0)) 0))) + (cond + ((at-punct? ps 'comma) + (advance ps) + (dl (cons (list nm ty oa) acc2) next)) + ((at-punct? ps 'semi) + (advance ps) + (loop (cons (list nm ty oa) acc2) next)) + (else (die (tok-loc (peek ps)) "field"))))))))))) (define (complete-agg! ct k tag fs) (let* ((ma (let m ((xs fs) (a 1)) @@ -4890,16 +4921,25 @@ (cg-push-imm (ps-cg ps) %t-u64 (max (ctype-size ty) 0)))) (else - (parse-expr ps) (expect-punct ps 'rparen) - (let* ((tp (cg-top (ps-cg ps))) - (sz (max (ctype-size (opnd-type tp)) 0))) - (cg-pop (ps-cg ps)) - (cg-push-imm (ps-cg ps) %t-u64 sz))))) - (else (parse-unary ps) + ;; sizeof(EXPR): C semantics — operand is NOT evaluated. + ;; Snapshot cg state, parse the expr to learn its type, + ;; then rewind to discard any code emission and vstack + ;; pushes the parse incurred (e.g. `sizeof(x++)` must not + ;; increment x). cf. CC.md §Expressions. + (let ((tag (cg-snapshot (ps-cg ps)))) + (parse-expr ps) (expect-punct ps 'rparen) (let* ((tp (cg-top (ps-cg ps))) (sz (max (ctype-size (opnd-type tp)) 0))) - (cg-pop (ps-cg ps)) - (cg-push-imm (ps-cg ps) %t-u64 sz))))) + (cg-rewind (ps-cg ps) tag) + (cg-push-imm (ps-cg ps) %t-u64 sz)))))) + (else + ;; sizeof EXPR (no parens) — same no-eval rule. + (let ((tag (cg-snapshot (ps-cg ps)))) + (parse-unary ps) + (let* ((tp (cg-top (ps-cg ps))) + (sz (max (ctype-size (opnd-type tp)) 0))) + (cg-rewind (ps-cg ps) tag) + (cg-push-imm (ps-cg ps) %t-u64 sz)))))) (else (parse-postfix ps)))) (define (token-is-decl? ps) diff --git a/tests/cc-cg/72-cg-snapshot-rewind.expected-exit b/tests/cc-cg/72-cg-snapshot-rewind.expected-exit @@ -0,0 +1 @@ +5 diff --git a/tests/cc-cg/72-cg-snapshot-rewind.scm b/tests/cc-cg/72-cg-snapshot-rewind.scm @@ -0,0 +1,46 @@ +;; tests/cc-cg/72-cg-snapshot-rewind.scm — cg-snapshot / cg-rewind +;; primitives. The pair underpins parse-side `sizeof EXPR` (no-eval): +;; the parser learns the expression's type without retaining the code +;; emission or vstack push the operand parse produced. +;; +;; Models, in C terms: +;; int x = 5; +;; /* "speculative" load+inc+store sequence, then rewound */ +;; /* speculative pushes onto vstack, then rewound */ +;; return x; /* must still be 5 */ +;; +;; Concretely: +;; 1. x = 5 (committed). +;; 2. cg-snapshot — capture vstack/fn-buf/frame-hi state. +;; 3. Emit a postinc on x (loads, stores x+1) AND push extra opnds +;; onto the vstack. +;; 4. cg-rewind — discard the emitted bytes and the vstack pushes. +;; 5. return x — must observe pre-snapshot value, exit 5. +;; +;; If snapshot/rewind fail to undo the fn-buf bytes, x is incremented +;; to 6 and we'd exit 6 instead of 5. If the vstack rewind fails, the +;; final cg-return sees the wrong opnd on top. + +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (let* ((off-x (cg-alloc-slot cg 4 4)) + (sym-x (%sym "x" 'var 'auto %t-i32 off-x))) + ;; x = 5 (committed) + (cg-push-sym cg sym-x) + (cg-push-imm cg %t-i32 5) + (cg-assign cg) (cg-pop cg) + ;; snapshot: capture state before the speculative work + (let ((tag (cg-snapshot cg))) + ;; speculative emission: postinc on x — would make x = 6 + (cg-push-sym cg sym-x) + (cg-postinc cg) + (cg-pop cg) + ;; speculative vstack push, never assigned: an extra imm + (cg-push-imm cg %t-i32 999) + ;; rewind: discard the postinc bytes AND the speculative pushes + (cg-rewind cg tag)) + ;; return x — should still be 5 + (cg-push-sym cg sym-x) (cg-load cg) + (cg-return cg)) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg)))