boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 8c3794edc8d382460a98cf293b2ebcd07fd94a37
parent 698bd1c311dd8004ba2d3216a15dc95a3af0e63d
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 27 Apr 2026 13:38:16 -0700

cc: integer constant expressions (Stream C)

C99 §6.6 demands an integer constant expression at four call sites:
array bounds, enum initializers, case labels, and file-scope/static
initializers. Previously parse-const-int accepted only a literal with
optional unary +/- or an enum-const ident, which made cc/118-const-expr
compile-fail.

Add parse-const-expr ps -> (value . ctype), a self-contained walker
that never touches cg. The (value . ctype) tuple keeps cast widths so
(int)(unsigned char)257 truncates to 1 instead of leaking 257 through
to the destination slot.

Operand surface: integer/character literal, enum constant,
sizeof(TYPENAME), unary + - ~ !, binary + - * / % << >> & | ^,
compare < <= > >= == !=, logical && || (short-circuit at the value
layer), ternary ?:, cast to integer type, parenthesization. Anything
else dies.

parse-const-int becomes a thin wrapper that returns just the value, so
the four existing call sites (parse-enum-spec, parse-decl-suf-cont's
[] arm, %const-init-piece, parse-case-stmt) light up with the broader
operand surface without further plumbing.

cc/118-const-expr now PASSes on aarch64; remaining 9 failures (082,
087, 111-117) are out of scope for this stream.

Diffstat:
Mcc/cc.scm | 350+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 343 insertions(+), 7 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -3736,18 +3736,354 @@ (tag-bind! ps tag c) c))))) (else (die (tok-loc (peek ps)) "enum"))))) -(define (parse-const-int ps) +;; ==================================================================== +;; Integer constant expressions (C99 §6.6). +;; +;; parse-const-expr ps -> (value . ctype) +;; A self-contained walker that never touches cg. The four call sites +;; that demand an integer constant expression — array bounds, enum +;; initializers, case labels, file-scope/static initializers — all go +;; through here. Returns a (value . ctype) pair so a final cast can +;; truncate at the target type's width (e.g. `(int)(unsigned char)257` +;; needs the inner cast to mask off to u8 before the outer relabel). +;; +;; Operand surface: integer / character literals, enum constants, +;; sizeof(TYPENAME), unary + - ~ !, binary + - * / % << >> & | ^, +;; compare < <= > >= == !=, logical && || (short-circuit at the value +;; layer; both sides are still parsed so the token stream advances), +;; ternary ?:, cast to integer type, parenthesization. Anything else +;; dies. Floats / function calls / address-of / non-const idents / VLAs +;; are out of scope. +;; ==================================================================== + +;; Truncate VALUE to the width and signedness of CT. Integer types only +;; — pointer/array/etc. operands abort upstream. +(define (%const-trunc value ct) + (let* ((sz (ctype-size ct)) + (k (ctype-kind ct)) + (mask (cond ((<= sz 0) 0) + ((= sz 1) #xff) + ((= sz 2) #xffff) + ((= sz 4) #xffffffff) + (else -1)))) + (cond + ;; bool: 0 or 1. + ((eq? k 'bool) (if (= value 0) 0 1)) + ;; 8-byte integers — value already fits in scheme's bignum. + ((or (eq? k 'i64) (eq? k 'u64)) + (cond ((eq? k 'u64) + ;; Mask to 64 bits without losing sign on negative values. + (bit-and value #xffffffffffffffff)) + (else value))) + ((%ctype-unsigned? ct) (bit-and value mask)) + (else + ;; Signed: mask to width, then sign-extend if top bit is set. + (let* ((m (bit-and value mask)) + (sign-bit (arithmetic-shift 1 (- (* sz 8) 1)))) + (cond ((= 0 (bit-and m sign-bit)) m) + (else (- m (arithmetic-shift 1 (* sz 8)))))))))) + +;; Usual arithmetic conversions on (value . ctype) pairs. Both operands +;; have already been integer-promoted (≤ int → int) by the caller. +;; Returns (cons new-a-value new-b-value-and-result-ctype): we only need +;; the new values (already truncated) and a single result ctype. +(define (%const-arith-conv ap bp) + (let* ((av (car ap)) (at (cdr ap)) + (bv (car bp)) (bt (cdr bp)) + (rt (%const-arith-conv-type at bt))) + (cons (%const-trunc av rt) + (cons (%const-trunc bv rt) rt)))) + +(define (%const-arith-conv-type at bt) + ;; Pick the wider type; tie-break on unsigned. Caller has already + ;; promoted both to >= int width. + (let ((sa (ctype-size at)) (sb (ctype-size bt))) + (cond + ((> sa sb) at) + ((> sb sa) bt) + ((%ctype-unsigned? at) at) + ((%ctype-unsigned? bt) bt) + (else at)))) + +(define (%const-promote vp) + ;; Integer promotion: types narrower than int (i.e. i8/u8/i16/u16/bool + ;; and 'i32/u32 untouched, see ctype-size). For const-expr, char and + ;; short widen to int, with sign preserved. + (let* ((v (car vp)) (ct (cdr vp)) + (sz (ctype-size ct))) + (cond + ((< sz 4) + (cond ((%ctype-unsigned? ct) + (cons (%const-trunc v %t-u32) %t-u32)) + (else (cons (%const-trunc v %t-i32) %t-i32)))) + (else vp)))) + +(define (%const-bool? vp) (not (= 0 (car vp)))) + +(define (parse-const-expr ps) (parse-const-cond ps)) + +;; Ternary (right-associative). +(define (parse-const-cond ps) + (let ((c (parse-const-lor ps))) + (cond + ((at-punct? ps 'qmark) + (advance ps) + (let* ((t (parse-const-expr ps)) + (_ (expect-punct ps 'colon)) + (e (parse-const-cond ps))) + (cond ((%const-bool? c) t) (else e)))) + (else c)))) + +(define (parse-const-lor ps) + (let lp ((a (parse-const-land ps))) + (cond + ((at-punct? ps 'lor) + (advance ps) + (let ((b (parse-const-land ps))) + (lp (cons (if (or (%const-bool? a) (%const-bool? b)) 1 0) + %t-i32)))) + (else a)))) + +(define (parse-const-land ps) + (let lp ((a (parse-const-bor ps))) + (cond + ((at-punct? ps 'land) + (advance ps) + (let ((b (parse-const-bor ps))) + (lp (cons (if (and (%const-bool? a) (%const-bool? b)) 1 0) + %t-i32)))) + (else a)))) + +(define (parse-const-bor ps) + (let lp ((a (parse-const-bxor ps))) + (cond + ((at-punct? ps 'bar) + (advance ps) + (let* ((b (parse-const-bxor ps)) + (r (%const-arith-conv (%const-promote a) (%const-promote b)))) + (lp (cons (bit-or (car r) (car (cdr r))) (cdr (cdr r)))))) + (else a)))) + +(define (parse-const-bxor ps) + (let lp ((a (parse-const-band ps))) + (cond + ((at-punct? ps 'caret) + (advance ps) + (let* ((b (parse-const-band ps)) + (r (%const-arith-conv (%const-promote a) (%const-promote b)))) + (lp (cons (bit-xor (car r) (car (cdr r))) (cdr (cdr r)))))) + (else a)))) + +(define (parse-const-band ps) + (let lp ((a (parse-const-eq ps))) + (cond + ((at-punct? ps 'amp) + (advance ps) + (let* ((b (parse-const-eq ps)) + (r (%const-arith-conv (%const-promote a) (%const-promote b)))) + (lp (cons (bit-and (car r) (car (cdr r))) (cdr (cdr r)))))) + (else a)))) + +(define (parse-const-eq ps) + (let lp ((a (parse-const-rel ps))) + (let ((t (peek ps))) + (cond + ((and (eq? (tok-kind t) 'PUNCT) + (or (eq? (tok-value t) 'eq2) (eq? (tok-value t) 'ne))) + (let ((op (tok-value (advance ps))) + (b (parse-const-rel ps))) + (let* ((r (%const-arith-conv (%const-promote a) (%const-promote b))) + (av (car r)) (bv (car (cdr r)))) + (lp (cons + (cond ((eq? op 'eq2) (if (= av bv) 1 0)) + (else (if (= av bv) 0 1))) + %t-i32))))) + (else a))))) + +(define (parse-const-rel ps) + (let lp ((a (parse-const-shift ps))) + (let ((t (peek ps))) + (cond + ((and (eq? (tok-kind t) 'PUNCT) + (let ((v (tok-value t))) + (or (eq? v 'lt) (eq? v 'le) (eq? v 'gt) (eq? v 'ge)))) + (let ((op (tok-value (advance ps))) + (b (parse-const-shift ps))) + (let* ((r (%const-arith-conv (%const-promote a) (%const-promote b))) + (av (car r)) (bv (car (cdr r)))) + (lp (cons + (cond ((eq? op 'lt) (if (< av bv) 1 0)) + ((eq? op 'le) (if (<= av bv) 1 0)) + ((eq? op 'gt) (if (> av bv) 1 0)) + (else (if (>= av bv) 1 0))) + %t-i32))))) + (else a))))) + +(define (parse-const-shift ps) + ;; Shift result type is the (promoted) lhs type — rhs is just a count. + (let lp ((a (parse-const-add ps))) + (let ((t (peek ps))) + (cond + ((and (eq? (tok-kind t) 'PUNCT) + (or (eq? (tok-value t) 'shl) (eq? (tok-value t) 'shr))) + (let* ((op (tok-value (advance ps))) + (b (parse-const-add ps)) + (ap (%const-promote a)) + (bp (%const-promote b)) + (rt (cdr ap)) + (sh (car bp)) + (av (car ap)) + (raw (cond ((eq? op 'shl) (arithmetic-shift av sh)) + (else (arithmetic-shift av (- 0 sh)))))) + (lp (cons (%const-trunc raw rt) rt)))) + (else a))))) + +(define (parse-const-add ps) + (let lp ((a (parse-const-mul ps))) + (let ((t (peek ps))) + (cond + ((and (eq? (tok-kind t) 'PUNCT) + (or (eq? (tok-value t) 'plus) (eq? (tok-value t) 'minus))) + (let* ((op (tok-value (advance ps))) + (b (parse-const-mul ps)) + (r (%const-arith-conv (%const-promote a) (%const-promote b))) + (av (car r)) (bv (car (cdr r))) (rt (cdr (cdr r))) + (raw (cond ((eq? op 'plus) (+ av bv)) (else (- av bv))))) + (lp (cons (%const-trunc raw rt) rt)))) + (else a))))) + +(define (parse-const-mul ps) + (let lp ((a (parse-const-cast ps))) + (let ((t (peek ps))) + (cond + ((and (eq? (tok-kind t) 'PUNCT) + (let ((v (tok-value t))) + (or (eq? v 'star) (eq? v 'slash) (eq? v 'pct)))) + (let* ((op (tok-value (advance ps))) + (b (parse-const-cast ps)) + (r (%const-arith-conv (%const-promote a) (%const-promote b))) + (av (car r)) (bv (car (cdr r))) (rt (cdr (cdr r)))) + (cond + ((and (or (eq? op 'slash) (eq? op 'pct)) (= bv 0)) + (die (tok-loc t) "const-expr: divide by zero"))) + (let ((raw (cond ((eq? op 'star) (* av bv)) + ((eq? op 'slash) (quotient av bv)) + (else (remainder av bv))))) + (lp (cons (%const-trunc raw rt) rt))))) + (else a))))) + +(define (parse-const-cast ps) + ;; (typename) operand — distinguished from ( expr ) by paren-is-group?. + (cond + ((at-punct? ps 'lparen) + (cond + ((%const-paren-is-cast? ps) + (advance ps) + (let* ((sp (parse-decl-spec ps)) + (p (parse-declarator ps (cdr sp))) + (ty (cdr p))) + (expect-punct ps 'rparen) + (cond + ((not (%ctype-int? ty)) + (die (tok-loc (peek ps)) "const-expr: cast must be integer" + (ctype-kind ty)))) + (let ((v (parse-const-cast ps))) + (cons (%const-trunc (car v) ty) ty)))) + (else (parse-const-unary ps)))) + (else (parse-const-unary ps)))) + +(define (%const-paren-is-cast? ps) + ;; A '(' starts a cast iff the following token kicks off a type-name. + (pmatch (peek2 ps) + (($ tok? (kind KW) (value ,v)) + (or (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int) + (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned) (eq? v '_Bool) + (eq? v 'struct) (eq? v 'union) (eq? v 'enum) + (eq? v 'const) (eq? v 'volatile) (eq? v 'restrict))) + (($ tok? (kind IDENT) (value ,n)) (typedef? ps n)) + (else #f))) + +(define (%ctype-int? ty) + (let ((k (ctype-kind ty))) + (or (eq? k 'i8) (eq? k 'u8) (eq? k 'i16) (eq? k 'u16) + (eq? k 'i32) (eq? k 'u32) (eq? k 'i64) (eq? k 'u64) + (eq? k 'bool) (eq? k 'enum)))) + +(define (parse-const-unary ps) (let ((t (peek ps))) (pmatch t - (($ tok? (kind INT)) (tok-value (advance ps))) - (($ tok? (kind PUNCT) (value minus)) (advance ps) (- 0 (parse-const-int ps))) - (($ tok? (kind PUNCT) (value plus)) (advance ps) (parse-const-int ps)) + (($ tok? (kind PUNCT) (value plus)) + (advance ps) (%const-promote (parse-const-cast ps))) + (($ tok? (kind PUNCT) (value minus)) + (advance ps) + (let* ((vp (%const-promote (parse-const-cast ps))) + (rt (cdr vp))) + (cons (%const-trunc (- 0 (car vp)) rt) rt))) + (($ tok? (kind PUNCT) (value tilde)) + (advance ps) + (let* ((vp (%const-promote (parse-const-cast ps))) + (rt (cdr vp))) + (cons (%const-trunc (bit-not (car vp)) rt) rt))) + (($ tok? (kind PUNCT) (value bang)) + (advance ps) + (let ((vp (parse-const-cast ps))) + (cons (if (%const-bool? vp) 0 1) %t-i32))) + (($ tok? (kind KW) (value sizeof)) + (advance ps) + (cond + ((at-punct? ps 'lparen) + (advance ps) + (cond + ((%const-tok-is-decl? ps) + (let* ((sp (parse-decl-spec ps)) + (p (parse-declarator ps (cdr sp))) + (ty (cdr p))) + (expect-punct ps 'rparen) + (cons (max (ctype-size ty) 0) %t-u64))) + (else (die (tok-loc t) + "const-expr: only sizeof(TYPENAME) supported")))) + (else (die (tok-loc t) + "const-expr: only sizeof(TYPENAME) supported")))) + (else (parse-const-primary ps))))) + +(define (%const-tok-is-decl? ps) + (pmatch (peek ps) + (($ tok? (kind KW) (value ,v)) + (or (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int) + (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned) + (eq? v '_Bool) (eq? v 'struct) (eq? v 'union) + (eq? v 'enum) (eq? v 'const) (eq? v 'volatile) + (eq? v 'restrict) (eq? v 'inline))) + (($ tok? (kind IDENT) (value ,n)) (typedef? ps n)) + (else #f))) + +(define (parse-const-primary ps) + (let ((t (peek ps))) + (pmatch t + (($ tok? (kind INT) (value ,v)) + (advance ps) + ;; Untyped INT literals ride as i32. Suffixes (L, LL, U) aren't + ;; preserved through to the parser, but const-expr operands at + ;; the granularity 118 cares about all fit in i32. + (cons v %t-i32)) + (($ tok? (kind CHAR) (value ,v)) + (advance ps) + ;; Character constants have type int in C. + (cons v %t-i32)) + (($ tok? (kind PUNCT) (value lparen)) + (advance ps) + (let ((v (parse-const-expr ps))) + (expect-punct ps 'rparen) v)) (($ tok? (kind IDENT) (value ,n)) (let ((sm (scope-lookup ps n))) (cond ((and sm (eq? (sym-kind sm) 'enum-const)) - (advance ps) (sym-slot sm)) - (else (die (tok-loc t) "const?" n))))) - (else (die (tok-loc t) "const?" (tok-value t)))))) + (advance ps) (cons (sym-slot sm) %t-i32)) + (else (die (tok-loc t) "const-expr: not a constant" n))))) + (else (die (tok-loc t) "const-expr: bad operand" + (tok-value t)))))) + +;; Back-compat wrapper: callers that just want the integer value. +(define (parse-const-int ps) (car (parse-const-expr ps))) (define (parse-declarator ps base) ((cdr (parse-decl-cont ps)) base