commit 8c3794edc8d382460a98cf293b2ebcd07fd94a37
parent 698bd1c311dd8004ba2d3216a15dc95a3af0e63d
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 27 Apr 2026 13:38:16 -0700
cc: integer constant expressions (Stream C)
C99 §6.6 demands an integer constant expression at four call sites:
array bounds, enum initializers, case labels, and file-scope/static
initializers. Previously parse-const-int accepted only a literal with
optional unary +/- or an enum-const ident, which made cc/118-const-expr
compile-fail.
Add parse-const-expr ps -> (value . ctype), a self-contained walker
that never touches cg. The (value . ctype) tuple keeps cast widths so
(int)(unsigned char)257 truncates to 1 instead of leaking 257 through
to the destination slot.
Operand surface: integer/character literal, enum constant,
sizeof(TYPENAME), unary + - ~ !, binary + - * / % << >> & | ^,
compare < <= > >= == !=, logical && || (short-circuit at the value
layer), ternary ?:, cast to integer type, parenthesization. Anything
else dies.
parse-const-int becomes a thin wrapper that returns just the value, so
the four existing call sites (parse-enum-spec, parse-decl-suf-cont's
[] arm, %const-init-piece, parse-case-stmt) light up with the broader
operand surface without further plumbing.
cc/118-const-expr now PASSes on aarch64; remaining 9 failures (082,
087, 111-117) are out of scope for this stream.
Diffstat:
| M | cc/cc.scm | | | 350 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- |
1 file changed, 343 insertions(+), 7 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -3736,18 +3736,354 @@
(tag-bind! ps tag c) c)))))
(else (die (tok-loc (peek ps)) "enum")))))
-(define (parse-const-int ps)
+;; ====================================================================
+;; Integer constant expressions (C99 §6.6).
+;;
+;; parse-const-expr ps -> (value . ctype)
+;; A self-contained walker that never touches cg. The four call sites
+;; that demand an integer constant expression — array bounds, enum
+;; initializers, case labels, file-scope/static initializers — all go
+;; through here. Returns a (value . ctype) pair so a final cast can
+;; truncate at the target type's width (e.g. `(int)(unsigned char)257`
+;; needs the inner cast to mask off to u8 before the outer relabel).
+;;
+;; Operand surface: integer / character literals, enum constants,
+;; sizeof(TYPENAME), unary + - ~ !, binary + - * / % << >> & | ^,
+;; compare < <= > >= == !=, logical && || (short-circuit at the value
+;; layer; both sides are still parsed so the token stream advances),
+;; ternary ?:, cast to integer type, parenthesization. Anything else
+;; dies. Floats / function calls / address-of / non-const idents / VLAs
+;; are out of scope.
+;; ====================================================================
+
+;; Truncate VALUE to the width and signedness of CT. Integer types only
+;; — pointer/array/etc. operands abort upstream.
+(define (%const-trunc value ct)
+ (let* ((sz (ctype-size ct))
+ (k (ctype-kind ct))
+ (mask (cond ((<= sz 0) 0)
+ ((= sz 1) #xff)
+ ((= sz 2) #xffff)
+ ((= sz 4) #xffffffff)
+ (else -1))))
+ (cond
+ ;; bool: 0 or 1.
+ ((eq? k 'bool) (if (= value 0) 0 1))
+ ;; 8-byte integers — value already fits in scheme's bignum.
+ ((or (eq? k 'i64) (eq? k 'u64))
+ (cond ((eq? k 'u64)
+ ;; Mask to 64 bits without losing sign on negative values.
+ (bit-and value #xffffffffffffffff))
+ (else value)))
+ ((%ctype-unsigned? ct) (bit-and value mask))
+ (else
+ ;; Signed: mask to width, then sign-extend if top bit is set.
+ (let* ((m (bit-and value mask))
+ (sign-bit (arithmetic-shift 1 (- (* sz 8) 1))))
+ (cond ((= 0 (bit-and m sign-bit)) m)
+ (else (- m (arithmetic-shift 1 (* sz 8))))))))))
+
+;; Usual arithmetic conversions on (value . ctype) pairs. Both operands
+;; have already been integer-promoted (≤ int → int) by the caller.
+;; Returns (cons new-a-value new-b-value-and-result-ctype): we only need
+;; the new values (already truncated) and a single result ctype.
+(define (%const-arith-conv ap bp)
+ (let* ((av (car ap)) (at (cdr ap))
+ (bv (car bp)) (bt (cdr bp))
+ (rt (%const-arith-conv-type at bt)))
+ (cons (%const-trunc av rt)
+ (cons (%const-trunc bv rt) rt))))
+
+(define (%const-arith-conv-type at bt)
+ ;; Pick the wider type; tie-break on unsigned. Caller has already
+ ;; promoted both to >= int width.
+ (let ((sa (ctype-size at)) (sb (ctype-size bt)))
+ (cond
+ ((> sa sb) at)
+ ((> sb sa) bt)
+ ((%ctype-unsigned? at) at)
+ ((%ctype-unsigned? bt) bt)
+ (else at))))
+
+(define (%const-promote vp)
+ ;; Integer promotion: types narrower than int (i.e. i8/u8/i16/u16/bool
+ ;; and 'i32/u32 untouched, see ctype-size). For const-expr, char and
+ ;; short widen to int, with sign preserved.
+ (let* ((v (car vp)) (ct (cdr vp))
+ (sz (ctype-size ct)))
+ (cond
+ ((< sz 4)
+ (cond ((%ctype-unsigned? ct)
+ (cons (%const-trunc v %t-u32) %t-u32))
+ (else (cons (%const-trunc v %t-i32) %t-i32))))
+ (else vp))))
+
+(define (%const-bool? vp) (not (= 0 (car vp))))
+
+(define (parse-const-expr ps) (parse-const-cond ps))
+
+;; Ternary (right-associative).
+(define (parse-const-cond ps)
+ (let ((c (parse-const-lor ps)))
+ (cond
+ ((at-punct? ps 'qmark)
+ (advance ps)
+ (let* ((t (parse-const-expr ps))
+ (_ (expect-punct ps 'colon))
+ (e (parse-const-cond ps)))
+ (cond ((%const-bool? c) t) (else e))))
+ (else c))))
+
+(define (parse-const-lor ps)
+ (let lp ((a (parse-const-land ps)))
+ (cond
+ ((at-punct? ps 'lor)
+ (advance ps)
+ (let ((b (parse-const-land ps)))
+ (lp (cons (if (or (%const-bool? a) (%const-bool? b)) 1 0)
+ %t-i32))))
+ (else a))))
+
+(define (parse-const-land ps)
+ (let lp ((a (parse-const-bor ps)))
+ (cond
+ ((at-punct? ps 'land)
+ (advance ps)
+ (let ((b (parse-const-bor ps)))
+ (lp (cons (if (and (%const-bool? a) (%const-bool? b)) 1 0)
+ %t-i32))))
+ (else a))))
+
+(define (parse-const-bor ps)
+ (let lp ((a (parse-const-bxor ps)))
+ (cond
+ ((at-punct? ps 'bar)
+ (advance ps)
+ (let* ((b (parse-const-bxor ps))
+ (r (%const-arith-conv (%const-promote a) (%const-promote b))))
+ (lp (cons (bit-or (car r) (car (cdr r))) (cdr (cdr r))))))
+ (else a))))
+
+(define (parse-const-bxor ps)
+ (let lp ((a (parse-const-band ps)))
+ (cond
+ ((at-punct? ps 'caret)
+ (advance ps)
+ (let* ((b (parse-const-band ps))
+ (r (%const-arith-conv (%const-promote a) (%const-promote b))))
+ (lp (cons (bit-xor (car r) (car (cdr r))) (cdr (cdr r))))))
+ (else a))))
+
+(define (parse-const-band ps)
+ (let lp ((a (parse-const-eq ps)))
+ (cond
+ ((at-punct? ps 'amp)
+ (advance ps)
+ (let* ((b (parse-const-eq ps))
+ (r (%const-arith-conv (%const-promote a) (%const-promote b))))
+ (lp (cons (bit-and (car r) (car (cdr r))) (cdr (cdr r))))))
+ (else a))))
+
+(define (parse-const-eq ps)
+ (let lp ((a (parse-const-rel ps)))
+ (let ((t (peek ps)))
+ (cond
+ ((and (eq? (tok-kind t) 'PUNCT)
+ (or (eq? (tok-value t) 'eq2) (eq? (tok-value t) 'ne)))
+ (let ((op (tok-value (advance ps)))
+ (b (parse-const-rel ps)))
+ (let* ((r (%const-arith-conv (%const-promote a) (%const-promote b)))
+ (av (car r)) (bv (car (cdr r))))
+ (lp (cons
+ (cond ((eq? op 'eq2) (if (= av bv) 1 0))
+ (else (if (= av bv) 0 1)))
+ %t-i32)))))
+ (else a)))))
+
+(define (parse-const-rel ps)
+ (let lp ((a (parse-const-shift ps)))
+ (let ((t (peek ps)))
+ (cond
+ ((and (eq? (tok-kind t) 'PUNCT)
+ (let ((v (tok-value t)))
+ (or (eq? v 'lt) (eq? v 'le) (eq? v 'gt) (eq? v 'ge))))
+ (let ((op (tok-value (advance ps)))
+ (b (parse-const-shift ps)))
+ (let* ((r (%const-arith-conv (%const-promote a) (%const-promote b)))
+ (av (car r)) (bv (car (cdr r))))
+ (lp (cons
+ (cond ((eq? op 'lt) (if (< av bv) 1 0))
+ ((eq? op 'le) (if (<= av bv) 1 0))
+ ((eq? op 'gt) (if (> av bv) 1 0))
+ (else (if (>= av bv) 1 0)))
+ %t-i32)))))
+ (else a)))))
+
+(define (parse-const-shift ps)
+ ;; Shift result type is the (promoted) lhs type — rhs is just a count.
+ (let lp ((a (parse-const-add ps)))
+ (let ((t (peek ps)))
+ (cond
+ ((and (eq? (tok-kind t) 'PUNCT)
+ (or (eq? (tok-value t) 'shl) (eq? (tok-value t) 'shr)))
+ (let* ((op (tok-value (advance ps)))
+ (b (parse-const-add ps))
+ (ap (%const-promote a))
+ (bp (%const-promote b))
+ (rt (cdr ap))
+ (sh (car bp))
+ (av (car ap))
+ (raw (cond ((eq? op 'shl) (arithmetic-shift av sh))
+ (else (arithmetic-shift av (- 0 sh))))))
+ (lp (cons (%const-trunc raw rt) rt))))
+ (else a)))))
+
+(define (parse-const-add ps)
+ (let lp ((a (parse-const-mul ps)))
+ (let ((t (peek ps)))
+ (cond
+ ((and (eq? (tok-kind t) 'PUNCT)
+ (or (eq? (tok-value t) 'plus) (eq? (tok-value t) 'minus)))
+ (let* ((op (tok-value (advance ps)))
+ (b (parse-const-mul ps))
+ (r (%const-arith-conv (%const-promote a) (%const-promote b)))
+ (av (car r)) (bv (car (cdr r))) (rt (cdr (cdr r)))
+ (raw (cond ((eq? op 'plus) (+ av bv)) (else (- av bv)))))
+ (lp (cons (%const-trunc raw rt) rt))))
+ (else a)))))
+
+(define (parse-const-mul ps)
+ (let lp ((a (parse-const-cast ps)))
+ (let ((t (peek ps)))
+ (cond
+ ((and (eq? (tok-kind t) 'PUNCT)
+ (let ((v (tok-value t)))
+ (or (eq? v 'star) (eq? v 'slash) (eq? v 'pct))))
+ (let* ((op (tok-value (advance ps)))
+ (b (parse-const-cast ps))
+ (r (%const-arith-conv (%const-promote a) (%const-promote b)))
+ (av (car r)) (bv (car (cdr r))) (rt (cdr (cdr r))))
+ (cond
+ ((and (or (eq? op 'slash) (eq? op 'pct)) (= bv 0))
+ (die (tok-loc t) "const-expr: divide by zero")))
+ (let ((raw (cond ((eq? op 'star) (* av bv))
+ ((eq? op 'slash) (quotient av bv))
+ (else (remainder av bv)))))
+ (lp (cons (%const-trunc raw rt) rt)))))
+ (else a)))))
+
+(define (parse-const-cast ps)
+ ;; (typename) operand — distinguished from ( expr ) by paren-is-group?.
+ (cond
+ ((at-punct? ps 'lparen)
+ (cond
+ ((%const-paren-is-cast? ps)
+ (advance ps)
+ (let* ((sp (parse-decl-spec ps))
+ (p (parse-declarator ps (cdr sp)))
+ (ty (cdr p)))
+ (expect-punct ps 'rparen)
+ (cond
+ ((not (%ctype-int? ty))
+ (die (tok-loc (peek ps)) "const-expr: cast must be integer"
+ (ctype-kind ty))))
+ (let ((v (parse-const-cast ps)))
+ (cons (%const-trunc (car v) ty) ty))))
+ (else (parse-const-unary ps))))
+ (else (parse-const-unary ps))))
+
+(define (%const-paren-is-cast? ps)
+ ;; A '(' starts a cast iff the following token kicks off a type-name.
+ (pmatch (peek2 ps)
+ (($ tok? (kind KW) (value ,v))
+ (or (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int)
+ (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned) (eq? v '_Bool)
+ (eq? v 'struct) (eq? v 'union) (eq? v 'enum)
+ (eq? v 'const) (eq? v 'volatile) (eq? v 'restrict)))
+ (($ tok? (kind IDENT) (value ,n)) (typedef? ps n))
+ (else #f)))
+
+(define (%ctype-int? ty)
+ (let ((k (ctype-kind ty)))
+ (or (eq? k 'i8) (eq? k 'u8) (eq? k 'i16) (eq? k 'u16)
+ (eq? k 'i32) (eq? k 'u32) (eq? k 'i64) (eq? k 'u64)
+ (eq? k 'bool) (eq? k 'enum))))
+
+(define (parse-const-unary ps)
(let ((t (peek ps)))
(pmatch t
- (($ tok? (kind INT)) (tok-value (advance ps)))
- (($ tok? (kind PUNCT) (value minus)) (advance ps) (- 0 (parse-const-int ps)))
- (($ tok? (kind PUNCT) (value plus)) (advance ps) (parse-const-int ps))
+ (($ tok? (kind PUNCT) (value plus))
+ (advance ps) (%const-promote (parse-const-cast ps)))
+ (($ tok? (kind PUNCT) (value minus))
+ (advance ps)
+ (let* ((vp (%const-promote (parse-const-cast ps)))
+ (rt (cdr vp)))
+ (cons (%const-trunc (- 0 (car vp)) rt) rt)))
+ (($ tok? (kind PUNCT) (value tilde))
+ (advance ps)
+ (let* ((vp (%const-promote (parse-const-cast ps)))
+ (rt (cdr vp)))
+ (cons (%const-trunc (bit-not (car vp)) rt) rt)))
+ (($ tok? (kind PUNCT) (value bang))
+ (advance ps)
+ (let ((vp (parse-const-cast ps)))
+ (cons (if (%const-bool? vp) 0 1) %t-i32)))
+ (($ tok? (kind KW) (value sizeof))
+ (advance ps)
+ (cond
+ ((at-punct? ps 'lparen)
+ (advance ps)
+ (cond
+ ((%const-tok-is-decl? ps)
+ (let* ((sp (parse-decl-spec ps))
+ (p (parse-declarator ps (cdr sp)))
+ (ty (cdr p)))
+ (expect-punct ps 'rparen)
+ (cons (max (ctype-size ty) 0) %t-u64)))
+ (else (die (tok-loc t)
+ "const-expr: only sizeof(TYPENAME) supported"))))
+ (else (die (tok-loc t)
+ "const-expr: only sizeof(TYPENAME) supported"))))
+ (else (parse-const-primary ps)))))
+
+(define (%const-tok-is-decl? ps)
+ (pmatch (peek ps)
+ (($ tok? (kind KW) (value ,v))
+ (or (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int)
+ (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned)
+ (eq? v '_Bool) (eq? v 'struct) (eq? v 'union)
+ (eq? v 'enum) (eq? v 'const) (eq? v 'volatile)
+ (eq? v 'restrict) (eq? v 'inline)))
+ (($ tok? (kind IDENT) (value ,n)) (typedef? ps n))
+ (else #f)))
+
+(define (parse-const-primary ps)
+ (let ((t (peek ps)))
+ (pmatch t
+ (($ tok? (kind INT) (value ,v))
+ (advance ps)
+ ;; Untyped INT literals ride as i32. Suffixes (L, LL, U) aren't
+ ;; preserved through to the parser, but const-expr operands at
+ ;; the granularity 118 cares about all fit in i32.
+ (cons v %t-i32))
+ (($ tok? (kind CHAR) (value ,v))
+ (advance ps)
+ ;; Character constants have type int in C.
+ (cons v %t-i32))
+ (($ tok? (kind PUNCT) (value lparen))
+ (advance ps)
+ (let ((v (parse-const-expr ps)))
+ (expect-punct ps 'rparen) v))
(($ tok? (kind IDENT) (value ,n))
(let ((sm (scope-lookup ps n)))
(cond ((and sm (eq? (sym-kind sm) 'enum-const))
- (advance ps) (sym-slot sm))
- (else (die (tok-loc t) "const?" n)))))
- (else (die (tok-loc t) "const?" (tok-value t))))))
+ (advance ps) (cons (sym-slot sm) %t-i32))
+ (else (die (tok-loc t) "const-expr: not a constant" n)))))
+ (else (die (tok-loc t) "const-expr: bad operand"
+ (tok-value t))))))
+
+;; Back-compat wrapper: callers that just want the integer value.
+(define (parse-const-int ps) (car (parse-const-expr ps)))
(define (parse-declarator ps base)
((cdr (parse-decl-cont ps)) base