boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit e442d944cf7e189980c0fa9810a16d5632b47958
parent bba17b7e3b6b1e9a74966786524e89250fadff14
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri,  1 May 2026 15:07:58 -0700

cc: const-expr: promote unsigned sub-int types to signed int

C11 §6.3.1.1 says integer promotion picks signed int for any type
whose values all fit in int — which is true for unsigned char and
unsigned short on this target. The const-expr promoter was widening
those to unsigned int instead, which silently flipped the result of
mixed-sign comparisons under the usual arithmetic conversions:
((unsigned char)-1 < (int)-1) computed 1 instead of 0.

Test 220-const-promote pins the three shapes (u8/u16 < int, and the
arithmetic case (u8)1 + (int)-2 < 0) inside enum initializers so the
fix has to land in the const-expr evaluator.

Diffstat:
M cc/cc.scm | 63 +++++++++++++++++++++++++++++++++++++++++++++------------------
A tests/cc/220-const-promote.c | 39 +++++++++++++++++++++++++++++++++++++++
A tests/cc/220-const-promote.expected-exit | 1 +
3 files changed, 85 insertions(+), 18 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -1920,7 +1920,7 @@ (params (macro-params m)) (variadic? (eq? (macro-kind m) 'fn-vararg)) (env (%pp-bind-args params args variadic? (tok-loc t))) - (sub (%pp-substitute (macro-body m) env (tok-loc t))) + (sub (%pp-substitute (macro-body m) env (tok-loc t) st)) (body (%pp-prepare-body sub (cons name (tok-hide t))))) (%pp-unshift-upstream! st body) @@ -2272,7 +2272,7 @@ (params (macro-params m)) (variadic? (eq? kind 'fn-vararg)) (env (%pp-bind-args params args variadic? (tok-loc t))) - (sub (%pp-substitute (macro-body m) env (tok-loc t))) + (sub (%pp-substitute (macro-body m) env (tok-loc t) state)) (bodies (%pp-prepare-body sub (cons name (tok-hide t))))) (%pp-emit-expanded bodies state out) @@ -2347,10 +2347,14 @@ (%tok 'PUNCT 'comma (%loc "<expand>" 0 0) '())) ;; Body substitution: walk body; replace param IDENTs with arg toks, -;; handle `#param` (stringize) and `a##b` (paste). Args are not -;; pre-expanded before substitution; the rescan after substitution -;; catches the same expansions in practice. -(define (%pp-substitute body env call-loc) +;; handle `#param` (stringize) and `a##b` (paste). Per C11 §6.10.3.1, +;; arguments are macro-expanded BEFORE substitution into the body +;; EXCEPT when the parameter is the operand of `#` or `##` (in which +;; case the raw token list is used). Without prescan, recursive uses +;; like M(M(1)) for `#define M(x) ...x...` fail to expand the inner +;; M during rescan because the outer M is in every substituted +;; token's hide-set. +(define (%pp-substitute body env call-loc state) (let loop ((body body) (out '())) (cond ((null? body) (reverse out)) @@ -2393,10 +2397,15 @@ (cond ((not pt) (loop rest (cons t out))) ((and (not (null? rest)) (%pp-punct? (car rest) 'paste)) + ;; Operand of ##: use raw arg tokens (no prescan). (cond ((null? 
pt) (loop (cdr rest) out)) (else (loop rest (append (reverse pt) out))))) - (else (loop rest (append (reverse pt) out)))))) + (else + ;; Normal use: prescan (fully macro-expand the arg) + ;; before substitution, per C11 §6.10.3.1. + (let ((exp (%pp-expand-line pt state))) + (loop rest (append (reverse exp) out))))))) (else (loop rest (cons t out))))))))) ;; Paste two tokens textually; reparse the result. @@ -3466,8 +3475,14 @@ ((eq? op 'xor) (%cg-emit-rrr cg "xor" 't0 'a0 'a1)) ((eq? op 'shl) (%cg-emit-rrr cg "shl" 't0 'a0 'a1)) ((eq? op 'shr) - (if unsigned? (%cg-emit-rrr cg "shr" 't0 'a0 'a1) - (%cg-emit-rrr cg "sar" 't0 'a0 'a1))) + ;; Shift result type is the promoted LEFT operand's type + ;; (C 6.5.7); arithmetic vs logical shift must follow that + ;; signedness alone, not the rhs's. cg-arith-conv may have + ;; relabeled ta to match an unsigned rhs — guard against + ;; that by checking the original `a` opnd's signedness. + (if (%ctype-unsigned? ta) + (%cg-emit-rrr cg "shr" 't0 'a0 'a1) + (%cg-emit-rrr cg "sar" 't0 'a0 'a1))) ((eq? op 'div) (%cg-emit-rrr cg "div" 't0 'a0 'a1)) ((eq? op 'rem) (%cg-emit-rrr cg "rem" 't0 'a0 'a1)) ((eq? op 'eq) (%cg-emit-cmp cg "eq" 'a0 'a1 't0)) @@ -4456,16 +4471,17 @@ (else at)))) (define (%const-promote vp) - ;; Integer promotion: types narrower than int (i.e. i8/u8/i16/u16/bool - ;; and 'i32/u32 untouched, see ctype-size). For const-expr, char and - ;; short widen to int, with sign preserved. + ;; Integer promotion (C11 §6.3.1.1): types narrower than int + ;; (i8/u8/i16/u16/bool) widen to (signed) int — every value of an + ;; unsigned sub-int type fits in int on this target, so the promotion + ;; rank picks signed int, not unsigned int. This matters for the + ;; usual arithmetic conversions in cross-signedness comparisons, + ;; e.g. ((unsigned char)-1 < (int)-1) must promote LHS to int 255 + ;; (not u32 0xff) so the result is 0, not 1. 
(let* ((v (car vp)) (ct (cdr vp)) (sz (ctype-size ct))) (cond - ((< sz 4) - (cond ((%ctype-unsigned? ct) - (cons (%const-trunc v %t-u32) %t-u32)) - (else (cons (%const-trunc v %t-i32) %t-i32)))) + ((< sz 4) (cons (%const-trunc v %t-i32) %t-i32)) (else vp)))) (define (%const-bool? vp) (not (= 0 (car vp)))) @@ -6090,7 +6106,12 @@ (cg-dup (ps-cg ps)) (cg-load (ps-cg ps)) (parse-expr-bp ps rb) (rval! ps) - (cg-arith-conv (ps-cg ps)) + ;; Skip the usual arithmetic conversion for shift + ;; compounds (`<<=` / `>>=`) so the lhs's signedness + ;; survives; cg-binop's shr branch then picks the + ;; right arithmetic-vs-logical opcode. + (cond ((or (eq? b 'shl) (eq? b 'shr)) #t) + (else (cg-arith-conv (ps-cg ps)))) (cg-binop (ps-cg ps) b) (cg-assign (ps-cg ps)))) ((eq? op 'qmark) @@ -6126,7 +6147,13 @@ (rval! ps) (cg-promote (ps-cg ps)) (parse-expr-bp ps rb) (rval! ps) (cg-promote (ps-cg ps)) - (cg-arith-conv (ps-cg ps)) + ;; Shifts (C 6.5.7) only require integer promotion of + ;; each operand individually; the usual arithmetic + ;; conversion would force the lhs into an unsigned + ;; common type when the rhs is unsigned, breaking + ;; arithmetic-shift semantics for `signed >> unsigned`. + (cond ((or (eq? op 'shl) (eq? op 'shr)) #t) + (else (cg-arith-conv (ps-cg ps)))) (cg-binop (ps-cg ps) (punct-to-cgop op)))) (parse-binary-rhs ps mn))))))))) diff --git a/tests/cc/220-const-promote.c b/tests/cc/220-const-promote.c @@ -0,0 +1,39 @@ +/* Integer promotion in const-expr: per C11 §6.3.1.1, an unsigned char + * (or unsigned short) whose width is less than int promotes to (signed) + * int — not unsigned int — because every value of the source type fits. 
+ * + * This matters for cross-signedness comparisons in const-expr: + * (unsigned char)-1 < (int)-1 + * becomes after promotion: (int)255 < (int)-1 -> 0 + * If u8 incorrectly promotes to unsigned int, the usual arithmetic + * conversions promote both sides to unsigned int, making the LHS + * 255u and the RHS 0xFFFFFFFFu — the comparison flips to 1. + */ + +/* Encode the const-expr result as an array bound: a non-zero value + * makes [0] / [1] become [1] (legal); the wrong (buggy) value would + * still compile, so we instead drive a switch via enum and check at + * runtime — keeping the exercise inside a const-expr context. */ + +enum { + /* (unsigned char)-1 < (int)-1 + * correct C: 255 < -1 -> 0 + * buggy: u32 conv -> 255u < 0xFFFFFFFFu -> 1 */ + R1 = ((unsigned char)-1 < (int)-1), + + /* (unsigned short)-1 < (int)-1 -- same shape with u16. */ + R2 = ((unsigned short)-1 < (int)-1), + + /* (unsigned char)1 + (int)-2 has type int, value -1. + * Buggy code: u8 promotes to u32, conv to u32, result u32 -> 0xFFFFFFFF. + * The cast back to int recovers -1, but a comparison without + * the cast would surface the bug. */ + R3 = (((unsigned char)1 + (int)-2) < 0), +}; + +int main(void) { + if (R1 != 0) return 1; /* the bug makes this 1 */ + if (R2 != 0) return 2; /* same with unsigned short */ + if (R3 != 1) return 3; /* the bug makes this 0 */ + return 0; +} diff --git a/tests/cc/220-const-promote.expected-exit b/tests/cc/220-const-promote.expected-exit @@ -0,0 +1 @@ +0