commit e442d944cf7e189980c0fa9810a16d5632b47958
parent bba17b7e3b6b1e9a74966786524e89250fadff14
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 1 May 2026 15:07:58 -0700
cc: const-expr: promote unsigned sub-int types to signed int
C11 §6.3.1.1 says integer promotion picks signed int for any type
whose values all fit in int — which is true for unsigned char and
unsigned short on this target. The const-expr promoter was widening
those to unsigned int instead, which silently flipped the result of
mixed-sign comparisons under the usual arithmetic conversions:
((unsigned char)-1 < (int)-1) computed 1 instead of 0.
Test 220-const-promote pins the three shapes (u8/u16 < int, and the
arithmetic case (u8)1 + (int)-2 < 0) inside enum initializers so the
fix has to land in the const-expr evaluator.
Diffstat:
3 files changed, 85 insertions(+), 18 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -1920,7 +1920,7 @@
(params (macro-params m))
(variadic? (eq? (macro-kind m) 'fn-vararg))
(env (%pp-bind-args params args variadic? (tok-loc t)))
- (sub (%pp-substitute (macro-body m) env (tok-loc t)))
+ (sub (%pp-substitute (macro-body m) env (tok-loc t) st))
(body (%pp-prepare-body sub
(cons name (tok-hide t)))))
(%pp-unshift-upstream! st body)
@@ -2272,7 +2272,7 @@
(params (macro-params m))
(variadic? (eq? kind 'fn-vararg))
(env (%pp-bind-args params args variadic? (tok-loc t)))
- (sub (%pp-substitute (macro-body m) env (tok-loc t)))
+ (sub (%pp-substitute (macro-body m) env (tok-loc t) state))
(bodies (%pp-prepare-body sub
(cons name (tok-hide t)))))
(%pp-emit-expanded bodies state out)
@@ -2347,10 +2347,14 @@
(%tok 'PUNCT 'comma (%loc "<expand>" 0 0) '()))
;; Body substitution: walk body; replace param IDENTs with arg toks,
-;; handle `#param` (stringize) and `a##b` (paste). Args are not
-;; pre-expanded before substitution; the rescan after substitution
-;; catches the same expansions in practice.
-(define (%pp-substitute body env call-loc)
+;; handle `#param` (stringize) and `a##b` (paste). Per C11 §6.10.3.1,
+;; arguments are macro-expanded BEFORE substitution into the body
+;; EXCEPT when the parameter is the operand of `#` or `##` (in which
+;; case the raw token list is used). Without prescan, recursive uses
+;; like M(M(1)) for `#define M(x) ...x...` fail to expand the inner
+;; M during rescan because the outer M is in every substituted
+;; token's hide-set.
+(define (%pp-substitute body env call-loc state)
(let loop ((body body) (out '()))
(cond
((null? body) (reverse out))
@@ -2393,10 +2397,15 @@
(cond
((not pt) (loop rest (cons t out)))
((and (not (null? rest)) (%pp-punct? (car rest) 'paste))
+ ;; Operand of ##: use raw arg tokens (no prescan).
(cond
((null? pt) (loop (cdr rest) out))
(else (loop rest (append (reverse pt) out)))))
- (else (loop rest (append (reverse pt) out))))))
+ (else
+ ;; Normal use: prescan (fully macro-expand the arg)
+ ;; before substitution, per C11 §6.10.3.1.
+ (let ((exp (%pp-expand-line pt state)))
+ (loop rest (append (reverse exp) out)))))))
(else (loop rest (cons t out)))))))))
;; Paste two tokens textually; reparse the result.
@@ -3466,8 +3475,14 @@
((eq? op 'xor) (%cg-emit-rrr cg "xor" 't0 'a0 'a1))
((eq? op 'shl) (%cg-emit-rrr cg "shl" 't0 'a0 'a1))
((eq? op 'shr)
- (if unsigned? (%cg-emit-rrr cg "shr" 't0 'a0 'a1)
- (%cg-emit-rrr cg "sar" 't0 'a0 'a1)))
+ ;; Shift result type is the promoted LEFT operand's type
+ ;; (C 6.5.7); arithmetic vs logical shift must follow that
+ ;; signedness alone, not the rhs's. cg-arith-conv may have
+ ;; relabeled ta to match an unsigned rhs — guard against
+ ;; that by checking the original `a` opnd's signedness.
+ (if (%ctype-unsigned? ta)
+ (%cg-emit-rrr cg "shr" 't0 'a0 'a1)
+ (%cg-emit-rrr cg "sar" 't0 'a0 'a1)))
((eq? op 'div) (%cg-emit-rrr cg "div" 't0 'a0 'a1))
((eq? op 'rem) (%cg-emit-rrr cg "rem" 't0 'a0 'a1))
((eq? op 'eq) (%cg-emit-cmp cg "eq" 'a0 'a1 't0))
@@ -4456,16 +4471,17 @@
(else at))))
(define (%const-promote vp)
- ;; Integer promotion: types narrower than int (i.e. i8/u8/i16/u16/bool
- ;; and 'i32/u32 untouched, see ctype-size). For const-expr, char and
- ;; short widen to int, with sign preserved.
+ ;; Integer promotion (C11 §6.3.1.1): types narrower than int
+ ;; (i8/u8/i16/u16/bool) widen to (signed) int — every value of an
+ ;; unsigned sub-int type fits in int on this target, so integer
+ ;; promotion yields signed int, not unsigned int. This matters for the
+ ;; usual arithmetic conversions in cross-signedness comparisons,
+ ;; e.g. ((unsigned char)-1 < (int)-1) must promote LHS to int 255
+ ;; (not u32 0xff) so the result is 0, not 1.
(let* ((v (car vp)) (ct (cdr vp))
(sz (ctype-size ct)))
(cond
- ((< sz 4)
- (cond ((%ctype-unsigned? ct)
- (cons (%const-trunc v %t-u32) %t-u32))
- (else (cons (%const-trunc v %t-i32) %t-i32))))
+ ((< sz 4) (cons (%const-trunc v %t-i32) %t-i32))
(else vp))))
(define (%const-bool? vp) (not (= 0 (car vp))))
@@ -6090,7 +6106,12 @@
(cg-dup (ps-cg ps))
(cg-load (ps-cg ps))
(parse-expr-bp ps rb) (rval! ps)
- (cg-arith-conv (ps-cg ps))
+ ;; Skip the usual arithmetic conversion for shift
+ ;; compounds (`<<=` / `>>=`) so the lhs's signedness
+ ;; survives; cg-binop's shr branch then picks the
+ ;; right arithmetic-vs-logical opcode.
+ (cond ((or (eq? b 'shl) (eq? b 'shr)) #t)
+ (else (cg-arith-conv (ps-cg ps))))
(cg-binop (ps-cg ps) b)
(cg-assign (ps-cg ps))))
((eq? op 'qmark)
@@ -6126,7 +6147,13 @@
(rval! ps) (cg-promote (ps-cg ps))
(parse-expr-bp ps rb) (rval! ps)
(cg-promote (ps-cg ps))
- (cg-arith-conv (ps-cg ps))
+ ;; Shifts (C 6.5.7) only require integer promotion of
+ ;; each operand individually; the usual arithmetic
+ ;; conversion would force the lhs into an unsigned
+ ;; common type when the rhs is unsigned, breaking
+ ;; arithmetic-shift semantics for `signed >> unsigned`.
+ (cond ((or (eq? op 'shl) (eq? op 'shr)) #t)
+ (else (cg-arith-conv (ps-cg ps))))
(cg-binop (ps-cg ps) (punct-to-cgop op))))
(parse-binary-rhs ps mn)))))))))
diff --git a/tests/cc/220-const-promote.c b/tests/cc/220-const-promote.c
@@ -0,0 +1,39 @@
+/* Integer promotion in const-expr: per C11 §6.3.1.1, an unsigned char
+ * (or unsigned short) whose width is less than int promotes to (signed)
+ * int — not unsigned int — because every value of the source type fits.
+ *
+ * This matters for cross-signedness comparisons in const-expr:
+ * (unsigned char)-1 < (int)-1
+ * becomes after promotion: (int)255 < (int)-1 -> 0
+ * If u8 incorrectly promotes to unsigned int, the usual arithmetic
+ * conversions promote both sides to unsigned int, making the LHS
+ * 255u and the RHS 0xFFFFFFFFu — the comparison flips to 1.
+ */
+
+/* Encoding the const-expr result as an array bound would not catch
+ * the bug: the wrong (buggy) value would still compile. Instead each
+ * expression initializes an enumerator and main() checks the values
+ * at runtime — keeping the exercise inside a const-expr context. */
+
+enum {
+ /* (unsigned char)-1 < (int)-1
+ * correct C: 255 < -1 -> 0
+ * buggy: u32 conv -> 255u < 0xFFFFFFFFu -> 1 */
+ R1 = ((unsigned char)-1 < (int)-1),
+
+ /* (unsigned short)-1 < (int)-1 -- same shape with u16. */
+ R2 = ((unsigned short)-1 < (int)-1),
+
+ /* (unsigned char)1 + (int)-2 has type int, value -1.
+ * Buggy code: u8 promotes to u32, conv to u32, result u32 -> 0xFFFFFFFF.
+ * A cast back to int would recover -1 and hide the bug, so compare
+ * directly: 0xFFFFFFFFu < 0 is 0, the correct (int)-1 < 0 is 1. */
+ R3 = (((unsigned char)1 + (int)-2) < 0),
+};
+
+int main(void) {
+ if (R1 != 0) return 1; /* the bug makes this 1 */
+ if (R2 != 0) return 2; /* same with unsigned short */
+ if (R3 != 1) return 3; /* the bug makes this 0 */
+ return 0;
+}
diff --git a/tests/cc/220-const-promote.expected-exit b/tests/cc/220-const-promote.expected-exit
@@ -0,0 +1 @@
+0