commit 91bee1d76ff5a3d817e7322ab68c3f81945c879a
parent e7a073b8699c74f2e9c025078fb275323711a8e1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 3 May 2026 20:50:53 -0700
cc: apply usual arith conversion to ternary common type
The parser's `?:` handler called `cg-ifelse-merge` without applying
C11 §6.5.15 ¶5 / §6.3.1.8 conversions across the two arms, so the
merged opnd inherited arm 1's type. When arm 1 was narrower than arm
2, the surrounding expression saw a too-narrow rvalue and the next
`cg-arith-conv` truncated arm 2's bits.
The slot already stores the raw 8-byte payload (per cc.scm's
canonical-form discipline), and `%cg-load-opnd-into` re-canonicalizes
on read against the opnd's type kind, so picking the merged type
post-hoc is enough to fix the load. Add a `%ctype-arith?` predicate
and a `%cg-merge-arith-type` helper that integer-promotes both arms
then picks the wider type with unsigned tie-break, mirroring the
const-expr path's `%const-arith-conv-type`. `&&` / `||` callers are
unaffected because the parser pre-casts both arms to %t-i32.
The same idiom appears in tcc.flat.c's `gen_opic` sign-extension code
(`(uint32_t)l | (signed ? -(l & 0x80000000) : 0)`); the cc.scm
miscompile of that line was the last cc.scm-only divergence on the
tcc-cc suite vs the gcc baseline (220-const-promote).
tests/cc/335-ternary-merge-arith-conv.c covers the exact gen_opic
shape, swapped-arm symmetry, asymmetric int/u64 arms, and signed/
unsigned arith conversion.
Diffstat:
3 files changed, 97 insertions(+), 10 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -510,6 +510,13 @@
((eq? k 'ptr) #t) ((eq? k 'arr) #t) ((eq? k 'fn) #t)
(else #f))))
+;; #t when t's kind is one of the integer kinds (i8..i64, u8..u64, bool),
+;; #f otherwise. Despite the "arith" name, floating kinds are not
+;; accepted here; %ctype-fp? tests those.
+(define (%ctype-arith? t)
+ (let ((k (ctype-kind t)))
+ (cond ((eq? k 'i8) #t) ((eq? k 'i16) #t) ((eq? k 'i32) #t)
+ ((eq? k 'i64) #t) ((eq? k 'u8) #t) ((eq? k 'u16) #t)
+ ((eq? k 'u32) #t) ((eq? k 'u64) #t) ((eq? k 'bool) #t)
+ (else #f))))
+
(define (%ctype-fp? t)
(let ((k (ctype-kind t)))
(cond ((eq? k 'flt) #t) ((eq? k 'dbl) #t) ((eq? k 'ldbl) #t)
@@ -3895,27 +3902,51 @@
;; the surrounding expression. `cg-ifelse-merge` solves that: pop the
;; cond, allocate one result slot, and after each thunk runs, pop its
;; rval and store into the slot. Push the slot as one frame rval.
-;; Both branches must push exactly one opnd; the result type is the
-;; type of the first thunk's pushed opnd (parser must arrange for
-;; both branches to push compatible types — either by passing
-;; pre-coerced operands or by injecting a `cg-cast` inside the thunk).
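+;;
+;; Each thunk must push exactly one opnd; it is popped and stored into
+;; the slot right after the thunk runs.
+;;
+;; Result type follows C11 §6.5.15 ¶5 for ternary: when both arms are
+;; integer kinds, the usual arithmetic conversions over the two arms'
+;; types (see %cg-merge-arith-type, which also defines the fallback for
+;; every other combination of kinds). The slot stores the raw 8-byte
+;; payload (per cc.scm's canonical-form discipline); %cg-load-opnd-into
+;; then re-canonicalizes on read against whatever common type we picked.
+;; For `&&` / `||` callers both arms are pre-cast to %t-i32 by the
+;; parser, so the merge is a no-op on type.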
(define (cg-ifelse-merge cg then-thunk else-thunk)
(let* ((cond-op (cg-pop cg))
(slot (cg-alloc-slot cg 8 8)))
(%cg-load-opnd-into cg cond-op 't0)
(%cg-emit-many cg (list "%ifelse_nez(t0, {\n"))
(then-thunk)
- (let* ((p (cg-pop cg))
- (rty (opnd-type p)))
+ (let* ((p (cg-pop cg)) ; rval pushed by then-thunk
+ (rty1 (opnd-type p))) ; remembered so the merge can see both arms' types
(%cg-load-opnd-into cg p 'a0)
(%cg-emit-st-slot cg 'a0 slot)
(%cg-emit-many cg (list "}, {\n"))
(else-thunk)
- (let ((q (cg-pop cg)))
+ (let* ((q (cg-pop cg)) ; rval pushed by else-thunk
+ (rty2 (opnd-type q)))
(%cg-load-opnd-into cg q 'a0)
- (%cg-emit-st-slot cg 'a0 slot))
- (%cg-emit-many cg (list "})\n"))
- (cg-push cg (%opnd 'frame rty slot #f)))))
+ (%cg-emit-st-slot cg 'a0 slot) ; both arms write the same slot
+ (%cg-emit-many cg (list "})\n"))
+ (cg-push cg (%opnd 'frame
+ (%cg-merge-arith-type rty1 rty2) ; type chosen from both arms, not arm 1 alone
+ slot #f))))))
+
+;; Common result type for the two arms of a merge, given each arm's
+;; ctype (t1 = first arm, t2 = second arm). Returns a ctype.
+;;
+;; Both arms integer kinds (per %ctype-arith?): usual arithmetic
+;; conversions (C11 §6.3.1.8) — integer-promote each (anything narrower
+;; than 4 bytes → int), then pick the wider; at equal width an unsigned
+;; arm wins. Floating kinds do not take this path.
+;;
+;; Integer arm 1 with pointer arm 2 (`c ? 0 : p`): C11 §6.5.15 ¶6 gives
+;; the result the pointer's type, so pick t2. Falling back to arm 1's
+;; integer type here would make the merged slot be read back at int
+;; width. (If the parser already casts the null constant to the pointer
+;; type, both arms are 'ptr and this clause is simply never reached.)
+;;
+;; Every other combination (pointer/pointer, struct, array, fp) keeps
+;; the first arm's type, as before.
+(define (%cg-merge-arith-type t1 t2)
+  (cond
+    ((and (%ctype-arith? t1) (%ctype-arith? t2))
+     (let ((p1 (cond ((< (ctype-size t1) 4) %t-i32) (else t1)))
+           (p2 (cond ((< (ctype-size t2) 4) %t-i32) (else t2))))
+       (cond
+         ((> (ctype-size p1) (ctype-size p2)) p1)
+         ((> (ctype-size p2) (ctype-size p1)) p2)
+         ;; Same width: unsigned wins the tie.
+         ((%ctype-unsigned? p1) p1)
+         ((%ctype-unsigned? p2) p2)
+         (else p1))))
+    ;; Result type must not depend on which arm holds the pointer.
+    ((and (%ctype-arith? t1) (eq? (ctype-kind t2) 'ptr)) t2)
+    (else t1)))
(define (cg-loop cg head-thunk body-thunk)
;; body-thunk receives the loop tag as its argument; parser uses
diff --git a/tests/cc/335-ternary-merge-arith-conv.c b/tests/cc/335-ternary-merge-arith-conv.c
@@ -0,0 +1,55 @@
+/* tests/cc/335-ternary-merge-arith-conv.c — C11 §6.5.15 ¶5: ternary
+ * applies the usual arithmetic conversions to its second and third
+ * operands and the result type is the resulting common type.
+ *
+ * cc.scm originally took arm 1's type as the merge slot's result type
+ * unchanged. When arm 1 was narrower than arm 2, the surrounding
+ * expression saw a too-narrow rvalue and cg-arith-conv truncated to
+ * that width on the next op. Concretely, in
+ *
+ * (uint32_t)x | (cond ? 0 : -(x & 0x80000000))
+ *
+ * the ternary's int arm-1 dominated the merge type, so the `|` was
+ * lowered as 32-bit and the upper sign-extension bits in arm-2 were
+ * dropped. tcc.flat.c's gen_opic uses exactly this idiom for
+ * sign-extension of narrow constants, so the cc.scm-built tcc-boot2
+ * miscompiled int < int as unsigned and 220-const-promote.c failed
+ * under tcc-cc.
+ *
+ * Keep this fixture as a runtime check on cc.scm directly so the
+ * regression surfaces without dragging tcc into the loop. */
+typedef unsigned long long u64;
+typedef unsigned int u32;
+
+/* Runs each ternary common-type check in turn. Returns 0 when all
+ * pass; otherwise the non-zero return value identifies the first
+ * failing check. */
+int main(void) {
+    u64 l = (u64)-1;
+    int t = 0;
+
+    /* The exact pattern from tcc.flat.c gen_opic lines 5471-5475. */
+    u64 sext = ((u32)l |
+                (t & 0x10 ? 0 : -(l & 0x80000000)));
+    if (sext != (u64)-1) return 1;
+
+    /* Same shape with the ternary's arms in the other order — the
+     * picked common type must not depend on which arm parses first. */
+    u64 sext2 = ((u32)l |
+                 (t & 0x10 ? -(l & 0x80000000) : 0));
+    if (sext2 != 0xFFFFFFFFULL) return 2;
+
+    /* Asymmetric arm types (int vs u64) at top level. With arm-1's
+     * type leaking through, the result is read as a 32-bit value and
+     * the high bits vanish. */
+    u64 a = (1 ? 0 : (u64)0x100000001ULL);
+    if (a != 0) return 3;
+    u64 b = (0 ? 0 : (u64)0x100000001ULL);
+    if (b != 0x100000001ULL) return 4;
+
+    /* Mixed signed/unsigned ternary: result type should follow the
+     * usual arithmetic conversions (i32 + u32 -> u32). */
+    int s = -1;
+    u32 u = 1u;
+    u32 r = (1 ? s : u);
+    if (r != 0xFFFFFFFFu) return 5;
+
+    /* The u32 assignment above truncates to 32 bits whatever type the
+     * ternary got, so it cannot tell u32 from int. Widening can: a u32
+     * result zero-extends to 0xFFFFFFFF, while an int result of -1
+     * would sign-extend to all ones. */
+    u64 wide = (1 ? s : u);
+    if (wide != 0xFFFFFFFFULL) return 6;
+
+    return 0;
+}
diff --git a/tests/cc/335-ternary-merge-arith-conv.expected-exit b/tests/cc/335-ternary-merge-arith-conv.expected-exit
@@ -0,0 +1 @@
+0