boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 91bee1d76ff5a3d817e7322ab68c3f81945c879a
parent e7a073b8699c74f2e9c025078fb275323711a8e1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun,  3 May 2026 20:50:53 -0700

cc: apply usual arith conversion to ternary common type

The parser's `?:` handler called `cg-ifelse-merge` without applying
C11 §6.5.15 ¶5 / §6.3.1.8 conversions across the two arms, so the
merged opnd inherited arm 1's type. When arm 1 was narrower than arm
2, the surrounding expression saw a too-narrow rvalue and the next
`cg-arith-conv` truncated arm 2's bits.

The slot already stores the raw 8-byte payload (per cc.scm's
canonical-form discipline), and `%cg-load-opnd-into` re-canonicalizes
on read against the opnd's type kind, so picking the merged type
post-hoc is enough to fix the load. Add a `%ctype-arith?` predicate
and a `%cg-merge-arith-type` helper that integer-promotes both arms
then picks the wider type with unsigned tie-break, mirroring the
const-expr path's `%const-arith-conv-type`. `&&` / `||` callers are
unaffected because the parser pre-casts both arms to %t-i32.

The same idiom appears in tcc.flat.c's `gen_opic` sign-extension code
(`(uint32_t)l | (signed ? -(l & 0x80000000) : 0)`); the cc.scm
miscompile of that line was the last cc.scm-only divergence on the
tcc-cc suite vs the gcc baseline (220-const-promote).

tests/cc/335-ternary-merge-arith-conv.c covers the exact gen_opic
shape, swapped-arm symmetry, asymmetric int/u64 arms, and signed/
unsigned arith conversion.

Diffstat:
M cc/cc.scm | 51 +++++++++++++++++++++++++++++++++++++++++----------
A tests/cc/335-ternary-merge-arith-conv.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/cc/335-ternary-merge-arith-conv.expected-exit | 1 +
3 files changed, 97 insertions(+), 10 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -510,6 +510,13 @@ ((eq? k 'ptr) #t) ((eq? k 'arr) #t) ((eq? k 'fn) #t) (else #f)))) +(define (%ctype-arith? t) + (let ((k (ctype-kind t))) + (cond ((eq? k 'i8) #t) ((eq? k 'i16) #t) ((eq? k 'i32) #t) + ((eq? k 'i64) #t) ((eq? k 'u8) #t) ((eq? k 'u16) #t) + ((eq? k 'u32) #t) ((eq? k 'u64) #t) ((eq? k 'bool) #t) + (else #f)))) + (define (%ctype-fp? t) (let ((k (ctype-kind t))) (cond ((eq? k 'flt) #t) ((eq? k 'dbl) #t) ((eq? k 'ldbl) #t) @@ -3895,27 +3902,51 @@ ;; the surrounding expression. `cg-ifelse-merge` solves that: pop the ;; cond, allocate one result slot, and after each thunk runs, pop its ;; rval and store into the slot. Push the slot as one frame rval. -;; Both branches must push exactly one opnd; the result type is the -;; type of the first thunk's pushed opnd (parser must arrange for -;; both branches to push compatible types — either by passing -;; pre-coerced operands or by injecting a `cg-cast` inside the thunk). +;; +;; Result type follows C11 §6.5.15 ¶5 for ternary: the usual arithmetic +;; conversions over the two arms' types. The slot stores the raw 8-byte +;; payload (per cc.scm's canonical-form discipline); %cg-load-opnd-into +;; then re-canonicalizes on read against whatever common type we picked. +;; For `&&` / `||` callers both arms are pre-cast to %t-i32 by the +;; parser, so the merge is a no-op on type. 
(define (cg-ifelse-merge cg then-thunk else-thunk) (let* ((cond-op (cg-pop cg)) (slot (cg-alloc-slot cg 8 8))) (%cg-load-opnd-into cg cond-op 't0) (%cg-emit-many cg (list "%ifelse_nez(t0, {\n")) (then-thunk) - (let* ((p (cg-pop cg)) - (rty (opnd-type p))) + (let* ((p (cg-pop cg)) + (rty1 (opnd-type p))) (%cg-load-opnd-into cg p 'a0) (%cg-emit-st-slot cg 'a0 slot) (%cg-emit-many cg (list "}, {\n")) (else-thunk) - (let ((q (cg-pop cg))) + (let* ((q (cg-pop cg)) + (rty2 (opnd-type q))) (%cg-load-opnd-into cg q 'a0) - (%cg-emit-st-slot cg 'a0 slot)) - (%cg-emit-many cg (list "})\n")) - (cg-push cg (%opnd 'frame rty slot #f))))) + (%cg-emit-st-slot cg 'a0 slot) + (%cg-emit-many cg (list "})\n")) + (cg-push cg (%opnd 'frame + (%cg-merge-arith-type rty1 rty2) + slot #f)))))) + +;; Usual arithmetic conversion over two ctypes (C11 §6.3.1.8): +;; integer-promote each (sub-int → int), then pick the wider with +;; unsigned tie-break. Falls back to t1 for non-arithmetic kinds +;; (pointer, struct, array — ternary on those preserves the first +;; arm's type as before). +(define (%cg-merge-arith-type t1 t2) + (cond + ((and (%ctype-arith? t1) (%ctype-arith? t2)) + (let ((p1 (cond ((< (ctype-size t1) 4) %t-i32) (else t1))) + (p2 (cond ((< (ctype-size t2) 4) %t-i32) (else t2)))) + (cond + ((> (ctype-size p1) (ctype-size p2)) p1) + ((> (ctype-size p2) (ctype-size p1)) p2) + ((%ctype-unsigned? p1) p1) + ((%ctype-unsigned? p2) p2) + (else p1)))) + (else t1))) (define (cg-loop cg head-thunk body-thunk) ;; body-thunk receives the loop tag as its argument; parser uses diff --git a/tests/cc/335-ternary-merge-arith-conv.c b/tests/cc/335-ternary-merge-arith-conv.c @@ -0,0 +1,55 @@ +/* tests/cc/335-ternary-merge-arith-conv.c — C11 §6.5.15 ¶5: ternary + * applies the usual arithmetic conversions to its second and third + * operands and the result type is the resulting common type. + * + * cc.scm originally took arm 1's type as the merge slot's result type + * unchanged. 
When arm 1 was narrower than arm 2, the surrounding + * expression saw a too-narrow rvalue and cg-arith-conv truncated to + * that width on the next op. Concretely, in + * + * (uint32_t)x | (cond ? 0 : -(x & 0x80000000)) + * + * the ternary's int arm-1 dominated the merge type, so the `|` was + * lowered as 32-bit and the upper sign-extension bits in arm-2 were + * dropped. tcc.flat.c's gen_opic uses exactly this idiom for + * sign-extension of narrow constants, so the cc.scm-built tcc-boot2 + * miscompiled int < int as unsigned and 220-const-promote.c failed + * under tcc-cc. + * + * Keep this fixture as a runtime check on cc.scm directly so the + * regression surfaces without dragging tcc into the loop. */ +typedef unsigned long long u64; +typedef unsigned int u32; + +int main(void) { + u64 l = (u64)-1; + int t = 0; + + /* The exact pattern from tcc.flat.c gen_opic line 5471-5475. */ + u64 sext = ((u32)l | + (t & 0x10 ? 0 : -(l & 0x80000000))); + if (sext != (u64)-1) return 1; + + /* Same shape with the ternary's arms in the other order — the + * picked common type must not depend on which arm parses first. */ + u64 sext2 = ((u32)l | + (t & 0x10 ? -(l & 0x80000000) : 0)); + if (sext2 != 0xFFFFFFFFULL) return 2; + + /* Asymmetric arm types (int vs u64) at top level. With arm-1's + * type leaking through, the result is read as a 32-bit value and + * the high bits vanish. */ + u64 a = (1 ? 0 : (u64)0x100000001ULL); + if (a != 0) return 3; + u64 b = (0 ? 0 : (u64)0x100000001ULL); + if (b != 0x100000001ULL) return 4; + + /* Mixed signed/unsigned ternary: result type should follow the + * usual arithmetic conversions (i32 + u32 -> u32). */ + int s = -1; + u32 u = 1u; + u32 r = (1 ? s : u); + if (r != 0xFFFFFFFFu) return 5; + + return 0; +} diff --git a/tests/cc/335-ternary-merge-arith-conv.expected-exit b/tests/cc/335-ternary-merge-arith-conv.expected-exit @@ -0,0 +1 @@ +0