commit a40b5b22c833630beacec9b7575f03922a9e6918
parent 5dbea4132c4faacb1c5a1056b67a75ce698c36bf
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 1 May 2026 17:27:27 -0700
cc/cg: canonicalize narrow-int spills in %cg-spill-reg
Operations like %add, %sub, %mul, %neg, and %bnot can leave 64-bit
register results whose high bits don't match the natural canonical
form for a narrow integer type. The reload path for spilled rvals
is a raw 8-byte %ld with no width awareness, so those high bits leak
into later 64-bit comparisons and casts. Make %cg-spill-reg sext
signed kinds (i8/i16/i32) and zext all other 1/2/4-byte kinds before
the store; pointers, arrays, fns, and 8-byte ints stay as-is.
Three new cc-cg fixtures lock in the cases that surfaced the bug:
80-uneg-canonical (-(1u) == UINT_MAX), 81-ubnot-canonical
(~0u == UINT_MAX), and 82-uadd-wrap-canonical (UINT_MAX+1u == 0u).
37-struct-store updated to cast u8 fields to int before the
arithmetic so the test exercises field-store correctness rather
than the previous high-bit accumulation.
Diffstat:
8 files changed, 96 insertions(+), 3 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -2778,7 +2778,30 @@
(%cg-emit-ld-typed cg reg ty 't2 0))
(else (die #f "cg internal: unknown opnd-kind" (opnd-kind op)))))
+;; Spill REG to a fresh 8-byte frame slot as an rval of TY. Since the
+;; reload path for an rval is a raw 8-byte %ld (no width awareness),
+;; the canonical 64-bit form for TY must already sit in REG before the
+;; store. For narrow integer TYs (sz < 8) that means sign-extending
+;; signed kinds and zero-extending unsigned kinds — operations like
+;; %add / %sub / %mul / %neg / %bnot can leave high bits set that
+;; don't match TY's natural canonical form, and those bits would leak
+;; into later 64-bit comparisons / casts. Width-8 (and ptr/arr/fn)
+;; need no fixup. Float kinds (softened ints) dispatch on size, as zext.
(define (%cg-spill-reg cg reg ty)
+ (let* ((sz (ctype-size ty))
+ (kind (ctype-kind ty)))
+ (cond
+ ((or (eq? kind 'ptr) (eq? kind 'arr) (eq? kind 'fn)) 0)
+ ((eq? kind 'i8) (%cg-emit-sext cg reg 56))
+ ((eq? kind 'i16) (%cg-emit-sext cg reg 48))
+ ((eq? kind 'i32) (%cg-emit-sext cg reg 32))
+ ((= sz 1) (%cg-emit-many cg (list "%zext8(" (%cg-reg->bv reg) ", "
+ (%cg-reg->bv reg) ")\n")))
+ ((= sz 2) (%cg-emit-many cg (list "%zext16(" (%cg-reg->bv reg) ", "
+ (%cg-reg->bv reg) ")\n")))
+ ((= sz 4) (%cg-emit-many cg (list "%zext32(" (%cg-reg->bv reg) ", "
+ (%cg-reg->bv reg) ", t1)\n")))
+ (else 0)))
(let* ((off (cg-alloc-slot cg 8 8))
(op (%opnd 'frame ty off #f)))
(%cg-emit-st-slot cg reg off)
diff --git a/tests/cc-cg/37-struct-store.scm b/tests/cc-cg/37-struct-store.scm
@@ -6,8 +6,8 @@
;; b.a = 3; b.b = 5; b.c = 7;
;; If field stores ignored offsets (or used 8-byte writes), adjacent
;; bytes would clobber each other. Reading back a*1 + b*10 + c*100
-;; isolates each field's contribution: 3 + 50 + 700 = 753. (Truncated
-;; to a u8 by the exit-code path: 753 & 255 = 241.)
+;; (each field cast to i32 first so the arithmetic doesn't truncate
+;; to u8) isolates each field's contribution: 3 + 50 + 700 = 753.
(let* ((cg (cg-init))
(st-ty (%ctype 'struct 3 1
@@ -33,19 +33,22 @@
(cg-push-field cg "c")
(cg-push-imm cg %t-u8 7)
(cg-assign cg) (cg-pop cg)
- ;; return (b.a + b.b*10 + b.c*100) == 753
+ ;; return ((int)b.a + (int)b.b*10 + (int)b.c*100) == 753
(cg-push-sym cg sym-b)
(cg-push-field cg "a")
(cg-load cg)
+ (cg-cast cg %t-i32)
(cg-push-sym cg sym-b)
(cg-push-field cg "b")
(cg-load cg)
+ (cg-cast cg %t-i32)
(cg-push-imm cg %t-i32 10)
(cg-binop cg 'mul)
(cg-binop cg 'add)
(cg-push-sym cg sym-b)
(cg-push-field cg "c")
(cg-load cg)
+ (cg-cast cg %t-i32)
(cg-push-imm cg %t-i32 100)
(cg-binop cg 'mul)
(cg-binop cg 'add)
diff --git a/tests/cc-cg/80-uneg-canonical.expected-exit b/tests/cc-cg/80-uneg-canonical.expected-exit
@@ -0,0 +1 @@
+1
diff --git a/tests/cc-cg/80-uneg-canonical.scm b/tests/cc-cg/80-uneg-canonical.scm
@@ -0,0 +1,23 @@
+;; tests/cc-cg/80-uneg-canonical.scm — unary minus on unsigned should
+;; leave the spilled 64-bit slot in the result type's canonical form.
+;;
+;; Models:
+;; unsigned int a = 1;
+;; return ((unsigned)-a) == 4294967295u; /* (u32)-1u == UINT_MAX */
+;;
+;; Bug: cg-unop neg computes 0 - canonical(1) = 0xFFFFFFFFFFFFFFFF and
+;; spills as u32. Without re-canonicalizing the spill (zext32), a
+;; subsequent compare against the literal 4294967295 (which %li loads
+;; as 0x00000000FFFFFFFF) sees mismatched upper bits → equality 0.
+;; A correct cg zero-extends an unsigned-typed result when spilling it.
+;; Exit code: 1 if equal, 0 otherwise.
+
+(let ((cg (cg-init)))
+ (cg-fn-begin cg "main" '() %t-i32) ; fn "main", no params, returns i32
+ (cg-push-imm cg %t-u32 1) ; a = 1u
+ (cg-unop cg 'neg) ; -a, still typed u32
+ (cg-push-imm cg %t-u32 4294967295) ; UINT_MAX literal
+ (cg-binop cg 'eq) ; (-a) == UINT_MAX
+ (cg-return cg) ; comparison result becomes the exit code
+ (cg-fn-end cg)
+ (write-bv-fd 1 (cg-finish cg))) ; write cg-finish's output to fd 1
diff --git a/tests/cc-cg/81-ubnot-canonical.expected-exit b/tests/cc-cg/81-ubnot-canonical.expected-exit
@@ -0,0 +1 @@
+1
diff --git a/tests/cc-cg/81-ubnot-canonical.scm b/tests/cc-cg/81-ubnot-canonical.scm
@@ -0,0 +1,20 @@
+;; tests/cc-cg/81-ubnot-canonical.scm — bitwise-not on unsigned should
+;; leave the spilled 64-bit slot in the result type's canonical form.
+;;
+;; Models:
+;; unsigned int a = 0;
+;; return (~a) == 4294967295u;
+;;
+;; Same bug class as 80-uneg-canonical: ~0 in 64-bit is 0xFF..FF;
+;; spilling it as u32 without re-canonicalizing makes the literal
+;; 4294967295u (loaded as 0x00000000FFFFFFFF) compare unequal.
+
+(let ((cg (cg-init)))
+ (cg-fn-begin cg "main" '() %t-i32) ; fn "main", no params, returns i32
+ (cg-push-imm cg %t-u32 0) ; a = 0u
+ (cg-unop cg 'bnot) ; ~a, still typed u32
+ (cg-push-imm cg %t-u32 4294967295) ; UINT_MAX literal
+ (cg-binop cg 'eq) ; (~a) == UINT_MAX
+ (cg-return cg) ; return the comparison result
+ (cg-fn-end cg)
+ (write-bv-fd 1 (cg-finish cg))) ; write cg-finish's output to fd 1
diff --git a/tests/cc-cg/82-uadd-wrap-canonical.expected-exit b/tests/cc-cg/82-uadd-wrap-canonical.expected-exit
@@ -0,0 +1 @@
+1
diff --git a/tests/cc-cg/82-uadd-wrap-canonical.scm b/tests/cc-cg/82-uadd-wrap-canonical.scm
@@ -0,0 +1,21 @@
+;; tests/cc-cg/82-uadd-wrap-canonical.scm — unsigned add must wrap
+;; to 32 bits before the result is spilled to its 64-bit slot.
+;;
+;; Models:
+;; unsigned int a = 4294967295u;
+;; return (a + 1u) == 0u;
+;;
+;; Bug: cg-binop add computes 0xFFFFFFFF + 1 = 0x100000000 in 64-bit.
+;; Spilled as u32 without zero-extending the low 32 bits, the result
+;; compares unequal to 0u (which loads as 0x0).
+
+(let ((cg (cg-init)))
+ (cg-fn-begin cg "main" '() %t-i32) ; fn "main", no params, returns i32
+ (cg-push-imm cg %t-u32 4294967295) ; a = UINT_MAX
+ (cg-push-imm cg %t-u32 1) ; 1u
+ (cg-binop cg 'add) ; a + 1u, typed u32
+ (cg-push-imm cg %t-u32 0) ; 0u literal
+ (cg-binop cg 'eq) ; (a + 1u) == 0u
+ (cg-return cg) ; return the comparison result
+ (cg-fn-end cg)
+ (write-bv-fd 1 (cg-finish cg))) ; write cg-finish's output to fd 1