cc/cg: canonicalize narrow-int spills in %cg-spill-reg - boot2

commit a40b5b22c833630beacec9b7575f03922a9e6918
parent 5dbea4132c4faacb1c5a1056b67a75ce698c36bf
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri,  1 May 2026 17:27:27 -0700

cc/cg: canonicalize narrow-int spills in %cg-spill-reg

Operations like %add, %sub, %mul, %neg, and %bnot can leave 64-bit
register results whose high bits don't match the natural canonical
form for a narrow integer type. The reload path for spilled rvals
is a raw 8-byte %ld with no width awareness, so those high bits leak
into later 64-bit comparisons and casts. Make %cg-spill-reg sext
signed kinds (i8/i16/i32) and zext unsigned kinds (sz 1/2/4) before
the store; pointers, arrays, fns, and 8-byte ints stay as-is.

Three new cc-cg fixtures lock in the cases that surfaced the bug:
80-uneg-canonical (-(1u) == UINT_MAX), 81-ubnot-canonical
(~0u == UINT_MAX), and 82-uadd-wrap-canonical (UINT_MAX+1u == 0u).
37-struct-store updated to cast u8 fields to int before the
arithmetic so the test exercises field-store correctness rather
than the previous high-bit accumulation.

Diffstat:
M cc/cc.scm  | 23 +++++++++++++++++++++++
M tests/cc-cg/37-struct-store.scm  | 9 ++++++---
A tests/cc-cg/80-uneg-canonical.expected-exit  | 1 +
A tests/cc-cg/80-uneg-canonical.scm  | 23 +++++++++++++++++++++++
A tests/cc-cg/81-ubnot-canonical.expected-exit  | 1 +
A tests/cc-cg/81-ubnot-canonical.scm  | 20 ++++++++++++++++++++
A tests/cc-cg/82-uadd-wrap-canonical.expected-exit  | 1 +
A tests/cc-cg/82-uadd-wrap-canonical.scm  | 21 +++++++++++++++++++++

8 files changed, 96 insertions(+), 3 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -2778,7 +2778,30 @@
      (%cg-emit-ld-typed cg reg ty 't2 0))
     (else (die #f "cg internal: unknown opnd-kind" (opnd-kind op)))))
 
+;; Spill REG to a fresh 8-byte frame slot as an rval of TY. Since the
+;; reload path for an rval is a raw 8-byte %ld (no width awareness),
+;; the canonical 64-bit form for TY must already sit in REG before the
+;; store. For narrow integer TYs (sz < 8) that means sign-extending
+;; signed kinds and zero-extending unsigned kinds — operations like
+;; %add / %sub / %mul / %neg / %bnot can leave high bits set that
+;; don't match TY's natural canonical form, and those bits would leak
+;; into later 64-bit comparisons / casts. Width-8 (and ptr/arr/fn)
+;; need no fixup. Float kinds are softened ints; treat as size dispatch.
 (define (%cg-spill-reg cg reg ty)
+  (let* ((sz   (ctype-size ty))
+         (kind (ctype-kind ty)))
+    (cond
+      ((or (eq? kind 'ptr) (eq? kind 'arr) (eq? kind 'fn)) 0)
+      ((eq? kind 'i8)  (%cg-emit-sext cg reg 56))
+      ((eq? kind 'i16) (%cg-emit-sext cg reg 48))
+      ((eq? kind 'i32) (%cg-emit-sext cg reg 32))
+      ((= sz 1) (%cg-emit-many cg (list "%zext8(" (%cg-reg->bv reg) ", "
+                                        (%cg-reg->bv reg) ")\n")))
+      ((= sz 2) (%cg-emit-many cg (list "%zext16(" (%cg-reg->bv reg) ", "
+                                        (%cg-reg->bv reg) ")\n")))
+      ((= sz 4) (%cg-emit-many cg (list "%zext32(" (%cg-reg->bv reg) ", "
+                                        (%cg-reg->bv reg) ", t1)\n")))
+      (else 0)))
   (let* ((off (cg-alloc-slot cg 8 8))
          (op  (%opnd 'frame ty off #f)))
     (%cg-emit-st-slot cg reg off)
diff --git a/tests/cc-cg/37-struct-store.scm b/tests/cc-cg/37-struct-store.scm
@@ -6,8 +6,8 @@
 ;; b.a = 3; b.b = 5; b.c = 7;
 ;; If field stores ignored offsets (or used 8-byte writes), adjacent
 ;; bytes would clobber each other. Reading back a*1 + b*10 + c*100
-;; isolates each field's contribution: 3 + 50 + 700 = 753. (Truncated
-;; to a u8 by the exit-code path: 753 & 255 = 241.)
+;; (each field cast to i32 first so the arithmetic doesn't truncate
+;; to u8) isolates each field's contribution: 3 + 50 + 700 = 753.
 
 (let* ((cg     (cg-init))
        (st-ty  (%ctype 'struct 3 1
@@ -33,19 +33,22 @@
     (cg-push-field cg "c")
     (cg-push-imm cg %t-u8 7)
     (cg-assign cg) (cg-pop cg)
-    ;; return (b.a + b.b*10 + b.c*100) == 753
+    ;; return ((int)b.a + (int)b.b*10 + (int)b.c*100) == 753
     (cg-push-sym cg sym-b)
     (cg-push-field cg "a")
     (cg-load cg)
+    (cg-cast cg %t-i32)
     (cg-push-sym cg sym-b)
     (cg-push-field cg "b")
     (cg-load cg)
+    (cg-cast cg %t-i32)
     (cg-push-imm cg %t-i32 10)
     (cg-binop cg 'mul)
     (cg-binop cg 'add)
     (cg-push-sym cg sym-b)
     (cg-push-field cg "c")
     (cg-load cg)
+    (cg-cast cg %t-i32)
     (cg-push-imm cg %t-i32 100)
     (cg-binop cg 'mul)
     (cg-binop cg 'add)
diff --git a/tests/cc-cg/80-uneg-canonical.expected-exit b/tests/cc-cg/80-uneg-canonical.expected-exit
@@ -0,0 +1 @@
+1
diff --git a/tests/cc-cg/80-uneg-canonical.scm b/tests/cc-cg/80-uneg-canonical.scm
@@ -0,0 +1,23 @@
+;; tests/cc-cg/80-uneg-canonical.scm — unary minus on unsigned should
+;; leave the canonical 64-bit slot in the to-type's natural form.
+;;
+;; Models:
+;;   unsigned int a = 1;
+;;   return ((unsigned)-a) == 4294967295u;  /* (u32)-1u == UINT_MAX */
+;;
+;; Bug: cg-unop neg computes 0 - canonical(1) = 0xFFFFFFFFFFFFFFFF and
+;; spills as u32. Without re-canonicalizing the spill (zext32), a
+;; subsequent compare against the literal 4294967295 (which %li loads
+;; as 0x00000000FFFFFFFF) sees mismatched upper bits → equality 0.
+;; Correct cg masks/zext on spill of an unsigned-typed result.
+;; Exit code: 1 if equal, 0 otherwise.
+
+(let ((cg (cg-init)))
+  (cg-fn-begin cg "main" '() %t-i32)
+  (cg-push-imm cg %t-u32 1)
+  (cg-unop cg 'neg)
+  (cg-push-imm cg %t-u32 4294967295)
+  (cg-binop cg 'eq)
+  (cg-return cg)
+  (cg-fn-end cg)
+  (write-bv-fd 1 (cg-finish cg)))
diff --git a/tests/cc-cg/81-ubnot-canonical.expected-exit b/tests/cc-cg/81-ubnot-canonical.expected-exit
@@ -0,0 +1 @@
+1
diff --git a/tests/cc-cg/81-ubnot-canonical.scm b/tests/cc-cg/81-ubnot-canonical.scm
@@ -0,0 +1,20 @@
+;; tests/cc-cg/81-ubnot-canonical.scm — bitwise-not on unsigned should
+;; leave a canonical 64-bit slot in the to-type's natural form.
+;;
+;; Models:
+;;   unsigned int a = 0;
+;;   return (~a) == 4294967295u;
+;;
+;; Same bug class as 80-uneg-canonical: ~0 in 64-bit is 0xFF..FF,
+;; spilled as u32 without re-canonicalizing causes a literal
+;; 4294967295u (loaded as 0x00000000FFFFFFFF) to compare unequal.
+
+(let ((cg (cg-init)))
+  (cg-fn-begin cg "main" '() %t-i32)
+  (cg-push-imm cg %t-u32 0)
+  (cg-unop cg 'bnot)
+  (cg-push-imm cg %t-u32 4294967295)
+  (cg-binop cg 'eq)
+  (cg-return cg)
+  (cg-fn-end cg)
+  (write-bv-fd 1 (cg-finish cg)))
diff --git a/tests/cc-cg/82-uadd-wrap-canonical.expected-exit b/tests/cc-cg/82-uadd-wrap-canonical.expected-exit
@@ -0,0 +1 @@
+1
diff --git a/tests/cc-cg/82-uadd-wrap-canonical.scm b/tests/cc-cg/82-uadd-wrap-canonical.scm
@@ -0,0 +1,21 @@
+;; tests/cc-cg/82-uadd-wrap-canonical.scm — unsigned add must wrap
+;; correctly into the canonical 64-bit slot form.
+;;
+;; Models:
+;;   unsigned int a = 4294967295u;
+;;   return (a + 1u) == 0u;
+;;
+;; Bug: cg-binop add computes 0xFFFFFFFF + 1 = 0x100000000 in 64-bit.
+;; Spilled as u32 without zero-extending the low 32 bits, the result
+;; compares unequal to 0u (which loads as 0x0).
+
+(let ((cg (cg-init)))
+  (cg-fn-begin cg "main" '() %t-i32)
+  (cg-push-imm cg %t-u32 4294967295)
+  (cg-push-imm cg %t-u32 1)
+  (cg-binop cg 'add)
+  (cg-push-imm cg %t-u32 0)
+  (cg-binop cg 'eq)
+  (cg-return cg)
+  (cg-fn-end cg)
+  (write-bv-fd 1 (cg-finish cg)))

	boot2 Playing with the bootstrap
	git clone https://git.ryansepassi.com/git/boot2.git
	Log \| Files \| Refs \| README

M	cc/cc.scm	\|	23	+++++++++++++++++++++++
M	tests/cc-cg/37-struct-store.scm	\|	9	++++++---
A	tests/cc-cg/80-uneg-canonical.expected-exit	\|	1	+
A	tests/cc-cg/80-uneg-canonical.scm	\|	23	+++++++++++++++++++++++
A	tests/cc-cg/81-ubnot-canonical.expected-exit	\|	1	+
A	tests/cc-cg/81-ubnot-canonical.scm	\|	20	++++++++++++++++++++
A	tests/cc-cg/82-uadd-wrap-canonical.expected-exit	\|	1	+
A	tests/cc-cg/82-uadd-wrap-canonical.scm	\|	21	+++++++++++++++++++++