boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit a40b5b22c833630beacec9b7575f03922a9e6918
parent 5dbea4132c4faacb1c5a1056b67a75ce698c36bf
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri,  1 May 2026 17:27:27 -0700

cc/cg: canonicalize narrow-int spills in %cg-spill-reg

Operations like %add, %sub, %mul, %neg, and %bnot can leave 64-bit
register results whose high bits don't match the natural canonical
form for a narrow integer type. The reload path for spilled rvals
is a raw 8-byte %ld with no width awareness, so those high bits leak
into later 64-bit comparisons and casts. Make %cg-spill-reg sext
signed kinds (i8/i16/i32) and zext unsigned kinds (sz 1/2/4) before
the store; pointers, arrays, fns, and 8-byte ints stay as-is.

Three new cc-cg fixtures lock in the cases that surfaced the bug:
80-uneg-canonical (-(1u) == UINT_MAX), 81-ubnot-canonical
(~0u == UINT_MAX), and 82-uadd-wrap-canonical (UINT_MAX+1u == 0u).
37-struct-store updated to cast u8 fields to int before the
arithmetic so the test exercises field-store correctness rather
than the previous high-bit accumulation.

Diffstat:
M cc/cc.scm | 23 +++++++++++++++++++++++
M tests/cc-cg/37-struct-store.scm | 9 ++++++---
A tests/cc-cg/80-uneg-canonical.expected-exit | 1 +
A tests/cc-cg/80-uneg-canonical.scm | 23 +++++++++++++++++++++++
A tests/cc-cg/81-ubnot-canonical.expected-exit | 1 +
A tests/cc-cg/81-ubnot-canonical.scm | 20 ++++++++++++++++++++
A tests/cc-cg/82-uadd-wrap-canonical.expected-exit | 1 +
A tests/cc-cg/82-uadd-wrap-canonical.scm | 21 +++++++++++++++++++++
8 files changed, 96 insertions(+), 3 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -2778,7 +2778,30 @@ (%cg-emit-ld-typed cg reg ty 't2 0)) (else (die #f "cg internal: unknown opnd-kind" (opnd-kind op))))) +;; Spill REG to a fresh 8-byte frame slot as an rval of TY. Since the +;; reload path for an rval is a raw 8-byte %ld (no width awareness), +;; the canonical 64-bit form for TY must already sit in REG before the +;; store. For narrow integer TYs (sz < 8) that means sign-extending +;; signed kinds and zero-extending unsigned kinds — operations like +;; %add / %sub / %mul / %neg / %bnot can leave high bits set that +;; don't match TY's natural canonical form, and those bits would leak +;; into later 64-bit comparisons / casts. Width-8 (and ptr/arr/fn) +;; need no fixup. Float kinds are softened ints; treat as size dispatch. (define (%cg-spill-reg cg reg ty) + (let* ((sz (ctype-size ty)) + (kind (ctype-kind ty))) + (cond + ((or (eq? kind 'ptr) (eq? kind 'arr) (eq? kind 'fn)) 0) + ((eq? kind 'i8) (%cg-emit-sext cg reg 56)) + ((eq? kind 'i16) (%cg-emit-sext cg reg 48)) + ((eq? kind 'i32) (%cg-emit-sext cg reg 32)) + ((= sz 1) (%cg-emit-many cg (list "%zext8(" (%cg-reg->bv reg) ", " + (%cg-reg->bv reg) ")\n"))) + ((= sz 2) (%cg-emit-many cg (list "%zext16(" (%cg-reg->bv reg) ", " + (%cg-reg->bv reg) ")\n"))) + ((= sz 4) (%cg-emit-many cg (list "%zext32(" (%cg-reg->bv reg) ", " + (%cg-reg->bv reg) ", t1)\n"))) + (else 0))) (let* ((off (cg-alloc-slot cg 8 8)) (op (%opnd 'frame ty off #f))) (%cg-emit-st-slot cg reg off) diff --git a/tests/cc-cg/37-struct-store.scm b/tests/cc-cg/37-struct-store.scm @@ -6,8 +6,8 @@ ;; b.a = 3; b.b = 5; b.c = 7; ;; If field stores ignored offsets (or used 8-byte writes), adjacent ;; bytes would clobber each other. Reading back a*1 + b*10 + c*100 -;; isolates each field's contribution: 3 + 50 + 700 = 753. (Truncated -;; to a u8 by the exit-code path: 753 & 255 = 241.) 
+;; (each field cast to i32 first so the arithmetic doesn't truncate +;; to u8) isolates each field's contribution: 3 + 50 + 700 = 753. (let* ((cg (cg-init)) (st-ty (%ctype 'struct 3 1 @@ -33,19 +33,22 @@ (cg-push-field cg "c") (cg-push-imm cg %t-u8 7) (cg-assign cg) (cg-pop cg) - ;; return (b.a + b.b*10 + b.c*100) == 753 + ;; return ((int)b.a + (int)b.b*10 + (int)b.c*100) == 753 (cg-push-sym cg sym-b) (cg-push-field cg "a") (cg-load cg) + (cg-cast cg %t-i32) (cg-push-sym cg sym-b) (cg-push-field cg "b") (cg-load cg) + (cg-cast cg %t-i32) (cg-push-imm cg %t-i32 10) (cg-binop cg 'mul) (cg-binop cg 'add) (cg-push-sym cg sym-b) (cg-push-field cg "c") (cg-load cg) + (cg-cast cg %t-i32) (cg-push-imm cg %t-i32 100) (cg-binop cg 'mul) (cg-binop cg 'add) diff --git a/tests/cc-cg/80-uneg-canonical.expected-exit b/tests/cc-cg/80-uneg-canonical.expected-exit @@ -0,0 +1 @@ +1 diff --git a/tests/cc-cg/80-uneg-canonical.scm b/tests/cc-cg/80-uneg-canonical.scm @@ -0,0 +1,23 @@ +;; tests/cc-cg/80-uneg-canonical.scm — unary minus on unsigned should +;; leave the canonical 64-bit slot in the to-type's natural form. +;; +;; Models: +;; unsigned int a = 1; +;; return ((unsigned)-a) == 4294967295u; /* (u32)-1u == UINT_MAX */ +;; +;; Bug: cg-unop neg computes 0 - canonical(1) = 0xFFFFFFFFFFFFFFFF and +;; spills as u32. Without re-canonicalizing the spill (zext32), a +;; subsequent compare against the literal 4294967295 (which %li loads +;; as 0x00000000FFFFFFFF) sees mismatched upper bits → equality 0. +;; Correct cg masks/zext on spill of an unsigned-typed result. +;; Exit code: 1 if equal, 0 otherwise. 
+ +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (cg-push-imm cg %t-u32 1) + (cg-unop cg 'neg) + (cg-push-imm cg %t-u32 4294967295) + (cg-binop cg 'eq) + (cg-return cg) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-cg/81-ubnot-canonical.expected-exit b/tests/cc-cg/81-ubnot-canonical.expected-exit @@ -0,0 +1 @@ +1 diff --git a/tests/cc-cg/81-ubnot-canonical.scm b/tests/cc-cg/81-ubnot-canonical.scm @@ -0,0 +1,20 @@ +;; tests/cc-cg/81-ubnot-canonical.scm — bitwise-not on unsigned should +;; leave a canonical 64-bit slot in the to-type's natural form. +;; +;; Models: +;; unsigned int a = 0; +;; return (~a) == 4294967295u; +;; +;; Same bug class as 80-uneg-canonical: ~0 in 64-bit is 0xFF..FF, +;; spilled as u32 without re-canonicalizing causes a literal +;; 4294967295u (loaded as 0x00000000FFFFFFFF) to compare unequal. + +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (cg-push-imm cg %t-u32 0) + (cg-unop cg 'bnot) + (cg-push-imm cg %t-u32 4294967295) + (cg-binop cg 'eq) + (cg-return cg) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-cg/82-uadd-wrap-canonical.expected-exit b/tests/cc-cg/82-uadd-wrap-canonical.expected-exit @@ -0,0 +1 @@ +1 diff --git a/tests/cc-cg/82-uadd-wrap-canonical.scm b/tests/cc-cg/82-uadd-wrap-canonical.scm @@ -0,0 +1,21 @@ +;; tests/cc-cg/82-uadd-wrap-canonical.scm — unsigned add must wrap +;; correctly into the canonical 64-bit slot form. +;; +;; Models: +;; unsigned int a = 4294967295u; +;; return (a + 1u) == 0u; +;; +;; Bug: cg-binop add computes 0xFFFFFFFF + 1 = 0x100000000 in 64-bit. +;; Spilled as u32 without zero-extending the low 32 bits, the result +;; compares unequal to 0u (which loads as 0x0). + +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (cg-push-imm cg %t-u32 4294967295) + (cg-push-imm cg %t-u32 1) + (cg-binop cg 'add) + (cg-push-imm cg %t-u32 0) + (cg-binop cg 'eq) + (cg-return cg) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg)))