commit bab1f12ae5c8898d5b816a20dae3f067479e884c
parent 3e41fa6f1053c65da38ac4a7a432077425984ab0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 26 Apr 2026 21:46:21 -0700
cc/cg: width-aware 32-bit load/store, generalised N-byte helpers (§A.3)
Generalises the byte-decomposed dispatch from §A.2 into
%cg-emit-{ld,st}N-bytes that handle any 1..8 byte width; size-4
routes through 4× %lb/%sb with shift+OR. i32 sign-extends via
shli/sari 32.
Multi-byte gathers can't alias destination with base (the first %lb
would clobber the address before subsequent loads), so
%cg-load-opnd-into's global-lval path and cg-load's indirect path now
stage the base address in t2 — fixing the regression that surfaced in
11-global-var and 14-take-addr the moment size-4 byte-gathered.
cc-cg/17-int-roundtrip and cc-parse/17-int-arith verify adjacent
4-byte slots round-trip cleanly.
Diffstat:
6 files changed, 125 insertions(+), 47 deletions(-)
diff --git a/cc/cg.scm b/cc/cg.scm
@@ -77,25 +77,44 @@
;; Width-aware load/store. Dispatches on ctype-size:
;; 1: %lb / %sb (LB zero-extends; for signed i8 we sign-extend by
;; shli/sari 56 to materialize the canonical 64-bit form).
-;; 2: byte-decomposed: store low then shri-shift+store high; load
-;; two bytes + shli/or; sign-extend via shli/sari 48 for i16.
+;; 2/4: byte-decomposed (P1 has only 1-byte and 8-byte memory ops,
+;; and word ops require natural alignment which we can't promise
+;; for struct fields or non-word-aligned local slots). Loads
+;; gather bytes via %lb + shli/or; stores scatter via shri/%sb.
+;; Signed loads (i16/i32) sign-extend via shli/sari to canonical
+;; 64-bit form.
;; 8 (or anything else for now): %ld / %st.
-;; Size 4 collapses to the 8-byte path until §A.3 lands.
;; Scratch convention: helpers may clobber t1; callers never pass
;; reg=t1.
-(define (%cg-emit-ld2-bytes cg reg base-bv off-expr-lo off-expr-hi)
- (%cg-emit-many cg (list
- "%lb(" (%cg-reg->bv reg) ", " base-bv ", " off-expr-lo ")\n"
- "%lb(t1, " base-bv ", " off-expr-hi ")\n"
- "%shli(t1, t1, 8)\n"
- "%or(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", t1)\n")))
-
-(define (%cg-emit-st2-bytes cg reg base-bv off-expr-lo off-expr-hi)
- (%cg-emit-many cg (list
- "%sb(" (%cg-reg->bv reg) ", " base-bv ", " off-expr-lo ")\n"
- "%shri(t1, " (%cg-reg->bv reg) ", 8)\n"
- "%sb(t1, " base-bv ", " off-expr-hi ")\n")))
+(define (%cg-emit-ldN-bytes cg reg base-bv off-expr-fn n-bytes)
+ ;; Emit n-bytes %lb gathers into reg with shift+OR. byte 0 is low.
+ ;; off-expr-fn is a procedure: (off-expr-fn k) returns the bv
+ ;; expression for offset k.
+ (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", " base-bv ", "
+ (off-expr-fn 0) ")\n"))
+ (let loop ((k 1))
+ (cond
+ ((= k n-bytes) 0)
+ (else
+ (%cg-emit-many cg (list
+ "%lb(t1, " base-bv ", " (off-expr-fn k) ")\n"
+ "%shli(t1, t1, " (%n (* 8 k)) ")\n"
+ "%or(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", t1)\n"))
+ (loop (+ k 1))))))
+
+(define (%cg-emit-stN-bytes cg reg base-bv off-expr-fn n-bytes)
+ ;; Emit n-bytes %sb scatters from reg via shri-shifted t1.
+ (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", " base-bv ", "
+ (off-expr-fn 0) ")\n"))
+ (let loop ((k 1))
+ (cond
+ ((= k n-bytes) 0)
+ (else
+ (%cg-emit-many cg (list
+ "%shri(t1, " (%cg-reg->bv reg) ", " (%n (* 8 k)) ")\n"
+ "%sb(t1, " base-bv ", " (off-expr-fn k) ")\n"))
+ (loop (+ k 1))))))
(define (%cg-emit-sext cg reg shift-amount)
(%cg-emit-many cg (list
@@ -105,53 +124,59 @@
(%n shift-amount) ")\n")))
(define (%cg-emit-ld-slot-typed cg reg ctype logical-off)
- (let ((sz (ctype-size ctype)) (kind (ctype-kind ctype)))
+ (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype))
+ (off-fn (lambda (k) (%cg-slot-expr cg (+ logical-off k)))))
(cond
((= sz 1)
(%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", sp, "
- (%cg-slot-expr cg logical-off) ")\n"))
+ (off-fn 0) ")\n"))
(cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56))))
((= sz 2)
- (%cg-emit-ld2-bytes cg reg "sp"
- (%cg-slot-expr cg logical-off)
- (%cg-slot-expr cg (+ logical-off 1)))
+ (%cg-emit-ldN-bytes cg reg "sp" off-fn 2)
(cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48))))
+ ((= sz 4)
+ (%cg-emit-ldN-bytes cg reg "sp" off-fn 4)
+ (cond ((eq? kind 'i32) (%cg-emit-sext cg reg 32))))
(else (%cg-emit-ld-slot cg reg logical-off)))))
(define (%cg-emit-st-slot-typed cg reg ctype logical-off)
- (let ((sz (ctype-size ctype)))
+ (let* ((sz (ctype-size ctype))
+ (off-fn (lambda (k) (%cg-slot-expr cg (+ logical-off k)))))
(cond
((= sz 1)
(%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", sp, "
- (%cg-slot-expr cg logical-off) ")\n")))
- ((= sz 2)
- (%cg-emit-st2-bytes cg reg "sp"
- (%cg-slot-expr cg logical-off)
- (%cg-slot-expr cg (+ logical-off 1))))
+ (off-fn 0) ")\n")))
+ ((= sz 2) (%cg-emit-stN-bytes cg reg "sp" off-fn 2))
+ ((= sz 4) (%cg-emit-stN-bytes cg reg "sp" off-fn 4))
(else (%cg-emit-st-slot cg reg logical-off)))))
(define (%cg-emit-ld-typed cg reg ctype base off)
- (let ((sz (ctype-size ctype)) (kind (ctype-kind ctype)))
+ (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype))
+ (base-bv (%cg-reg->bv base))
+ (off-fn (lambda (k) (%n (+ off k)))))
(cond
((= sz 1)
(%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", "
- (%cg-reg->bv base) ", " (%n off) ")\n"))
+ base-bv ", " (off-fn 0) ")\n"))
(cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56))))
((= sz 2)
- (%cg-emit-ld2-bytes cg reg (%cg-reg->bv base)
- (%n off) (%n (+ off 1)))
+ (%cg-emit-ldN-bytes cg reg base-bv off-fn 2)
(cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48))))
+ ((= sz 4)
+ (%cg-emit-ldN-bytes cg reg base-bv off-fn 4)
+ (cond ((eq? kind 'i32) (%cg-emit-sext cg reg 32))))
(else (%cg-emit-ld cg reg base off)))))
(define (%cg-emit-st-typed cg reg ctype base off)
- (let ((sz (ctype-size ctype)))
+ (let* ((sz (ctype-size ctype))
+ (base-bv (%cg-reg->bv base))
+ (off-fn (lambda (k) (%n (+ off k)))))
(cond
((= sz 1)
(%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", "
- (%cg-reg->bv base) ", " (%n off) ")\n")))
- ((= sz 2)
- (%cg-emit-st2-bytes cg reg (%cg-reg->bv base)
- (%n off) (%n (+ off 1))))
+ base-bv ", " (off-fn 0) ")\n")))
+ ((= sz 2) (%cg-emit-stN-bytes cg reg base-bv off-fn 2))
+ ((= sz 4) (%cg-emit-stN-bytes cg reg base-bv off-fn 4))
(else (%cg-emit-st cg reg base off)))))
(define (%cg-load-opnd-into cg op reg)
@@ -167,8 +192,12 @@
((eq? kind 'global)
(cond
((not lv?) (%cg-emit-la cg reg ext))
- (else (%cg-emit-la cg reg ext)
- (%cg-emit-ld-typed cg reg ty reg 0))))
+ (else
+ ;; Width > 1 byte-gathers must not alias dest with base —
+ ;; the first %lb would otherwise clobber the address before
+ ;; subsequent byte loads. Stage the address in t2.
+ (%cg-emit-la cg 't2 ext)
+ (%cg-emit-ld-typed cg reg ty 't2 0))))
(else (die #f "cg internal: unknown opnd-kind" kind)))))
(define (%cg-spill-reg cg reg ty)
@@ -402,10 +431,11 @@
((not (opnd-lval? p)) (die #f "cg-load: not an lvalue"))
((and (eq? (opnd-kind p) 'frame)
(%cg-indirect? cg (opnd-ext p)))
- ;; Indirect frame-lval: slot holds the address. Load addr → t0,
- ;; then load value through t0 at the lval's type width.
- (%cg-emit-ld-slot cg 't0 (opnd-ext p))
- (%cg-emit-ld-typed cg 't0 ty 't0 0)
+ ;; Indirect frame-lval: slot holds the address. Stage the
+ ;; address in t2 so multi-byte gathers don't alias dest with
+ ;; base.
+ (%cg-emit-ld-slot cg 't2 (opnd-ext p))
+ (%cg-emit-ld-typed cg 't0 ty 't2 0)
(%cg-spill-reg cg 't0 ty))
(else (%cg-load-opnd-into cg p 't0) (%cg-spill-reg cg 't0 ty)))))
diff --git a/docs/CC-PUNCHLIST.md b/docs/CC-PUNCHLIST.md
@@ -56,12 +56,15 @@ upstream of nearly everything else. Land this first.
+ `%or`. `i16` sign-extends via `shli`/`sari` 48. Helpers may
clobber `t1`; callers never pass `reg=t1`.
-- [ ] **`int` (32-bit) load/store via lval**
- - cg: `cc-cg/NN-int-roundtrip.scm` — distinct from cc-cg/04 because it
- forces a 4-byte slot, not an 8-byte spill.
- - parse: `cc-parse/NN-int-arith.c`
- - Needs: `%ldw` / `%stw`. Existing fixtures pass because cg always
- spills i32 results into 8-byte slots.
+- [x] **`int` (32-bit) load/store via lval**
+ - cg: `cc-cg/17-int-roundtrip.scm`
+ - parse: `cc-parse/17-int-arith.c`
+ - Done: byte-decomposed dispatch generalised to N bytes via
+ `%cg-emit-{ld,st}N-bytes`; size-4 routes through 4× `%lb`/`%sb`
+ with shift+OR / shri-shift. `i32` sign-extends via `shli`/`sari`
+ 32. The address-staging in `%cg-load-opnd-into` and `cg-load`'s
+ indirect path now uses `t2` so multi-byte gathers don't alias
+ dest with base.
- [ ] **Signed narrowing keeps sign on re-widen**
- cg: `cc-cg/NN-sext-narrow.scm` — `(unsigned)(int)(char)-3` → exit 253.
diff --git a/tests/cc-cg/17-int-roundtrip.expected-exit b/tests/cc-cg/17-int-roundtrip.expected-exit
@@ -0,0 +1 @@
+1
diff --git a/tests/cc-cg/17-int-roundtrip.scm b/tests/cc-cg/17-int-roundtrip.scm
@@ -0,0 +1,35 @@
+;; tests/cc-cg/17-int-roundtrip.scm — width-correct 32-bit load/store
+;; on adjacent unsigned-int lvals (§A.3 of docs/CC-PUNCHLIST.md).
+;;
+;; Models:
+;; unsigned int a = 0xAABBCCDD, b = 0x11223344;
+;; return a == 0xAABBCCDD;
+;;
+;; Two 4-byte slots are allocated back-to-back at offsets 8 and 12.
+;; The buggy 8-byte path would store 8 bytes from sp+8 and 8 bytes
+;; from sp+12, contaminating each slot. Width-aware emission (4-byte
+;; decomposition: byte-gather load, %shri-cascade store) round-trips
+;; cleanly → comparison true → exit 1.
+
+(let ((cg (cg-init)))
+ (cg-fn-begin cg "main" '() %t-i32)
+ (let* ((off-a (cg-alloc-slot cg 4 4))
+ (off-b (cg-alloc-slot cg 4 4))
+ (sym-a (%sym "a" 'var 'auto %t-u32 off-a))
+ (sym-b (%sym "b" 'var 'auto %t-u32 off-b)))
+ ;; a = 0xAABBCCDD
+ (cg-push-sym cg sym-a)
+ (cg-push-imm cg %t-u32 2864434397)
+ (cg-assign cg) (cg-pop cg)
+ ;; b = 0x11223344
+ (cg-push-sym cg sym-b)
+ (cg-push-imm cg %t-u32 287454020)
+ (cg-assign cg) (cg-pop cg)
+ ;; return a == 0xAABBCCDD
+ (cg-push-sym cg sym-a)
+ (cg-load cg)
+ (cg-push-imm cg %t-u32 2864434397)
+ (cg-binop cg 'eq)
+ (cg-return cg))
+ (cg-fn-end cg)
+ (write-bv-fd 1 (cg-finish cg)))
diff --git a/tests/cc-parse/17-int-arith.c b/tests/cc-parse/17-int-arith.c
@@ -0,0 +1,8 @@
+// tests/cc-parse/17-int-arith.c — width-correct 32-bit load/store via
+// real C source. §A.3 of docs/CC-PUNCHLIST.md.
+
+int main() {
+ unsigned int a = 2864434397u; /* 0xAABBCCDD */
+ unsigned int b = 287454020u; /* 0x11223344 */
+ return a == 2864434397u;
+}
diff --git a/tests/cc-parse/17-int-arith.expected-exit b/tests/cc-parse/17-int-arith.expected-exit
@@ -0,0 +1 @@
+1