commit 3e41fa6f1053c65da38ac4a7a432077425984ab0
parent ab85e02eed979d11e2e1a9327ad7929670602ac6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 26 Apr 2026 21:41:14 -0700
cc/cg: width-aware 16-bit load/store via byte decomposition (§A.2)
P1 has only 1-byte (LB/SB) and 8-byte (LD/ST) memory ops, so 16-bit
lvals decompose into two byte ops: store low + shri/store high; load
two bytes + shli/or. i16 loads sign-extend via shli/sari 48.
cc-cg/16-short-roundtrip and cc-parse/16-short-arith verify that
adjacent 2-byte slots round-trip without contamination.
Diffstat:
6 files changed, 124 insertions(+), 35 deletions(-)
diff --git a/cc/cg.scm b/cc/cg.scm
@@ -77,45 +77,82 @@
;; Width-aware load/store. Dispatches on ctype-size:
;; 1: %lb / %sb (LB zero-extends; for signed i8 we sign-extend by
;; shli/sari 56 to materialize the canonical 64-bit form).
+;; 2: byte-decomposed: store low then shri-shift+store high; load
+;; two bytes + shli/or; sign-extend via shli/sari 48 for i16.
;; 8 (or anything else for now): %ld / %st.
-;; Sizes 2 and 4 collapse to the 8-byte path until §A.2 / §A.3 land.
+;; Size 4 collapses to the 8-byte path until §A.3 lands.
+;; Scratch convention: helpers may clobber t1; callers never pass
+;; reg=t1.
+
+(define (%cg-emit-ld2-bytes cg reg base-bv off-expr-lo off-expr-hi)
+ (%cg-emit-many cg (list
+ "%lb(" (%cg-reg->bv reg) ", " base-bv ", " off-expr-lo ")\n"
+ "%lb(t1, " base-bv ", " off-expr-hi ")\n"
+ "%shli(t1, t1, 8)\n"
+ "%or(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", t1)\n")))
+
+(define (%cg-emit-st2-bytes cg reg base-bv off-expr-lo off-expr-hi)
+ (%cg-emit-many cg (list
+ "%sb(" (%cg-reg->bv reg) ", " base-bv ", " off-expr-lo ")\n"
+ "%shri(t1, " (%cg-reg->bv reg) ", 8)\n"
+ "%sb(t1, " base-bv ", " off-expr-hi ")\n")))
+
+(define (%cg-emit-sext cg reg shift-amount)
+ (%cg-emit-many cg (list
+ "%shli(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", "
+ (%n shift-amount) ")\n"
+ "%sari(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", "
+ (%n shift-amount) ")\n")))
+
(define (%cg-emit-ld-slot-typed cg reg ctype logical-off)
- (cond
- ((= (ctype-size ctype) 1)
- (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", sp, "
- (%cg-slot-expr cg logical-off) ")\n"))
- (cond ((eq? (ctype-kind ctype) 'i8)
- (%cg-emit-many cg (list "%shli(" (%cg-reg->bv reg) ", "
- (%cg-reg->bv reg) ", 56)\n"
- "%sari(" (%cg-reg->bv reg) ", "
- (%cg-reg->bv reg) ", 56)\n")))))
- (else (%cg-emit-ld-slot cg reg logical-off))))
+ (let ((sz (ctype-size ctype)) (kind (ctype-kind ctype)))
+ (cond
+ ((= sz 1)
+ (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", sp, "
+ (%cg-slot-expr cg logical-off) ")\n"))
+ (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56))))
+ ((= sz 2)
+ (%cg-emit-ld2-bytes cg reg "sp"
+ (%cg-slot-expr cg logical-off)
+ (%cg-slot-expr cg (+ logical-off 1)))
+ (cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48))))
+ (else (%cg-emit-ld-slot cg reg logical-off)))))
(define (%cg-emit-st-slot-typed cg reg ctype logical-off)
- (cond
- ((= (ctype-size ctype) 1)
- (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", sp, "
- (%cg-slot-expr cg logical-off) ")\n")))
- (else (%cg-emit-st-slot cg reg logical-off))))
+ (let ((sz (ctype-size ctype)))
+ (cond
+ ((= sz 1)
+ (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", sp, "
+ (%cg-slot-expr cg logical-off) ")\n")))
+ ((= sz 2)
+ (%cg-emit-st2-bytes cg reg "sp"
+ (%cg-slot-expr cg logical-off)
+ (%cg-slot-expr cg (+ logical-off 1))))
+ (else (%cg-emit-st-slot cg reg logical-off)))))
(define (%cg-emit-ld-typed cg reg ctype base off)
- (cond
- ((= (ctype-size ctype) 1)
- (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", "
- (%cg-reg->bv base) ", " (%n off) ")\n"))
- (cond ((eq? (ctype-kind ctype) 'i8)
- (%cg-emit-many cg (list "%shli(" (%cg-reg->bv reg) ", "
- (%cg-reg->bv reg) ", 56)\n"
- "%sari(" (%cg-reg->bv reg) ", "
- (%cg-reg->bv reg) ", 56)\n")))))
- (else (%cg-emit-ld cg reg base off))))
+ (let ((sz (ctype-size ctype)) (kind (ctype-kind ctype)))
+ (cond
+ ((= sz 1)
+ (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", "
+ (%cg-reg->bv base) ", " (%n off) ")\n"))
+ (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56))))
+ ((= sz 2)
+ (%cg-emit-ld2-bytes cg reg (%cg-reg->bv base)
+ (%n off) (%n (+ off 1)))
+ (cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48))))
+ (else (%cg-emit-ld cg reg base off)))))
(define (%cg-emit-st-typed cg reg ctype base off)
- (cond
- ((= (ctype-size ctype) 1)
- (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", "
- (%cg-reg->bv base) ", " (%n off) ")\n")))
- (else (%cg-emit-st cg reg base off))))
+ (let ((sz (ctype-size ctype)))
+ (cond
+ ((= sz 1)
+ (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", "
+ (%cg-reg->bv base) ", " (%n off) ")\n")))
+ ((= sz 2)
+ (%cg-emit-st2-bytes cg reg (%cg-reg->bv base)
+ (%n off) (%n (+ off 1))))
+ (else (%cg-emit-st cg reg base off)))))
(define (%cg-load-opnd-into cg op reg)
(let ((kind (opnd-kind op)) (lv? (opnd-lval? op))
diff --git a/docs/CC-PUNCHLIST.md b/docs/CC-PUNCHLIST.md
@@ -48,10 +48,13 @@ upstream of nearly everything else. Land this first.
sign-extend via `shli`/`sari` 56. 16/32-bit fall through to the
8-byte path until §A.2/§A.3 land.
-- [ ] **`short` (16-bit) load/store via lval**
- - cg: `cc-cg/NN-short-roundtrip.scm`
- - parse: `cc-parse/NN-short-arith.c`
- - Needs: `%ldh` / `%sth` paths.
+- [x] **`short` (16-bit) load/store via lval**
+ - cg: `cc-cg/16-short-roundtrip.scm`
+ - parse: `cc-parse/16-short-arith.c`
+ - Done: byte-decomposed dispatch in the typed helpers — store low
+ byte then `%shri` 8 + store high byte; load two bytes + `%shli` 8
+ + `%or`. `i16` sign-extends via `shli`/`sari` 48. Helpers may
+ clobber `t1`; callers never pass `reg=t1`.
- [ ] **`int` (32-bit) load/store via lval**
- cg: `cc-cg/NN-int-roundtrip.scm` — distinct from cc-cg/04 because it
diff --git a/tests/cc-cg/16-short-roundtrip.expected-exit b/tests/cc-cg/16-short-roundtrip.expected-exit
@@ -0,0 +1 @@
+1
diff --git a/tests/cc-cg/16-short-roundtrip.scm b/tests/cc-cg/16-short-roundtrip.scm
@@ -0,0 +1,36 @@
+;; tests/cc-cg/16-short-roundtrip.scm — width-correct 16-bit load/store
+;; on adjacent unsigned-short lvals (§A.2 of docs/CC-PUNCHLIST.md).
+;;
+;; Models:
+;; unsigned short a = 0xAABB, b = 0xCCDD;
+;; return a == 0xAABB;
+;;
+;; Two 2-byte slots are allocated back-to-back. If cg uses 8-byte
+;; %ld/%st, the second store contaminates bytes 2..9 of the first
+;; slot's neighborhood, so the 8-byte load of `a` yields 0xCCDDAABB
+;; — which fails the equality test against 0xAABB. Width-aware
+;; emission (byte-decomposed %sb store, %lb-gather load) yields
+;; exactly 0xAABB, the comparison is true, and exit is 1.
+
+(let ((cg (cg-init)))
+ (cg-fn-begin cg "main" '() %t-i32)
+ (let* ((off-a (cg-alloc-slot cg 2 2))
+ (off-b (cg-alloc-slot cg 2 2))
+ (sym-a (%sym "a" 'var 'auto %t-u16 off-a))
+ (sym-b (%sym "b" 'var 'auto %t-u16 off-b)))
+ ;; a = 0xAABB
+ (cg-push-sym cg sym-a)
+ (cg-push-imm cg %t-u16 43707)
+ (cg-assign cg) (cg-pop cg)
+ ;; b = 0xCCDD
+ (cg-push-sym cg sym-b)
+ (cg-push-imm cg %t-u16 52445)
+ (cg-assign cg) (cg-pop cg)
+ ;; return a == 0xAABB
+ (cg-push-sym cg sym-a)
+ (cg-load cg)
+ (cg-push-imm cg %t-u16 43707)
+ (cg-binop cg 'eq)
+ (cg-return cg))
+ (cg-fn-end cg)
+ (write-bv-fd 1 (cg-finish cg)))
diff --git a/tests/cc-parse/16-short-arith.c b/tests/cc-parse/16-short-arith.c
@@ -0,0 +1,11 @@
+// tests/cc-parse/16-short-arith.c — width-correct short load/store via
+// real C source. §A.2 of docs/CC-PUNCHLIST.md.
+//
+// Two adjacent unsigned-short locals at consecutive 2-byte slots.
+// Width-aware emission keeps b's store from contaminating a's slot.
+
+int main() {
+ unsigned short a = 43707; /* 0xAABB */
+ unsigned short b = 52445; /* 0xCCDD */
+ return a == 43707;
+}
diff --git a/tests/cc-parse/16-short-arith.expected-exit b/tests/cc-parse/16-short-arith.expected-exit
@@ -0,0 +1 @@
+1