boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit bab1f12ae5c8898d5b816a20dae3f067479e884c
parent 3e41fa6f1053c65da38ac4a7a432077425984ab0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 26 Apr 2026 21:46:21 -0700

cc/cg: width-aware 32-bit load/store, generalised N-byte helpers (§A.3)

Generalises the byte-decomposed dispatch from §A.2 into
%cg-emit-{ld,st}N-bytes that handle any 1..8 byte width; size-4
routes through 4× %lb/%sb with shift+OR. i32 sign-extends via
shli/sari 32.

Multi-byte gathers can't alias destination with base (the first %lb
would clobber the address before subsequent loads), so
%cg-load-opnd-into's global-lval path and cg-load's indirect path now
stage the base address in t2 — fixing the regression that surfaced in
11-global-var and 14-take-addr the moment size-4 byte-gathered.

cc-cg/17-int-roundtrip and cc-parse/17-int-arith verify adjacent
4-byte slots round-trip cleanly.

Diffstat:
Mcc/cg.scm | 112++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
Mdocs/CC-PUNCHLIST.md | 15+++++++++------
Atests/cc-cg/17-int-roundtrip.expected-exit | 1+
Atests/cc-cg/17-int-roundtrip.scm | 35+++++++++++++++++++++++++++++++++++
Atests/cc-parse/17-int-arith.c | 8++++++++
Atests/cc-parse/17-int-arith.expected-exit | 1+
6 files changed, 125 insertions(+), 47 deletions(-)

diff --git a/cc/cg.scm b/cc/cg.scm @@ -77,25 +77,44 @@ ;; Width-aware load/store. Dispatches on ctype-size: ;; 1: %lb / %sb (LB zero-extends; for signed i8 we sign-extend by ;; shli/sari 56 to materialize the canonical 64-bit form). -;; 2: byte-decomposed: store low then shri-shift+store high; load -;; two bytes + shli/or; sign-extend via shli/sari 48 for i16. +;; 2/4: byte-decomposed (P1 has only 1-byte and 8-byte memory ops, +;; and word ops require natural alignment which we can't promise +;; for struct fields or non-word-aligned local slots). Loads +;; gather bytes via %lb + shli/or; stores scatter via shri/%sb. +;; Signed loads (i16/i32) sign-extend via shli/sari to canonical +;; 64-bit form. ;; 8 (or anything else for now): %ld / %st. -;; Size 4 collapses to the 8-byte path until §A.3 lands. ;; Scratch convention: helpers may clobber t1; callers never pass ;; reg=t1. -(define (%cg-emit-ld2-bytes cg reg base-bv off-expr-lo off-expr-hi) - (%cg-emit-many cg (list - "%lb(" (%cg-reg->bv reg) ", " base-bv ", " off-expr-lo ")\n" - "%lb(t1, " base-bv ", " off-expr-hi ")\n" - "%shli(t1, t1, 8)\n" - "%or(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", t1)\n"))) - -(define (%cg-emit-st2-bytes cg reg base-bv off-expr-lo off-expr-hi) - (%cg-emit-many cg (list - "%sb(" (%cg-reg->bv reg) ", " base-bv ", " off-expr-lo ")\n" - "%shri(t1, " (%cg-reg->bv reg) ", 8)\n" - "%sb(t1, " base-bv ", " off-expr-hi ")\n"))) +(define (%cg-emit-ldN-bytes cg reg base-bv off-expr-fn n-bytes) + ;; Emit n-bytes %lb gathers into reg with shift+OR. byte 0 is low. + ;; off-expr-fn is a procedure: (off-expr-fn k) returns the bv + ;; expression for offset k. + (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", " base-bv ", " + (off-expr-fn 0) ")\n")) + (let loop ((k 1)) + (cond + ((= k n-bytes) 0) + (else + (%cg-emit-many cg (list + "%lb(t1, " base-bv ", " (off-expr-fn k) ")\n" + "%shli(t1, t1, " (%n (* 8 k)) ")\n" + "%or(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", t1)\n")) + (loop (+ k 1)))))) + +(define (%cg-emit-stN-bytes cg reg base-bv off-expr-fn n-bytes) + ;; Emit n-bytes %sb scatters from reg via shri-shifted t1. + (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", " base-bv ", " + (off-expr-fn 0) ")\n")) + (let loop ((k 1)) + (cond + ((= k n-bytes) 0) + (else + (%cg-emit-many cg (list + "%shri(t1, " (%cg-reg->bv reg) ", " (%n (* 8 k)) ")\n" + "%sb(t1, " base-bv ", " (off-expr-fn k) ")\n")) + (loop (+ k 1)))))) (define (%cg-emit-sext cg reg shift-amount) (%cg-emit-many cg (list @@ -105,53 +124,59 @@ (%n shift-amount) ")\n"))) (define (%cg-emit-ld-slot-typed cg reg ctype logical-off) - (let ((sz (ctype-size ctype)) (kind (ctype-kind ctype))) + (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype)) + (off-fn (lambda (k) (%cg-slot-expr cg (+ logical-off k))))) (cond ((= sz 1) (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", sp, " - (%cg-slot-expr cg logical-off) ")\n")) + (off-fn 0) ")\n")) (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56)))) ((= sz 2) - (%cg-emit-ld2-bytes cg reg "sp" - (%cg-slot-expr cg logical-off) - (%cg-slot-expr cg (+ logical-off 1))) + (%cg-emit-ldN-bytes cg reg "sp" off-fn 2) (cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48)))) + ((= sz 4) + (%cg-emit-ldN-bytes cg reg "sp" off-fn 4) + (cond ((eq? kind 'i32) (%cg-emit-sext cg reg 32)))) (else (%cg-emit-ld-slot cg reg logical-off))))) (define (%cg-emit-st-slot-typed cg reg ctype logical-off) - (let ((sz (ctype-size ctype))) + (let* ((sz (ctype-size ctype)) + (off-fn (lambda (k) (%cg-slot-expr cg (+ logical-off k))))) (cond ((= sz 1) (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", sp, " - (%cg-slot-expr cg logical-off) ")\n"))) - ((= sz 2) - (%cg-emit-st2-bytes cg reg "sp" - (%cg-slot-expr cg logical-off) - (%cg-slot-expr cg (+ logical-off 1)))) + (off-fn 0) ")\n"))) + ((= sz 2) (%cg-emit-stN-bytes cg reg "sp" off-fn 2)) + ((= sz 4) (%cg-emit-stN-bytes cg reg "sp" off-fn 4)) (else (%cg-emit-st-slot cg reg logical-off))))) (define (%cg-emit-ld-typed cg reg ctype base off) - (let ((sz (ctype-size ctype)) (kind (ctype-kind ctype))) + (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype)) + (base-bv (%cg-reg->bv base)) + (off-fn (lambda (k) (%n (+ off k))))) (cond ((= sz 1) (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", " - (%cg-reg->bv base) ", " (%n off) ")\n")) + base-bv ", " (off-fn 0) ")\n")) (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56)))) ((= sz 2) - (%cg-emit-ld2-bytes cg reg (%cg-reg->bv base) - (%n off) (%n (+ off 1))) + (%cg-emit-ldN-bytes cg reg base-bv off-fn 2) (cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48)))) + ((= sz 4) + (%cg-emit-ldN-bytes cg reg base-bv off-fn 4) + (cond ((eq? kind 'i32) (%cg-emit-sext cg reg 32)))) (else (%cg-emit-ld cg reg base off))))) (define (%cg-emit-st-typed cg reg ctype base off) - (let ((sz (ctype-size ctype))) + (let* ((sz (ctype-size ctype)) + (base-bv (%cg-reg->bv base)) + (off-fn (lambda (k) (%n (+ off k))))) (cond ((= sz 1) (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", " - (%cg-reg->bv base) ", " (%n off) ")\n"))) - ((= sz 2) - (%cg-emit-st2-bytes cg reg (%cg-reg->bv base) - (%n off) (%n (+ off 1)))) + base-bv ", " (off-fn 0) ")\n"))) + ((= sz 2) (%cg-emit-stN-bytes cg reg base-bv off-fn 2)) + ((= sz 4) (%cg-emit-stN-bytes cg reg base-bv off-fn 4)) (else (%cg-emit-st cg reg base off))))) (define (%cg-load-opnd-into cg op reg) @@ -167,8 +192,12 @@ ((eq? kind 'global) (cond ((not lv?) (%cg-emit-la cg reg ext)) - (else (%cg-emit-la cg reg ext) - (%cg-emit-ld-typed cg reg ty reg 0)))) + (else + ;; Width > 1 byte-gathers must not alias dest with base — + ;; the first %lb would otherwise clobber the address before + ;; subsequent byte loads. Stage the address in t2. + (%cg-emit-la cg 't2 ext) + (%cg-emit-ld-typed cg reg ty 't2 0)))) (else (die #f "cg internal: unknown opnd-kind" kind))))) (define (%cg-spill-reg cg reg ty) @@ -402,10 +431,11 @@ ((not (opnd-lval? p)) (die #f "cg-load: not an lvalue")) ((and (eq? (opnd-kind p) 'frame) (%cg-indirect? cg (opnd-ext p))) - ;; Indirect frame-lval: slot holds the address. Load addr → t0, - ;; then load value through t0 at the lval's type width. - (%cg-emit-ld-slot cg 't0 (opnd-ext p)) - (%cg-emit-ld-typed cg 't0 ty 't0 0) + ;; Indirect frame-lval: slot holds the address. Stage the + ;; address in t2 so multi-byte gathers don't alias dest with + ;; base. + (%cg-emit-ld-slot cg 't2 (opnd-ext p)) + (%cg-emit-ld-typed cg 't0 ty 't2 0) (%cg-spill-reg cg 't0 ty)) (else (%cg-load-opnd-into cg p 't0) (%cg-spill-reg cg 't0 ty))))) diff --git a/docs/CC-PUNCHLIST.md b/docs/CC-PUNCHLIST.md @@ -56,12 +56,15 @@ upstream of nearly everything else. Land this first. + `%or`. `i16` sign-extends via `shli`/`sari` 48. Helpers may clobber `t1`; callers never pass `reg=t1`. -- [ ] **`int` (32-bit) load/store via lval** - - cg: `cc-cg/NN-int-roundtrip.scm` — distinct from cc-cg/04 because it - forces a 4-byte slot, not an 8-byte spill. - - parse: `cc-parse/NN-int-arith.c` - - Needs: `%ldw` / `%stw`. Existing fixtures pass because cg always - spills i32 results into 8-byte slots. +- [x] **`int` (32-bit) load/store via lval** + - cg: `cc-cg/17-int-roundtrip.scm` + - parse: `cc-parse/17-int-arith.c` + - Done: byte-decomposed dispatch generalised to N bytes via + `%cg-emit-{ld,st}N-bytes`; size-4 routes through 4× `%lb`/`%sb` + with shift+OR / shri-shift. `i32` sign-extends via `shli`/`sari` + 32. The address-staging in `%cg-load-opnd-into` and `cg-load`'s + indirect path now uses `t2` so multi-byte gathers don't alias + dest with base. - [ ] **Signed narrowing keeps sign on re-widen** - cg: `cc-cg/NN-sext-narrow.scm` — `(unsigned)(int)(char)-3` → exit 253. diff --git a/tests/cc-cg/17-int-roundtrip.expected-exit b/tests/cc-cg/17-int-roundtrip.expected-exit @@ -0,0 +1 @@ +1 diff --git a/tests/cc-cg/17-int-roundtrip.scm b/tests/cc-cg/17-int-roundtrip.scm @@ -0,0 +1,35 @@ +;; tests/cc-cg/17-int-roundtrip.scm — width-correct 32-bit load/store +;; on adjacent unsigned-int lvals (§A.3 of docs/CC-PUNCHLIST.md). +;; +;; Models: +;; unsigned int a = 0xAABBCCDD, b = 0x11223344; +;; return a == 0xAABBCCDD; +;; +;; Two 4-byte slots are allocated back-to-back at offsets 8 and 12. +;; The buggy 8-byte path would store 8 bytes from sp+8 and 8 bytes +;; from sp+12, contaminating each slot. Width-aware emission (4-byte +;; decomposition: byte-gather load, %shri-cascade store) round-trips +;; cleanly → comparison true → exit 1. + +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (let* ((off-a (cg-alloc-slot cg 4 4)) + (off-b (cg-alloc-slot cg 4 4)) + (sym-a (%sym "a" 'var 'auto %t-u32 off-a)) + (sym-b (%sym "b" 'var 'auto %t-u32 off-b))) + ;; a = 0xAABBCCDD + (cg-push-sym cg sym-a) + (cg-push-imm cg %t-u32 2864434397) + (cg-assign cg) (cg-pop cg) + ;; b = 0x11223344 + (cg-push-sym cg sym-b) + (cg-push-imm cg %t-u32 287454020) + (cg-assign cg) (cg-pop cg) + ;; return a == 0xAABBCCDD + (cg-push-sym cg sym-a) + (cg-load cg) + (cg-push-imm cg %t-u32 2864434397) + (cg-binop cg 'eq) + (cg-return cg)) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-parse/17-int-arith.c b/tests/cc-parse/17-int-arith.c @@ -0,0 +1,8 @@ +// tests/cc-parse/17-int-arith.c — width-correct 32-bit load/store via +// real C source. §A.3 of docs/CC-PUNCHLIST.md. + +int main() { + unsigned int a = 2864434397u; /* 0xAABBCCDD */ + unsigned int b = 287454020u; /* 0x11223344 */ + return a == 2864434397u; +} diff --git a/tests/cc-parse/17-int-arith.expected-exit b/tests/cc-parse/17-int-arith.expected-exit @@ -0,0 +1 @@ +1