boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit ab85e02eed979d11e2e1a9327ad7929670602ac6
parent 87ad4aceb8900a6d217748e4fea96df2c9170938
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 26 Apr 2026 21:37:25 -0700

cc/cg: width-aware byte load/store for char lvals (§A.1)

Adjacent 1-byte lvals now use %lb / %sb instead of 8-byte %ld / %st,
preventing the second store from contaminating the first slot. New
%cg-emit-{ld,st}{,-slot}-typed helpers dispatch on ctype-size; size 1
uses byte ops with sign-extension for i8 (shli/sari 56). Sizes 2 and 4
fall through to the 8-byte path until §A.2 / §A.3 land.

cc-cg/15-char-roundtrip and cc-parse/15-char-arith lock the behavior
in; CC-PUNCHLIST §A.1 marked done.

Diffstat:
Mcc/cg.scm | 65+++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Mdocs/CC-PUNCHLIST.md | 16+++++++++-------
Atests/cc-cg/15-char-roundtrip.expected-exit | 1+
Atests/cc-cg/15-char-roundtrip.scm | 36++++++++++++++++++++++++++++++++++++
Atests/cc-parse/15-char-arith.c | 13+++++++++++++
Atests/cc-parse/15-char-arith.expected-exit | 1+
6 files changed, 117 insertions(+), 15 deletions(-)

diff --git a/cc/cg.scm b/cc/cg.scm @@ -74,15 +74,64 @@ (%cg-emit-many cg (list "%st(" (%cg-reg->bv reg) ", " (%cg-reg->bv base) ", " (%n off) ")\n"))) +;; Width-aware load/store. Dispatches on ctype-size: +;; 1: %lb / %sb (LB zero-extends; for signed i8 we sign-extend by +;; shli/sari 56 to materialize the canonical 64-bit form). +;; 8 (or anything else for now): %ld / %st. +;; Sizes 2 and 4 collapse to the 8-byte path until §A.2 / §A.3 land. +(define (%cg-emit-ld-slot-typed cg reg ctype logical-off) + (cond + ((= (ctype-size ctype) 1) + (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", sp, " + (%cg-slot-expr cg logical-off) ")\n")) + (cond ((eq? (ctype-kind ctype) 'i8) + (%cg-emit-many cg (list "%shli(" (%cg-reg->bv reg) ", " + (%cg-reg->bv reg) ", 56)\n" + "%sari(" (%cg-reg->bv reg) ", " + (%cg-reg->bv reg) ", 56)\n"))))) + (else (%cg-emit-ld-slot cg reg logical-off)))) + +(define (%cg-emit-st-slot-typed cg reg ctype logical-off) + (cond + ((= (ctype-size ctype) 1) + (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", sp, " + (%cg-slot-expr cg logical-off) ")\n"))) + (else (%cg-emit-st-slot cg reg logical-off)))) + +(define (%cg-emit-ld-typed cg reg ctype base off) + (cond + ((= (ctype-size ctype) 1) + (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", " + (%cg-reg->bv base) ", " (%n off) ")\n")) + (cond ((eq? (ctype-kind ctype) 'i8) + (%cg-emit-many cg (list "%shli(" (%cg-reg->bv reg) ", " + (%cg-reg->bv reg) ", 56)\n" + "%sari(" (%cg-reg->bv reg) ", " + (%cg-reg->bv reg) ", 56)\n"))))) + (else (%cg-emit-ld cg reg base off)))) + +(define (%cg-emit-st-typed cg reg ctype base off) + (cond + ((= (ctype-size ctype) 1) + (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", " + (%cg-reg->bv base) ", " (%n off) ")\n"))) + (else (%cg-emit-st cg reg base off)))) + (define (%cg-load-opnd-into cg op reg) - (let ((kind (opnd-kind op)) (lv? (opnd-lval? op)) (ext (opnd-ext op))) + (let ((kind (opnd-kind op)) (lv? (opnd-lval? op)) + (ext (opnd-ext op)) (ty (opnd-type op))) (cond ((eq? kind 'imm) (%cg-emit-li cg reg ext)) - ((eq? kind 'frame) (%cg-emit-ld-slot cg reg ext)) + ;; frame lval: load value at type width. frame rval is a spilled + ;; word (alloc-slot 8 8 in %cg-spill-reg) — always 8-byte load. + ((eq? kind 'frame) + (cond (lv? (%cg-emit-ld-slot-typed cg reg ty ext)) + (else (%cg-emit-ld-slot cg reg ext)))) ((eq? kind 'global) (cond ((not lv?) (%cg-emit-la cg reg ext)) - (else (%cg-emit-la cg reg ext) (%cg-emit-ld cg reg reg 0)))) + (else (%cg-emit-la cg reg ext) + (%cg-emit-ld-typed cg reg ty reg 0)))) (else (die #f "cg internal: unknown opnd-kind" kind))))) (define (%cg-spill-reg cg reg ty) @@ -317,9 +366,9 @@ ((and (eq? (opnd-kind p) 'frame) (%cg-indirect? cg (opnd-ext p))) ;; Indirect frame-lval: slot holds the address. Load addr → t0, - ;; then load value through t0. + ;; then load value through t0 at the lval's type width. (%cg-emit-ld-slot cg 't0 (opnd-ext p)) - (%cg-emit-ld cg 't0 't0 0) + (%cg-emit-ld-typed cg 't0 ty 't0 0) (%cg-spill-reg cg 't0 ty)) (else (%cg-load-opnd-into cg p 't0) (%cg-spill-reg cg 't0 ty))))) @@ -505,12 +554,12 @@ (cond ((%cg-indirect? cg (opnd-ext lhs)) (%cg-emit-ld-slot cg 't0 (opnd-ext lhs)) - (%cg-emit-st cg 'a0 't0 0)) + (%cg-emit-st-typed cg 'a0 ty 't0 0)) (else - (%cg-emit-st-slot cg 'a0 (opnd-ext lhs))))) + (%cg-emit-st-slot-typed cg 'a0 ty (opnd-ext lhs))))) ((eq? (opnd-kind lhs) 'global) (%cg-emit-la cg 't0 (opnd-ext lhs)) - (%cg-emit-st cg 'a0 't0 0)) + (%cg-emit-st-typed cg 'a0 ty 't0 0)) (else (die #f "cg-assign: unsupported lhs kind" (opnd-kind lhs)))) (%cg-spill-reg cg 'a0 ty)))) diff --git a/docs/CC-PUNCHLIST.md b/docs/CC-PUNCHLIST.md @@ -38,13 +38,15 @@ return. The 64-bit-everything load/store path is the largest correctness gap upstream of nearly everything else. Land this first. -- [ ] **`char` (8-bit) load/store via lval** - - cg: `cc-cg/NN-char-roundtrip.scm` — store `0xAA` into a 1-byte slot, - load it, exit with the low 8 bits → exit 170. - - parse: `cc-parse/NN-char-arith.c` — `unsigned char a = 0xAA; return a;` - → exit 170. - - Needs: `%cg-emit-ld` / `%cg-emit-st` dispatch on `ctype-size` to - `%ldb` / `%stb` (and matching libp1pp helpers if absent). +- [x] **`char` (8-bit) load/store via lval** + - cg: `cc-cg/15-char-roundtrip.scm` — two adjacent 1-byte slots; + stores must not bleed across slots → exit 1 on equality check. + - parse: `cc-parse/15-char-arith.c` — same shape from C. + - Done: added `%cg-emit-{ld,st}{,-slot}-typed` helpers that dispatch + on `ctype-size = 1` to `%lb`/`%sb`; `%cg-load-opnd-into`, + `cg-load`, `cg-assign` thread the lval ctype. `i8` loads also + sign-extend via `shli`/`sari` 56. 16/32-bit fall through to the + 8-byte path until §A.2/§A.3 land. - [ ] **`short` (16-bit) load/store via lval** - cg: `cc-cg/NN-short-roundtrip.scm` diff --git a/tests/cc-cg/15-char-roundtrip.expected-exit b/tests/cc-cg/15-char-roundtrip.expected-exit @@ -0,0 +1 @@ +1 diff --git a/tests/cc-cg/15-char-roundtrip.scm b/tests/cc-cg/15-char-roundtrip.scm @@ -0,0 +1,36 @@ +;; tests/cc-cg/15-char-roundtrip.scm — width-correct byte load/store +;; on adjacent unsigned-char lvals (§A.1 of docs/CC-PUNCHLIST.md). +;; +;; Models: +;; unsigned char a = 0xAA, b = 0xBB; +;; return a == 0xAA; +;; +;; Two 1-byte slots are allocated back-to-back at offsets 0 and 1. +;; If cg uses 8-byte %ld/%st on byte-typed lvals, the second store +;; contaminates the first slot (byte 0 stays 0xAA but byte 1 becomes +;; 0xBB) and the load of `a` yields 0xBBAA — which fails the equality +;; test. Width-aware emission (%lb / %sb on size-1 ctype) yields +;; exactly 0xAA, the comparison is true, and exit is 1. + +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (let* ((off-a (cg-alloc-slot cg 1 1)) + (off-b (cg-alloc-slot cg 1 1)) + (sym-a (%sym "a" 'var 'auto %t-u8 off-a)) + (sym-b (%sym "b" 'var 'auto %t-u8 off-b))) + ;; a = 0xAA + (cg-push-sym cg sym-a) + (cg-push-imm cg %t-u8 170) + (cg-assign cg) (cg-pop cg) + ;; b = 0xBB + (cg-push-sym cg sym-b) + (cg-push-imm cg %t-u8 187) + (cg-assign cg) (cg-pop cg) + ;; return a == 0xAA + (cg-push-sym cg sym-a) + (cg-load cg) + (cg-push-imm cg %t-u8 170) + (cg-binop cg 'eq) + (cg-return cg)) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-parse/15-char-arith.c b/tests/cc-parse/15-char-arith.c @@ -0,0 +1,13 @@ +// tests/cc-parse/15-char-arith.c — width-correct char load/store via +// real C source. §A.1 of docs/CC-PUNCHLIST.md. +// +// Two adjacent unsigned-char locals at consecutive 1-byte frame slots. +// If cg uses 8-byte loads/stores on byte-typed lvals, b's store +// contaminates a's slot and the equality check fails. Width-aware +// emission yields exit 1. + +int main() { + unsigned char a = 170; + unsigned char b = 187; + return a == 170; +} diff --git a/tests/cc-parse/15-char-arith.expected-exit b/tests/cc-parse/15-char-arith.expected-exit @@ -0,0 +1 @@ +1