boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 3b8120ff0d624c22423a5cff80dadaf60abb599c
parent a40b5b22c833630beacec9b7575f03922a9e6918
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri,  1 May 2026 17:37:27 -0700

cc/cg: signed shift right uses lhs signedness only

Per C 6.5.7 ¶3, the usual arithmetic conversions do not apply to
shift operators — the integer promotions run on each operand
separately, the result has the promoted lhs type, and signedness
for `>>` keys off the lhs alone. The previous codegen ran
cg-arith-conv before cg-binop for shifts (so a signed lhs combined
with an unsigned rhs got relabeled to the unsigned common type) and
cg-binop's shr branch decided sar-vs-shr from `(or unsigned-a
unsigned-b)`, so `int x = -16; x >> 1u` came out as logical shift
right of a sign-extended value — a large positive instead of -8.

Two fixes: parse-binary skips cg-arith-conv for shl/shr (and the
matching compound-assign path), and cg-binop's shr branch dispatches
on the lhs type only.

New cc-cg fixture 83-signed-shr-by-unsigned locks it in.

Diffstat:
M cc/cc.scm | 23 +++++++++++++++++++----
A tests/cc-cg/83-signed-shr-by-unsigned.expected-exit | 1 +
A tests/cc-cg/83-signed-shr-by-unsigned.scm | 28 ++++++++++++++++++++++++++++
3 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -3577,8 +3577,14 @@ ((eq? op 'xor) (%cg-emit-rrr cg "xor" 't0 'a0 'a1)) ((eq? op 'shl) (%cg-emit-rrr cg "shl" 't0 'a0 'a1)) ((eq? op 'shr) - (if unsigned? (%cg-emit-rrr cg "shr" 't0 'a0 'a1) - (%cg-emit-rrr cg "sar" 't0 'a0 'a1))) + ;; Shift right uses the LEFT operand's signedness only + ;; (C 6.5.7 ¶3 — usual arithmetic conversions don't apply + ;; to shifts; only the integer promotions, separately on + ;; each operand, and the result type is the promoted left + ;; operand). A signed lhs gets arithmetic shift right + ;; regardless of the rhs's signedness. + (if (%ctype-unsigned? ta) (%cg-emit-rrr cg "shr" 't0 'a0 'a1) + (%cg-emit-rrr cg "sar" 't0 'a0 'a1))) ((eq? op 'div) (%cg-emit-rrr cg "div" 't0 'a0 'a1)) ((eq? op 'rem) (%cg-emit-rrr cg "rem" 't0 'a0 'a1)) ((eq? op 'eq) (%cg-emit-cmp cg "eq" 'a0 'a1 't0)) @@ -6280,7 +6286,10 @@ (cg-dup (ps-cg ps)) (cg-load (ps-cg ps)) (parse-expr-bp ps rb) (rval! ps) - (cg-arith-conv (ps-cg ps)) + ;; See parse-binary's note: shifts skip arith-conv. + (cond + ((or (eq? b 'shl) (eq? b 'shr)) #t) + (else (cg-arith-conv (ps-cg ps)))) (cg-binop (ps-cg ps) b) (cg-assign (ps-cg ps)))) ((eq? op 'qmark) @@ -6316,7 +6325,13 @@ (rval! ps) (cg-promote (ps-cg ps)) (parse-expr-bp ps rb) (rval! ps) (cg-promote (ps-cg ps)) - (cg-arith-conv (ps-cg ps)) + ;; Shifts: integer promotions only; do NOT apply the + ;; usual arithmetic conversions (C 6.5.7 ¶3). The + ;; result has the promoted left operand's type, and + ;; signedness for `>>` keys off the lhs alone. + (cond + ((or (eq? op 'shl) (eq? 
op 'shr)) #t) + (else (cg-arith-conv (ps-cg ps)))) (cg-binop (ps-cg ps) (punct-to-cgop op)))) (parse-binary-rhs ps mn))))))))) diff --git a/tests/cc-cg/83-signed-shr-by-unsigned.expected-exit b/tests/cc-cg/83-signed-shr-by-unsigned.expected-exit @@ -0,0 +1 @@ +1 diff --git a/tests/cc-cg/83-signed-shr-by-unsigned.scm b/tests/cc-cg/83-signed-shr-by-unsigned.scm @@ -0,0 +1,28 @@ +;; tests/cc-cg/83-signed-shr-by-unsigned.scm — `int x = -16; x >> 1u` +;; must use arithmetic shift right (sar) per C 6.5.7: shifts use the +;; promoted left operand's signedness, not the result of the usual +;; arithmetic conversions. +;; +;; Models: `(int)(-16) >> (unsigned)1 == -8`. +;; +;; Replicates the corrected parse-binary sequence for shift operators +;; (no arith-conv): rval lhs; cg-promote; rval rhs; cg-promote; +;; cg-binop 'shr. +;; +;; cg-binop currently dispatched signed/unsigned shr off either operand +;; (`(or (unsigned? ta) (unsigned? tb))`), so a signed lhs with an +;; unsigned shift count incorrectly used logical shift right. Correct +;; cg keys signedness off the LEFT operand only (C 6.5.7). + +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (cg-push-imm cg %t-i32 -16) + (cg-promote cg) + (cg-push-imm cg %t-u32 1) + (cg-promote cg) + (cg-binop cg 'shr) + (cg-push-imm cg %t-i32 -8) + (cg-binop cg 'eq) + (cg-return cg) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg)))