boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs

commit f93d3f2de8178ba649e888096330c50cb1489180
parent 3f422c60b225b4f33692c2d1d53e6478cc3df284
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 21 Apr 2026 15:58:13 -0700

lisp.M1: fix eval_if #f, prim_ashift neg; p1_gen: amd64 RRR rB-alias

eval_if compared cond against NIL (%7) instead of FALSE, so (if #f ...)
always took the then-branch. Fix exposed two latent bugs:

- prim_ashift_neg did sub_r0,r1,r0 assuming r1=0 (per p1_gen convention)
  but r1 still held argc. Zero r1 before the sub.
- amd64 rrr emitted mov rD,rA; op rD,rB — if rD aliased rB (e.g.
  sub_r3,r0,r3 in prim_abs), the mov clobbered rB before op read it.
  Funnel rB through rcx (non-P1 scratch) in the aliasing case for
  ADD/SUB/AND/OR/XOR, SHL/SHR/SAR, and MUL.

Diffstat:
Msrc/lisp.M1 | 3++-
Msrc/p1_gen.py | 25++++++++++++++++++++-----
2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/src/lisp.M1 b/src/lisp.M1
@@ -2908,6 +2908,7 @@ DEFINE ZERO32 '0000000000000000000000000000000000000000000000000000000000000000'
     li_br &prim_ashift_done
     b
 :prim_ashift_neg
+    li_r1 %0
     sub_r0,r1,r0                ## r0 = -k
     sar_r3,r3,r0
 :prim_ashift_done
@@ -4178,7 +4179,7 @@ DEFINE ZERO32 '0000000000000000000000000000000000000000000000000000000000000000'
     li_br &eval
     call                        ## r0 = cond value
-    li_r1 %7
+    li_r1 FALSE
     li_br &eval_if_else
     beq_r0,r1
diff --git a/src/p1_gen.py b/src/p1_gen.py
@@ -472,6 +472,10 @@ class AMD64(Encoder):
     def rrr(self, op, rD, rA, rB):
         if op == 'MUL':
+            # If rD aliases rB, save rB to rcx first (mov rD,rA would
+            # clobber it before imul reads it).
+            if rD == rB and rA != rB:
+                return amd_mov_rr('rcx', rB) + amd_mov_rr(rD, rA) + amd_imul_rr(rD, 'rcx')
             return amd_mov_rr(rD, rA) + amd_imul_rr(rD, rB)
         if op in ('DIV', 'REM'):
             # x86 idiv implicitly reads/writes rax (P1 r0) and rdx
@@ -496,13 +500,24 @@ class AMD64(Encoder):
             return seq
         if op in ('SHL', 'SHR', 'SAR'):
             ext = {'SHL': 4, 'SHR': 5, 'SAR': 7}[op]
-            seq = amd_mov_rr(rD, rA)
-            seq += amd_mov_rr('rcx', rB)
+            # Save rB → rcx FIRST so a subsequent mov rD,rA (which may
+            # alias rB) doesn't clobber the shift count.
+            seq = amd_mov_rr('rcx', rB)
+            if rD != rA:
+                seq += amd_mov_rr(rD, rA)
             seq += amd_shift_cl(ext, rD)
             return seq
-        # ADD/SUB/AND/OR/XOR: mov rD,rA ; op rD,rB
-        seq = amd_mov_rr(rD, rA)
-        seq += amd_alu_rr(AMD64_RRR_OPC[op], rD, rB)
+        # ADD/SUB/AND/OR/XOR. If rD aliases rB (e.g., sub_r3,r0,r3), the
+        # naive mov rD,rA overwrites rB before the op reads it — funnel
+        # rB through rcx.
+        opcode = AMD64_RRR_OPC[op]
+        if rD == rB and rA != rB:
+            seq = amd_mov_rr('rcx', rB)
+            seq += amd_mov_rr(rD, rA)
+            seq += amd_alu_rr(opcode, rD, 'rcx')
+            return seq
+        seq = '' if rD == rA else amd_mov_rr(rD, rA)
+        seq += amd_alu_rr(opcode, rD, rB)
         return seq

     def addi(self, rD, rA, imm):