boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs

commit f93d3f2de8178ba649e888096330c50cb1489180
parent 3f422c60b225b4f33692c2d1d53e6478cc3df284
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 21 Apr 2026 15:58:13 -0700

lisp.M1: fix eval_if #f, prim_ashift neg; p1_gen: amd64 RRR rB-alias

eval_if compared cond against NIL (%7) instead of FALSE, so (if #f ...)
always took the then-branch. Fix exposed two latent bugs:

- prim_ashift_neg did sub_r0,r1,r0 assuming r1=0 (per p1_gen convention)
  but r1 still held argc. Zero r1 before the sub.
- amd64 rrr emitted mov rD,rA; op rD,rB — if rD aliased rB (e.g.
  sub_r3,r0,r3 in prim_abs), the mov clobbered rB before op read it.
  Funnel rB through rcx (non-P1 scratch) in the aliasing case for
  ADD/SUB/AND/OR/XOR, SHL/SHR/SAR, and MUL.

Diffstat:
Msrc/lisp.M1 | 3++-
Msrc/p1_gen.py | 25++++++++++++++++++++-----
2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/src/lisp.M1 b/src/lisp.M1
@@ -2908,6 +2908,7 @@ DEFINE ZERO32 '0000000000000000000000000000000000000000000000000000000000000000'
     li_br &prim_ashift_done
     b
 :prim_ashift_neg
+    li_r1 %0
     sub_r0,r1,r0                ## r0 = -k
     sar_r3,r3,r0
 :prim_ashift_done
@@ -4178,7 +4179,7 @@ DEFINE ZERO32 '0000000000000000000000000000000000000000000000000000000000000000'
     li_br &eval
     call                        ## r0 = cond value
-    li_r1 %7
+    li_r1 FALSE
     li_br &eval_if_else
     beq_r0,r1
diff --git a/src/p1_gen.py b/src/p1_gen.py
@@ -472,6 +472,10 @@ class AMD64(Encoder):
     def rrr(self, op, rD, rA, rB):
         if op == 'MUL':
+            # If rD aliases rB, save rB to rcx first (mov rD,rA would
+            # clobber it before imul reads it).
+            if rD == rB and rA != rB:
+                return amd_mov_rr('rcx', rB) + amd_mov_rr(rD, rA) + amd_imul_rr(rD, 'rcx')
             return amd_mov_rr(rD, rA) + amd_imul_rr(rD, rB)
         if op in ('DIV', 'REM'):
             # x86 idiv implicitly reads/writes rax (P1 r0) and rdx
@@ -496,13 +500,24 @@ class AMD64(Encoder):
             return seq
         if op in ('SHL', 'SHR', 'SAR'):
             ext = {'SHL': 4, 'SHR': 5, 'SAR': 7}[op]
-            seq = amd_mov_rr(rD, rA)
-            seq += amd_mov_rr('rcx', rB)
+            # Save rB → rcx FIRST so a subsequent mov rD,rA (which may
+            # alias rB) doesn't clobber the shift count.
+            seq = amd_mov_rr('rcx', rB)
+            if rD != rA:
+                seq += amd_mov_rr(rD, rA)
             seq += amd_shift_cl(ext, rD)
             return seq
-        # ADD/SUB/AND/OR/XOR: mov rD,rA ; op rD,rB
-        seq = amd_mov_rr(rD, rA)
-        seq += amd_alu_rr(AMD64_RRR_OPC[op], rD, rB)
+        # ADD/SUB/AND/OR/XOR. If rD aliases rB (e.g., sub_r3,r0,r3), the
+        # naive mov rD,rA overwrites rB before the op reads it — funnel
+        # rB through rcx.
+        opcode = AMD64_RRR_OPC[op]
+        if rD == rB and rA != rB:
+            seq = amd_mov_rr('rcx', rB)
+            seq += amd_mov_rr(rD, rA)
+            seq += amd_alu_rr(opcode, rD, 'rcx')
+            return seq
+        seq = '' if rD == rA else amd_mov_rr(rD, rA)
+        seq += amd_alu_rr(opcode, rD, rB)
         return seq

     def addi(self, rD, rA, imm):