boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 71fc4b9777dfcce9ddeb91fda7bf0f95f3059cdf
parent f831ef7fd9e12863ce06e59378f6e668e7c9cc4b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 30 Apr 2026 09:30:25 -0700

p1pp: expand libp1pp and use from cc.scm

Diffstat:
MP1/P1pp.P1pp | 275+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mcc/cc.scm | 204+++++++++++++++++++++++++++++++++----------------------------------------------
Atests/P1/cmpset.P1pp | 98+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/P1/cmpset.expected | 1+
Atests/P1/ext-macros.P1pp | 123+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/P1/ext-macros.expected | 1+
Atests/P1/lea-slot.P1pp | 47+++++++++++++++++++++++++++++++++++++++++++++++
Atests/P1/lea-slot.expected | 1+
Atests/P1/memcpy-call.P1pp | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/P1/memcpy-call.expected | 1+
Atests/P1/ptr-arith.P1pp | 65+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/P1/ptr-arith.expected | 1+
Atests/P1/sub-word-mem.P1pp | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/P1/sub-word-mem.expected | 1+
Atests/P1/switch-case.P1pp | 56++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/P1/switch-case.expected | 1+
Atests/P1/unops.P1pp | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/P1/unops.expected | 1+
18 files changed, 979 insertions(+), 119 deletions(-)

diff --git a/P1/P1pp.P1pp b/P1/P1pp.P1pp @@ -98,6 +98,281 @@ %endm # ========================================================================= +# Sub-word memory access +# ========================================================================= +# +# P1 has only 1-byte (%lb/%sb) and 8-byte (%ld/%st) memory ops, and the +# 8-byte ops require natural 8-byte alignment. For struct fields and +# packed data laid out at narrower widths, sub-word access is byte- +# decomposed: %lb-gather + shli/or for loads, %sb-scatter + shri for +# stores. These macros encapsulate that pattern so callers do not have +# to open-code it (and so a backend can later substitute a single +# native sub-word load/store when alignment is statically known). +# +# Conventions: +# `rd` is the destination (loads); `rs` is the source (stores). +# Stores preserve `rs`; loads clobber `rd`. `scratch` is a working +# register distinct from rd/rs and base. Bytes are little-endian: +# byte 0 (low) at off+0. The signed-load variants (%ld_sh, %ld_sw) +# sign-extend the gathered value to the canonical 64-bit form. 
+# +# %ld_h(rd, base, off, scratch) — 2-byte zero-extending load +# %ld_w(rd, base, off, scratch) — 4-byte zero-extending load +# %ld_sh(rd, base, off, scratch) — 2-byte sign-extending load +# %ld_sw(rd, base, off, scratch) — 4-byte sign-extending load +# %st_h(rs, base, off, scratch) — 2-byte store (writes low 16 bits) +# %st_w(rs, base, off, scratch) — 4-byte store (writes low 32 bits) + +%macro ld_h(rd, base, off, scratch) + %lb(rd, base, off) + %lb(scratch, base, (+ off 1)) + %shli(scratch, scratch, 8) + %or(rd, rd, scratch) +%endm + +%macro ld_w(rd, base, off, scratch) + %lb(rd, base, off) + %lb(scratch, base, (+ off 1)) + %shli(scratch, scratch, 8) + %or(rd, rd, scratch) + %lb(scratch, base, (+ off 2)) + %shli(scratch, scratch, 16) + %or(rd, rd, scratch) + %lb(scratch, base, (+ off 3)) + %shli(scratch, scratch, 24) + %or(rd, rd, scratch) +%endm + +%macro ld_sh(rd, base, off, scratch) + %ld_h(rd, base, off, scratch) + %shli(rd, rd, 48) + %sari(rd, rd, 48) +%endm + +%macro ld_sw(rd, base, off, scratch) + %ld_w(rd, base, off, scratch) + %shli(rd, rd, 32) + %sari(rd, rd, 32) +%endm + +%macro st_h(rs, base, off, scratch) + %sb(rs, base, off) + %shri(scratch, rs, 8) + %sb(scratch, base, (+ off 1)) +%endm + +%macro st_w(rs, base, off, scratch) + %sb(rs, base, off) + %shri(scratch, rs, 8) + %sb(scratch, base, (+ off 1)) + %shri(scratch, rs, 16) + %sb(scratch, base, (+ off 2)) + %shri(scratch, rs, 24) + %sb(scratch, base, (+ off 3)) +%endm + +# ========================================================================= +# Sign and zero extension +# ========================================================================= +# +# %sextN(rd, ra) truncate ra to N bits and sign-extend to 64. +# %zextN(rd, ra) truncate ra to N bits and zero-extend to 64. +# %zext32(rd, ra, scratch) +# like zextN but needs a scratch register because +# 0xFFFFFFFF does not fit a 16-bit movz immediate +# (the path %andi takes when materializing the mask). +# +# rd may equal ra. 
The signed forms use shli/sari at the right amount; +# zext8/zext16 ride on %andi (the mask fits movz so no caller scratch +# needed); zext32 materializes the mask explicitly. + +%macro sext8(rd, ra) + %shli(rd, ra, 56) + %sari(rd, rd, 56) +%endm + +%macro sext16(rd, ra) + %shli(rd, ra, 48) + %sari(rd, rd, 48) +%endm + +%macro sext32(rd, ra) + %shli(rd, ra, 32) + %sari(rd, rd, 32) +%endm + +%macro zext8(rd, ra) + %andi(rd, ra, 255) +%endm + +%macro zext16(rd, ra) + %andi(rd, ra, 65535) +%endm + +%macro zext32(rd, ra, scratch) + %li(scratch, 4294967295) + %and(rd, ra, scratch) +%endm + +# ========================================================================= +# Frame-slot address +# ========================================================================= +# +# %lea_slot(rd, slot) rd = address of the frame slot at byte offset +# `slot`. Centralizes the "%mov(rd, sp) + +# %addi(rd, rd, slot)" idiom — the backend folds +# its hidden frame-header offset into %mov(rd, sp), +# so callers must not bake a literal 16 into the +# %addi. `slot` may be any M1pp integer expression +# (a literal byte offset or a %fn__SO-relative +# slot-expr). + +%macro lea_slot(rd, slot) + %mov(rd, sp) + %addi(rd, rd, slot) +%endm + +# ========================================================================= +# Pointer scaling +# ========================================================================= +# +# %ptr_add(rd, ptr, idx, sz, scratch) rd = ptr + idx*sz +# %ptr_sub(rd, ptr, idx, sz, scratch) rd = ptr - idx*sz +# %ptr_diff(rd, p, q, sz, scratch) rd = (p - q) / sz +# +# `sz` is an M1pp-time integer constant (the C pointee size). When +# sz == 1 the multiply (or divide) collapses out at expansion time. +# +# %ptr_add and %ptr_sub clobber `scratch`. %ptr_diff clobbers `scratch` +# (only when sz != 1) and computes through `rd`, so callers must not +# alias `rd` with `p` or `q` in the sz != 1 path. 
+ +# sz <= 1 takes the byte-stride fast path: char* (sz=1) and void* +# (cc.scm uses sz=-1 for the void pointee, following GCC's byte-arith +# extension) both want raw idx with no scaling. + +%macro ptr_add(rd, ptr, idx, sz, scratch) +%select((< sz 2), + %add(rd, ptr, idx), + %li(scratch, sz) + %mul(scratch, idx, scratch) + %add(rd, ptr, scratch)) +%endm + +%macro ptr_sub(rd, ptr, idx, sz, scratch) +%select((< sz 2), + %sub(rd, ptr, idx), + %li(scratch, sz) + %mul(scratch, idx, scratch) + %sub(rd, ptr, scratch)) +%endm + +%macro ptr_diff(rd, p, q, sz, scratch) +%select((< sz 2), + %sub(rd, p, q), + %sub(rd, p, q) + %li(scratch, sz) + %div(rd, rd, scratch)) +%endm + +# ========================================================================= +# Memcpy-call shorthand +# ========================================================================= +# +# %memcpy_call(dst_reg, src_reg, n_imm) +# Marshal arguments into the libp1pp memcpy ABI and invoke it. Useful +# for fixed-size memory copies (e.g. struct copy in a code generator) +# where the size is known at expansion time. Neither register may be +# a2, and dst_reg must not be a1: a2 and then a1 are overwritten before +# dst_reg is read, so those aliases would be clobbered. (a0 is safe for +# either operand, since it is written last.) + +%macro memcpy_call(dst_reg, src_reg, n_imm) + %li(a2, n_imm) + %mov(a1, src_reg) + %mov(a0, dst_reg) + %call(&memcpy) +%endm + +# ========================================================================= +# Compare-and-set-bool macros +# ========================================================================= +# +# %cmpset_<cc>(rd, ra[, rb]) rd = (ra <cc> rb) ? 1 : 0 +# +# Two-operand: eq, ne, lt, ltu (signed/unsigned). +# Zero-operand (compare against zero): eqz, nez, ltz. +# +# Lower to %ifelse_<cc>(...) which itself works across all P1 backends. +# A backend that supports a native conditional-set instruction can later +# specialize these to a single op without touching callers. 
+ +%macro cmpset_eq(rd, ra, rb) + %ifelse_eq(ra, rb, { %li(rd, 1) }, { %li(rd, 0) }) +%endm + +%macro cmpset_ne(rd, ra, rb) + %ifelse_ne(ra, rb, { %li(rd, 1) }, { %li(rd, 0) }) +%endm + +%macro cmpset_lt(rd, ra, rb) + %ifelse_lt(ra, rb, { %li(rd, 1) }, { %li(rd, 0) }) +%endm + +%macro cmpset_ltu(rd, ra, rb) + %ifelse_ltu(ra, rb, { %li(rd, 1) }, { %li(rd, 0) }) +%endm + +%macro cmpset_eqz(rd, ra) + %ifelse_eqz(ra, { %li(rd, 1) }, { %li(rd, 0) }) +%endm + +%macro cmpset_nez(rd, ra) + %ifelse_nez(ra, { %li(rd, 1) }, { %li(rd, 0) }) +%endm + +%macro cmpset_ltz(rd, ra) + %ifelse_ltz(ra, { %li(rd, 1) }, { %li(rd, 0) }) +%endm + +# ========================================================================= +# Tiny unops +# ========================================================================= +# +# %neg(rd, ra, scratch) rd = -ra (scratch holds the zero literal) +# %bnot(rd, ra, scratch) rd = ~ra (scratch holds the all-ones literal) +# %bool(rd, ra) rd = (ra != 0) ? 1 : 0 (alias of cmpset_nez) + +%macro neg(rd, ra, scratch) + %li(scratch, 0) + %sub(rd, scratch, ra) +%endm + +%macro bnot(rd, ra, scratch) + %li(scratch, -1) + %xor(rd, ra, scratch) +%endm + +%macro bool(rd, ra) + %cmpset_nez(rd, ra) +%endm + +# ========================================================================= +# Switch dispatch +# ========================================================================= +# +# %switch_case(ctrl, scratch, key, target) +# If `ctrl == key`, branch to `target`. `scratch` is used to +# materialize the key as a register operand. `target` is the full +# branch target (e.g. `&::case_3`). +# +# A code generator emitting a switch dispatcher emits one +# %switch_case per case, then an unconditional branch to the default. 
+ +%macro switch_case(ctrl, scratch, key, target) + %li(scratch, key) + %beq(ctrl, scratch, target) +%endm + +# ========================================================================= # Control-flow macros # ========================================================================= # diff --git a/cc/cc.scm b/cc/cc.scm @@ -2673,100 +2673,91 @@ ;; Scratch convention: helpers may clobber t1; callers never pass ;; reg=t1. -(define (%cg-emit-ldN-bytes cg reg base-bv off-expr-fn n-bytes) - ;; Emit n-bytes %lb gathers into reg with shift+OR. byte 0 is low. - ;; off-expr-fn is a procedure: (off-expr-fn k) returns the bv - ;; expression for offset k. - (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", " base-bv ", " - (off-expr-fn 0) ")\n")) - (let loop ((k 1)) - (cond - ((= k n-bytes) 0) - (else - (%cg-emit-many cg (list - "%lb(t1, " base-bv ", " (off-expr-fn k) ")\n" - "%shli(t1, t1, " (%n (* 8 k)) ")\n" - "%or(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", t1)\n")) - (loop (+ k 1)))))) - -(define (%cg-emit-stN-bytes cg reg base-bv off-expr-fn n-bytes) - ;; Emit n-bytes %sb scatters from reg via shri-shifted t1. - (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", " base-bv ", " - (off-expr-fn 0) ")\n")) - (let loop ((k 1)) - (cond - ((= k n-bytes) 0) - (else - (%cg-emit-many cg (list - "%shri(t1, " (%cg-reg->bv reg) ", " (%n (* 8 k)) ")\n" - "%sb(t1, " base-bv ", " (off-expr-fn k) ")\n")) - (loop (+ k 1)))))) - +;; Sub-word loads/stores defer byte-decomposition to libp1pp's +;; %ld_h / %ld_w / %ld_sh / %ld_sw / %st_h / %st_w macros (see +;; P1/P1pp.P1pp). cc.scm just emits one macro call per access; the +;; macro arranges the byte gather/scatter and (for signed loads) folds +;; in the sign-extend. t1 is the conventional scratch. +(define (%cg-emit-ld-sub cg reg base-bv off-bv signed? n-bytes) + (let ((mname (cond ((= n-bytes 2) (if signed? "%ld_sh(" "%ld_h(")) + ((= n-bytes 4) (if signed? 
"%ld_sw(" "%ld_w(")) + (else (die #f "cg-emit-ld-sub: bad width" n-bytes))))) + (%cg-emit-many cg (list mname (%cg-reg->bv reg) ", " + base-bv ", " off-bv ", t1)\n")))) + +(define (%cg-emit-st-sub cg reg base-bv off-bv n-bytes) + (let ((mname (cond ((= n-bytes 2) "%st_h(") + ((= n-bytes 4) "%st_w(") + (else (die #f "cg-emit-st-sub: bad width" n-bytes))))) + (%cg-emit-many cg (list mname (%cg-reg->bv reg) ", " + base-bv ", " off-bv ", t1)\n")))) + +;; "address of frame slot" — defers to libp1pp's %lea_slot, which hides +;; the backend frame-header offset that %mov(rd, sp) folds in. +(define (%cg-emit-lea-slot cg reg-bv slot-bv) + (%cg-emit-many cg (list "%lea_slot(" reg-bv ", " slot-bv ")\n"))) + +;; sext8/16/32 emitted via libp1pp's %sext<N>(rd, ra). shift-amount is +;; kept as the parameter for call-site clarity (callers think in bit +;; widths via the same 56/48/32 amounts they always have). (define (%cg-emit-sext cg reg shift-amount) - (%cg-emit-many cg (list - "%shli(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", " - (%n shift-amount) ")\n" - "%sari(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", " - (%n shift-amount) ")\n"))) + (let ((width (cond ((= shift-amount 56) "8") + ((= shift-amount 48) "16") + ((= shift-amount 32) "32") + (else (die #f "cg-emit-sext: bad shift" shift-amount)))) + (rb (%cg-reg->bv reg))) + (%cg-emit-many cg (list "%sext" width "(" rb ", " rb ")\n")))) (define (%cg-emit-ld-slot-typed cg reg ctype logical-off) (%cg-fp-reject! 'ld-slot ctype) (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype)) - (off-fn (lambda (k) (%cg-slot-expr cg (+ logical-off k))))) + (off-bv (%cg-slot-expr cg logical-off))) (cond ((= sz 1) (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", sp, " - (off-fn 0) ")\n")) + off-bv ")\n")) (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56)))) - ((= sz 2) - (%cg-emit-ldN-bytes cg reg "sp" off-fn 2) - (cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48)))) - ((= sz 4) - (%cg-emit-ldN-bytes cg reg "sp" off-fn 4) - (cond ((eq? 
kind 'i32) (%cg-emit-sext cg reg 32)))) + ((= sz 2) (%cg-emit-ld-sub cg reg "sp" off-bv (eq? kind 'i16) 2)) + ((= sz 4) (%cg-emit-ld-sub cg reg "sp" off-bv (eq? kind 'i32) 4)) (else (%cg-emit-ld-slot cg reg logical-off))))) (define (%cg-emit-st-slot-typed cg reg ctype logical-off) (%cg-fp-reject! 'st-slot ctype) (let* ((sz (ctype-size ctype)) - (off-fn (lambda (k) (%cg-slot-expr cg (+ logical-off k))))) + (off-bv (%cg-slot-expr cg logical-off))) (cond ((= sz 1) (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", sp, " - (off-fn 0) ")\n"))) - ((= sz 2) (%cg-emit-stN-bytes cg reg "sp" off-fn 2)) - ((= sz 4) (%cg-emit-stN-bytes cg reg "sp" off-fn 4)) + off-bv ")\n"))) + ((= sz 2) (%cg-emit-st-sub cg reg "sp" off-bv 2)) + ((= sz 4) (%cg-emit-st-sub cg reg "sp" off-bv 4)) (else (%cg-emit-st-slot cg reg logical-off))))) (define (%cg-emit-ld-typed cg reg ctype base off) (%cg-fp-reject! 'ld ctype) (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype)) (base-bv (%cg-reg->bv base)) - (off-fn (lambda (k) (%n (+ off k))))) + (off-bv (%n off))) (cond ((= sz 1) (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", " - base-bv ", " (off-fn 0) ")\n")) + base-bv ", " off-bv ")\n")) (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56)))) - ((= sz 2) - (%cg-emit-ldN-bytes cg reg base-bv off-fn 2) - (cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48)))) - ((= sz 4) - (%cg-emit-ldN-bytes cg reg base-bv off-fn 4) - (cond ((eq? kind 'i32) (%cg-emit-sext cg reg 32)))) + ((= sz 2) (%cg-emit-ld-sub cg reg base-bv off-bv (eq? kind 'i16) 2)) + ((= sz 4) (%cg-emit-ld-sub cg reg base-bv off-bv (eq? kind 'i32) 4)) (else (%cg-emit-ld cg reg base off))))) (define (%cg-emit-st-typed cg reg ctype base off) (%cg-fp-reject! 
'st ctype) (let* ((sz (ctype-size ctype)) (base-bv (%cg-reg->bv base)) - (off-fn (lambda (k) (%n (+ off k))))) + (off-bv (%n off))) (cond ((= sz 1) (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", " - base-bv ", " (off-fn 0) ")\n"))) - ((= sz 2) (%cg-emit-stN-bytes cg reg base-bv off-fn 2)) - ((= sz 4) (%cg-emit-stN-bytes cg reg base-bv off-fn 4)) + base-bv ", " off-bv ")\n"))) + ((= sz 2) (%cg-emit-st-sub cg reg base-bv off-bv 2)) + ((= sz 4) (%cg-emit-st-sub cg reg base-bv off-bv 4)) (else (%cg-emit-st cg reg base off))))) (define (%cg-load-opnd-into cg op reg) @@ -3292,9 +3283,7 @@ ;; direct frame lval: address is sp+off. (($ opnd? (kind frame) (ext ,off)) (guard (not (%cg-indirect? cg off))) - (%cg-emit-many cg (list "%mov(t0, sp)\n" - "%addi(t0, t0, " - (%cg-slot-expr cg off) ")\n")) + (%cg-emit-lea-slot cg "t0" (%cg-slot-expr cg off)) (%cg-spill-reg cg 't0 pty)) ;; indirect frame lval (rare for arrays, but support it): ;; the slot holds the address already. @@ -3327,9 +3316,7 @@ (guard (%cg-indirect? cg off)) (%cg-emit-ld-slot cg reg off)) (($ opnd? (kind frame) (ext ,off)) - (%cg-emit-many cg (list "%mov(" reg-bv ", sp)\n" - "%addi(" reg-bv ", " reg-bv ", " - (%cg-slot-expr cg off) ")\n"))) + (%cg-emit-lea-slot cg reg-bv (%cg-slot-expr cg off))) (($ opnd? (kind global) (ext ,lbl)) (%cg-emit-la cg reg lbl)) (else (die #f "cg-emit-addr-of: unsupported lval kind" @@ -3351,20 +3338,18 @@ (%cg-emit-addr-of cg dst 't2) (%cg-emit-byte-copy cg 't2 't0 't1 sz))) -;; Per-byte struct copy. dst-reg and src-reg hold addresses; emits -;; size byte-load/byte-store pairs using tmp-reg as the byte staging -;; register. All three regs are assumed caller-saved temporaries. +;; Struct copy: defer to libp1pp memcpy via %memcpy_call. dst-reg and +;; src-reg hold the addresses; size is the byte count. 
tmp-reg is no +;; longer needed by this helper (kept in the signature so existing +;; callers don't have to thread their scratch allocation differently), +;; but the macro itself uses a0/a1/a2 around the call. dst-reg and +;; src-reg must not be a0 (the dst move would clobber a different live +;; input register); both current callers use t-regs. (define (%cg-emit-byte-copy cg dst-reg src-reg tmp-reg size) - (let ((dr (%cg-reg->bv dst-reg)) - (sr (%cg-reg->bv src-reg)) - (tr (%cg-reg->bv tmp-reg))) - (let loop ((k 0)) - (cond - ((>= k size) #t) - (else - (%cg-emit-many cg (list "%lb(" tr ", " sr ", " (%n k) ")\n" - "%sb(" tr ", " dr ", " (%n k) ")\n")) - (loop (+ k 1))))))) + (%cg-emit-many cg (list "%memcpy_call(" + (%cg-reg->bv dst-reg) ", " + (%cg-reg->bv src-reg) ", " + (%n size) ")\n"))) (define (cg-take-addr cg) (let* ((p (cg-pop cg)) @@ -3381,12 +3366,10 @@ (guard (%cg-indirect? cg off)) (%cg-emit-ld-slot cg 't0 off) (%cg-spill-reg cg 't0 pty)) - ;; %mov(rd, sp) gives the portable-sp pointer (the backend - ;; handles any hidden frame-header offset). Then add slot. + ;; %lea_slot wraps the "%mov(rd, sp); %addi(rd, rd, slot)" idiom; + ;; the backend hides any frame-header offset inside %mov(rd, sp). (($ opnd? (kind frame) (ext ,off)) - (%cg-emit-many cg (list "%mov(t0, sp)\n" - "%addi(t0, t0, " - (%cg-slot-expr cg off) ")\n")) + (%cg-emit-lea-slot cg "t0" (%cg-slot-expr cg off)) (%cg-spill-reg cg 't0 pty)) (($ opnd? (kind global) (ext ,lbl)) (%cg-emit-la cg 't0 lbl) @@ -3426,8 +3409,7 @@ (cond ((eq? to-kind 'bool) (%cg-load-opnd-into cg p 't0) - (%cg-emit-many cg (list - "%ifelse_eqz(t0, { %li(t0, 0) }, { %li(t0, 1) })\n")) + (%cg-emit-many cg (list "%bool(t0, t0)\n")) (%cg-spill-reg cg 't0 to-type)) ((or (eq? to-kind 'ptr) (and (or (eq? to-kind 'i64) (eq? to-kind 'u64)) @@ -3462,11 +3444,9 @@ ((eq? to-kind 'i8) (%cg-emit-sext cg 't0 56)) ((eq? to-kind 'i16) (%cg-emit-sext cg 't0 48)) ((eq? 
to-kind 'i32) (%cg-emit-sext cg 't0 32)) - ((= to-sz 1) (%cg-emit-many cg (list "%andi(t0, t0, 255)\n"))) - ((= to-sz 2) - (%cg-emit-many cg (list "%li(t1, 65535)\n%and(t0, t0, t1)\n"))) - ((= to-sz 4) - (%cg-emit-many cg (list "%li(t1, 4294967295)\n%and(t0, t0, t1)\n"))) + ((= to-sz 1) (%cg-emit-many cg (list "%zext8(t0, t0)\n"))) + ((= to-sz 2) (%cg-emit-many cg (list "%zext16(t0, t0)\n"))) + ((= to-sz 4) (%cg-emit-many cg (list "%zext32(t0, t0, t1)\n"))) (else 0)) (%cg-spill-reg cg 't0 to-type))))) @@ -3520,10 +3500,10 @@ (%cg-reg->bv ra) ", " (%cg-reg->bv rb) ")\n"))) (define (%cg-emit-cmp cg cc ra rb rd) - (%cg-emit-many cg (list "%ifelse_" cc "(" + (%cg-emit-many cg (list "%cmpset_" cc "(" + (%cg-reg->bv rd) ", " (%cg-reg->bv ra) ", " (%cg-reg->bv rb) - ", { %li(" (%cg-reg->bv rd) ", 1) }, " - "{ %li(" (%cg-reg->bv rd) ", 0) })\n"))) + ")\n"))) (define (cg-binop cg op) (let* ((b (cg-pop cg)) @@ -3546,29 +3526,21 @@ ((and a-ptr? (or (eq? op 'add) (eq? op 'sub)) (not b-ptr?)) (%cg-load-opnd-into cg a 'a0) (%cg-load-opnd-into cg b 'a1) - (let ((sz (%ctype-size (%ctype-pointee ta)))) - (cond ((> sz 1) (%cg-emit-many cg (list "%li(t0, " (%n sz) ")\n")) - (%cg-emit-rrr cg "mul" 'a1 'a1 't0)) - (else 0))) - (%cg-emit-rrr cg (if (eq? op 'add) "add" "sub") 't0 'a0 'a1) + (let ((sz (%ctype-size (%ctype-pointee ta))) + (mac (if (eq? op 'add) "%ptr_add(" "%ptr_sub("))) + (%cg-emit-many cg (list mac "t0, a0, a1, " (%n sz) ", t1)\n"))) (%cg-spill-reg cg 't0 result-ty)) ((and b-ptr? (eq? op 'add) (not a-ptr?)) (%cg-load-opnd-into cg a 'a0) (%cg-load-opnd-into cg b 'a1) (let ((sz (%ctype-size (%ctype-pointee tb)))) - (cond ((> sz 1) (%cg-emit-many cg (list "%li(t0, " (%n sz) ")\n")) - (%cg-emit-rrr cg "mul" 'a0 'a0 't0)) - (else 0))) - (%cg-emit-rrr cg "add" 't0 'a0 'a1) + (%cg-emit-many cg (list "%ptr_add(t0, a1, a0, " (%n sz) ", t1)\n"))) (%cg-spill-reg cg 't0 result-ty)) ((and a-ptr? b-ptr? (eq? 
op 'sub)) (%cg-load-opnd-into cg a 'a0) (%cg-load-opnd-into cg b 'a1) - (%cg-emit-rrr cg "sub" 't0 'a0 'a1) (let ((sz (%ctype-size (%ctype-pointee ta)))) - (cond ((> sz 1) (%cg-emit-many cg (list "%li(t1, " (%n sz) ")\n")) - (%cg-emit-rrr cg "div" 't0 't0 't1)) - (else 0))) + (%cg-emit-many cg (list "%ptr_diff(t0, a0, a1, " (%n sz) ", t1)\n"))) (%cg-spill-reg cg 't0 result-ty)) (else (%cg-load-opnd-into cg a 'a0) @@ -3624,13 +3596,13 @@ (%cg-load-opnd-into cg p 't0) (cond ((eq? op 'neg) - (%cg-emit-many cg (list "%li(t1, 0)\n%sub(t0, t1, t0)\n")) + (%cg-emit-many cg (list "%neg(t0, t0, t1)\n")) (%cg-spill-reg cg 't0 ty)) ((eq? op 'bnot) - (%cg-emit-many cg (list "%li(t1, -1)\n%xor(t0, t0, t1)\n")) + (%cg-emit-many cg (list "%bnot(t0, t0, t1)\n")) (%cg-spill-reg cg 't0 ty)) ((eq? op 'lnot) - (%cg-emit-many cg (list "%ifelse_eqz(t0, { %li(t0, 1) }, { %li(t0, 0) })\n")) + (%cg-emit-many cg (list "%cmpset_eqz(t0, t0)\n")) (%cg-spill-reg cg 't0 %t-i32)) (else (die #f "cg-unop: unknown op" op))))) @@ -3705,9 +3677,7 @@ (cond ((> sa 0) (%cg-bump-outgoing! cg sa)) (else 0))) (cond (sret? - (%cg-emit-many cg (list "%mov(a0, sp)\n" - "%addi(a0, a0, " - (%cg-slot-expr cg recv-slot) ")\n")))) + (%cg-emit-lea-slot cg "a0" (%cg-slot-expr cg recv-slot)))) (cond ((and (eq? (opnd-kind fn-op) 'global) (not (opnd-lval? fn-op))) (%cg-emit-many cg (list "%call(&" (opnd-ext fn-op) ")\n"))) @@ -3753,9 +3723,7 @@ (sret? (%cg-emit-ld-slot cg 't2 (%cg-fn-get cg '%fn-sret-slot))) (else - (%cg-emit-many cg (list "%mov(t2, sp)\n" - "%addi(t2, t2, " - (%cg-slot-expr cg ret-slot) ")\n")))) + (%cg-emit-lea-slot cg "t2" (%cg-slot-expr cg ret-slot)))) (%cg-emit-byte-copy cg 't2 't0 't1 sz) (%cg-emit-many cg (list "%b(&::ret)\n")))) (else @@ -3862,9 +3830,7 @@ (cond ((not (opnd-lval? ap-lv)) (die #f "cg-va-start: ap not lvalue"))) ;; Compute address into a0. 
- (%cg-emit-many cg (list "%mov(a0, sp)\n" - "%addi(a0, a0, " - (%cg-slot-expr cg vsl) ")\n")) + (%cg-emit-lea-slot cg "a0" (%cg-slot-expr cg vsl)) ;; Store a0 at ap-lval. (cond ((eq? (opnd-kind ap-lv) 'frame) @@ -3982,8 +3948,8 @@ (%cg-slot-expr cg (swctx-ctrl-slot sw)) ")\n")) (for-each (lambda (c) - (%cg-emit-many cg (list "%li(t1, " (%n (car c)) ")\n" - "%beq(t0, t1, &::" (cdr c) ")\n"))) + (%cg-emit-many cg (list "%switch_case(t0, t1, " + (%n (car c)) ", &::" (cdr c) ")\n"))) cases) (cond (default-lbl (%cg-emit-many cg (list "%b(&::" default-lbl ")\n"))) diff --git a/tests/P1/cmpset.P1pp b/tests/P1/cmpset.P1pp @@ -0,0 +1,98 @@ +# tests/p1/cmpset.P1pp -- libp1pp compare-and-set-bool macros. +# +# %cmpset_eq(rd, ra, rb) rd = (ra == rb) ? 1 : 0 +# %cmpset_ne rd = (ra != rb) ? 1 : 0 +# %cmpset_lt signed less-than +# %cmpset_ltu unsigned less-than +# %cmpset_eqz(rd, ra) rd = (ra == 0) ? 1 : 0 +# %cmpset_nez rd = (ra != 0) ? 1 : 0 +# %cmpset_ltz rd = (ra < 0) ? 1 : 0 +# +# Each subtest checks both the true and false case for a comparator. +# Output: "EQ NE LT LTU EZ NZ LZ\n" on full pass. 
+ +%fn(p1_main, 0, { + # ---- eq ------------------------------------------------------------- + %li(s0, 5) %li(s1, 5) + %cmpset_eq(t0, s0, s1) + %li(t1, 1) %bne(t0, t1, &::fail) + %li(s1, 6) + %cmpset_eq(t0, s0, s1) + %li(t1, 0) %bne(t0, t1, &::fail) + %la(a0, &c_eq) %li(a1, 3) %call(&print) + + # ---- ne ------------------------------------------------------------- + %li(s0, 5) %li(s1, 6) + %cmpset_ne(t0, s0, s1) + %li(t1, 1) %bne(t0, t1, &::fail) + %li(s1, 5) + %cmpset_ne(t0, s0, s1) + %li(t1, 0) %bne(t0, t1, &::fail) + %la(a0, &c_ne) %li(a1, 3) %call(&print) + + # ---- lt ------------------------------------------------------------- + %li(s0, -3) %li(s1, 2) + %cmpset_lt(t0, s0, s1) + %li(t1, 1) %bne(t0, t1, &::fail) + %cmpset_lt(t0, s1, s0) + %li(t1, 0) %bne(t0, t1, &::fail) + %la(a0, &c_lt) %li(a1, 3) %call(&print) + + # ---- ltu (unsigned: -1 is huge) ------------------------------------ + %li(s0, 5) %li(s1, -1) + %cmpset_ltu(t0, s0, s1) + %li(t1, 1) %bne(t0, t1, &::fail) + %cmpset_ltu(t0, s1, s0) + %li(t1, 0) %bne(t0, t1, &::fail) + %la(a0, &c_ltu) %li(a1, 4) %call(&print) + + # ---- eqz ------------------------------------------------------------ + %li(s0, 0) + %cmpset_eqz(t0, s0) + %li(t1, 1) %bne(t0, t1, &::fail) + %li(s0, 7) + %cmpset_eqz(t0, s0) + %li(t1, 0) %bne(t0, t1, &::fail) + %la(a0, &c_ez) %li(a1, 3) %call(&print) + + # ---- nez ------------------------------------------------------------ + %li(s0, 7) + %cmpset_nez(t0, s0) + %li(t1, 1) %bne(t0, t1, &::fail) + %li(s0, 0) + %cmpset_nez(t0, s0) + %li(t1, 0) %bne(t0, t1, &::fail) + %la(a0, &c_nz) %li(a1, 3) %call(&print) + + # ---- ltz ------------------------------------------------------------ + %li(s0, -1) + %cmpset_ltz(t0, s0) + %li(t1, 1) %bne(t0, t1, &::fail) + %li(s0, 0) + %cmpset_ltz(t0, s0) + %li(t1, 0) %bne(t0, t1, &::fail) + %la(a0, &c_lz) %li(a1, 3) %call(&print) + + %la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 0) + %b(&::done) + + ::fail + %la(a0, &c_x) %li(a1, 1) %call(&print) + 
%la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 1) + ::done +}) + +:c_eq "EQ " +:c_ne "NE " +:c_lt "LT " +:c_ltu "LTU " +:c_ez "EZ " +:c_nz "NZ " +:c_lz "LZ" +:c_x "X" +:c_nl " +" + +:ELF_end diff --git a/tests/P1/cmpset.expected b/tests/P1/cmpset.expected @@ -0,0 +1 @@ +EQ NE LT LTU EZ NZ LZ diff --git a/tests/P1/ext-macros.P1pp b/tests/P1/ext-macros.P1pp @@ -0,0 +1,123 @@ +# tests/p1/ext-macros.P1pp -- libp1pp sign/zero extension macros. +# +# %sext8/16/32(rd, ra) truncate to N bits and sign-extend to 64 +# %zext8(rd, ra) truncate to N bits and zero-extend +# %zext16(rd, ra) same +# %zext32(rd, ra, scratch) same; needs scratch since 0xFFFFFFFF > movz +# +# Each subtest emits one ASCII byte on success, "X" on mismatch. +# Expected: "ABCDEFGHIJKL\n". + +%fn(p1_main, 0, { + # ---- A: sext8 positive (0x7F → 0x7F) ------------------------------- + %li(t0, 0x7F) + %sext8(t0, t0) + %li(t1, 127) + %bne(t0, t1, &::fail) + %la(a0, &c_a) %li(a1, 1) %call(&print) + + # ---- B: sext8 negative (0x80 → -128) ------------------------------- + %li(t0, 0x80) + %sext8(t0, t0) + %li(t1, -128) + %bne(t0, t1, &::fail) + %la(a0, &c_b) %li(a1, 1) %call(&print) + + # ---- C: sext16 positive (0x7FFF → 32767) --------------------------- + %li(t0, 0x7FFF) + %sext16(t0, t0) + %li(t1, 32767) + %bne(t0, t1, &::fail) + %la(a0, &c_c) %li(a1, 1) %call(&print) + + # ---- D: sext16 negative (0x8000 → -32768) -------------------------- + %li(t0, 0x8000) + %sext16(t0, t0) + %li(t1, -32768) + %bne(t0, t1, &::fail) + %la(a0, &c_d) %li(a1, 1) %call(&print) + + # ---- E: sext32 positive (0x7FFFFFFF → 2147483647) ------------------ + %li(t0, 0x7FFFFFFF) + %sext32(t0, t0) + %li(t1, 2147483647) + %bne(t0, t1, &::fail) + %la(a0, &c_e) %li(a1, 1) %call(&print) + + # ---- F: sext32 negative (0x80000000 → -2147483648) ----------------- + %li(t0, 0x80000000) + %sext32(t0, t0) + %li(t1, -2147483648) + %bne(t0, t1, &::fail) + %la(a0, &c_f) %li(a1, 1) %call(&print) + + # ---- G: zext8 (-1 → 0xFF) 
------------------------------------------ + %li(t0, -1) + %zext8(t0, t0) + %li(t1, 0xFF) + %bne(t0, t1, &::fail) + %la(a0, &c_g) %li(a1, 1) %call(&print) + + # ---- H: zext16 (-1 → 0xFFFF) --------------------------------------- + %li(t0, -1) + %zext16(t0, t0) + %li(t1, 0xFFFF) + %bne(t0, t1, &::fail) + %la(a0, &c_h) %li(a1, 1) %call(&print) + + # ---- I: zext32 (-1 → 0xFFFFFFFF) ----------------------------------- + %li(t0, -1) + %zext32(t0, t0, t1) + %li(t1, 0xFFFFFFFF) + %bne(t0, t1, &::fail) + %la(a0, &c_i) %li(a1, 1) %call(&print) + + # ---- J: rd != ra split (sext8) ------------------------------------- + %li(s0, 0x80) + %sext8(t0, s0) + %li(t1, -128) + %bne(t0, t1, &::fail) + %la(a0, &c_j) %li(a1, 1) %call(&print) + + # ---- K: rd != ra split (zext16) ------------------------------------ + %li(s0, -1) + %zext16(t0, s0) + %li(t1, 0xFFFF) + %bne(t0, t1, &::fail) + %la(a0, &c_k) %li(a1, 1) %call(&print) + + # ---- L: rd != ra split (zext32) ------------------------------------ + %li(s0, -1) + %zext32(t0, s0, t1) + %li(t1, 0xFFFFFFFF) + %bne(t0, t1, &::fail) + %la(a0, &c_l) %li(a1, 1) %call(&print) + + %la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 0) + %b(&::done) + + ::fail + %la(a0, &c_x) %li(a1, 1) %call(&print) + %la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 1) + ::done +}) + +:c_a "A" +:c_b "B" +:c_c "C" +:c_d "D" +:c_e "E" +:c_f "F" +:c_g "G" +:c_h "H" +:c_i "I" +:c_j "J" +:c_k "K" +:c_l "L" +:c_x "X" +:c_nl " +" + +:ELF_end diff --git a/tests/P1/ext-macros.expected b/tests/P1/ext-macros.expected @@ -0,0 +1 @@ +ABCDEFGHIJKL diff --git a/tests/P1/lea-slot.P1pp b/tests/P1/lea-slot.P1pp @@ -0,0 +1,47 @@ +# tests/p1/lea-slot.P1pp -- exercise libp1pp %lea_slot. +# +# %lea_slot(rd, slot_expr) rd = address of frame slot at slot_expr. +# +# Equivalent to %mov(rd, sp) + %addi(rd, rd, slot_expr) — centralizes +# the hidden 16-byte frame header that the backend folds into %mov(rd, sp). 
+# +# Verification: store via sp-relative %st, read via address from +# %lea_slot, expect equality. Then write via address, read via sp, +# again expect equality. Two slots so we also verify slot offset != 0. +# Output: "AB\n". + +%fn(p1_main, 16, { + # ---- A: write @sp+0, read via lea_slot ------------------------------ + %li(t0, 0xCAFEBABE) + %st(t0, sp, 0) + %lea_slot(s0, 0) + %ld(t1, s0, 0) + %bne(t0, t1, &::fail) + %la(a0, &c_a) %li(a1, 1) %call(&print) + + # ---- B: write via lea_slot @offset 8, read via sp+8 ----------------- + %lea_slot(s0, 8) + %li(t0, 0xDEADBEEF) + %st(t0, s0, 0) + %ld(t1, sp, 8) + %bne(t0, t1, &::fail) + %la(a0, &c_b) %li(a1, 1) %call(&print) + + %la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 0) + %b(&::done) + + ::fail + %la(a0, &c_x) %li(a1, 1) %call(&print) + %la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 1) + ::done +}) + +:c_a "A" +:c_b "B" +:c_x "X" +:c_nl " +" + +:ELF_end diff --git a/tests/P1/lea-slot.expected b/tests/P1/lea-slot.expected @@ -0,0 +1 @@ +AB diff --git a/tests/P1/memcpy-call.P1pp b/tests/P1/memcpy-call.P1pp @@ -0,0 +1,53 @@ +# tests/p1/memcpy-call.P1pp -- libp1pp %memcpy_call macro. +# +# %memcpy_call(dst_reg, src_reg, n_imm) +# Convenience wrapper around libp1pp's memcpy: marshals dst/src into +# a0/a1, sets a2=n_imm, and invokes %call(&memcpy). dst_reg and +# src_reg must not be a0 (the dst move would clobber a different +# live input register). +# +# Verification: copy a 13-byte source buffer into a destination +# buffer and byte-compare. Output: "OK\n" on pass. 
+ +%fn(p1_main, 0, { + %la(s0, &dst) + %la(s1, &src) + %memcpy_call(s0, s1, 13) + + # Verify dst[i] == src[i] for every i in 0..12 (t2 is the byte index) + %li(t2, 0) + %loop_tag(L0, { + %li(t1, 13) + %if_eq(t2, t1, { %break(L0) }) + %la(s0, &dst) + %add(s0, s0, t2) + %lb(t0, s0, 0) + %la(s1, &src) + %add(s1, s1, t2) + %lb(t1, s1, 0) + %bne(t0, t1, &::fail) + %addi(t2, t2, 1) + }) + + %la(a0, &c_ok) %li(a1, 3) %call(&print) + %li(a0, 0) + %b(&::done) + + ::fail + %la(a0, &c_fail) %li(a1, 5) %call(&print) + %li(a0, 1) + ::done +}) + +:src +"Hello, World!" +:dst +"............." +:c_ok +"OK +" +:c_fail +"FAIL +" + +:ELF_end diff --git a/tests/P1/memcpy-call.expected b/tests/P1/memcpy-call.expected @@ -0,0 +1 @@ +OK diff --git a/tests/P1/ptr-arith.P1pp b/tests/P1/ptr-arith.P1pp @@ -0,0 +1,65 @@ +# tests/P1/ptr-arith.P1pp -- libp1pp pointer scaling macros; prints "ABCDEF\n" on success. +# +# %ptr_add(rd, ptr, idx, sz, scratch) rd = ptr + idx*sz +# %ptr_sub(rd, ptr, idx, sz, scratch) rd = ptr - idx*sz +# %ptr_diff(rd, p, q, sz, scratch) rd = (p - q) / sz (sz constant) + +%fn(p1_main, 0, { + # ---- A: ptr_add sz=1 ------------------------------------------------ + %li(s0, 1000) %li(s1, 7) + %ptr_add(t0, s0, s1, 1, t1) + %li(t2, 1007) %bne(t0, t2, &::fail) + %la(a0, &c_a) %li(a1, 1) %call(&print) + + # ---- B: ptr_add sz=4 ------------------------------------------------ + %li(s0, 1000) %li(s1, 5) + %ptr_add(t0, s0, s1, 4, t1) + %li(t2, 1020) %bne(t0, t2, &::fail) + %la(a0, &c_b) %li(a1, 1) %call(&print) + + # ---- C: ptr_add sz=8 ------------------------------------------------ + %li(s0, 1000) %li(s1, 3) + %ptr_add(t0, s0, s1, 8, t1) + %li(t2, 1024) %bne(t0, t2, &::fail) + %la(a0, &c_c) %li(a1, 1) %call(&print) + + # ---- D: ptr_sub sz=4 ------------------------------------------------ + %li(s0, 1000) %li(s1, 5) + %ptr_sub(t0, s0, s1, 4, t1) + %li(t2, 980) %bne(t0, t2, &::fail) + %la(a0, &c_d) %li(a1, 1) %call(&print) + + # ---- E: ptr_diff sz=4 ----------------------------------------------- + %li(s0, 1020) %li(s1, 1000) +
%ptr_diff(t0, s0, s1, 4, t1) + %li(t2, 5) %bne(t0, t2, &::fail) + %la(a0, &c_e) %li(a1, 1) %call(&print) + + # ---- F: ptr_diff sz=8 ----------------------------------------------- + %li(s0, 1024) %li(s1, 1000) + %ptr_diff(t0, s0, s1, 8, t1) + %li(t2, 3) %bne(t0, t2, &::fail) + %la(a0, &c_f) %li(a1, 1) %call(&print) + + %la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 0) + %b(&::done) + + ::fail + %la(a0, &c_x) %li(a1, 1) %call(&print) + %la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 1) + ::done +}) + +:c_a "A" +:c_b "B" +:c_c "C" +:c_d "D" +:c_e "E" +:c_f "F" +:c_x "X" +:c_nl " +" + +:ELF_end diff --git a/tests/P1/ptr-arith.expected b/tests/P1/ptr-arith.expected @@ -0,0 +1 @@ +ABCDEF diff --git a/tests/P1/sub-word-mem.P1pp b/tests/P1/sub-word-mem.P1pp @@ -0,0 +1,96 @@ +# tests/P1/sub-word-mem.P1pp -- exercise libp1pp sub-word memory macros. +# +# %ld_h(rd, base, off, scratch) — 2-byte zero-extending load +# %ld_w(rd, base, off, scratch) — 4-byte zero-extending load +# %ld_sh(rd, base, off, scratch) — 2-byte sign-extending load +# %ld_sw(rd, base, off, scratch) — 4-byte sign-extending load +# %st_h(rs, base, off, scratch) — 2-byte store (low 16 bits) +# %st_w(rs, base, off, scratch) — 4-byte store (low 32 bits) +# +# Each subtest writes one ASCII byte to stdout on success, "X" on +# any mismatch. Expected: "ABCDEF\n".
+ +%fn(p1_main, 0, { + # ---- A: %st_h byte order (little-endian) ---------------------------- + %la(s0, &buf) + %li(t0, 0xCAFE) + %st_h(t0, s0, 0, t1) + %lb(t2, s0, 0) + %li(t1, 0xFE) + %bne(t2, t1, &::fail) + %lb(t2, s0, 1) + %li(t1, 0xCA) + %bne(t2, t1, &::fail) + %la(a0, &c_a) %li(a1, 1) %call(&print) + + # ---- B: %st_w byte order -------------------------------------------- + %la(s0, &buf) + %li(t0, 0xDEADBEEF) + %st_w(t0, s0, 0, t1) + %lb(t2, s0, 0) %li(t1, 0xEF) %bne(t2, t1, &::fail) + %lb(t2, s0, 1) %li(t1, 0xBE) %bne(t2, t1, &::fail) + %lb(t2, s0, 2) %li(t1, 0xAD) %bne(t2, t1, &::fail) + %lb(t2, s0, 3) %li(t1, 0xDE) %bne(t2, t1, &::fail) + %la(a0, &c_b) %li(a1, 1) %call(&print) + + # ---- C: %ld_h round-trip (zero-extend) ------------------------------ + %la(s0, &buf) + %li(t0, 0xCAFE) + %st_h(t0, s0, 8, t1) + %ld_h(t0, s0, 8, t1) + %li(t1, 0xCAFE) + %bne(t0, t1, &::fail) + %la(a0, &c_c) %li(a1, 1) %call(&print) + + # ---- D: %ld_w round-trip (zero-extend) ------------------------------ + %la(s0, &buf) + %li(t0, 0xDEADBEEF) + %st_w(t0, s0, 8, t1) + %ld_w(t0, s0, 8, t1) + %li(t1, 0xDEADBEEF) + %bne(t0, t1, &::fail) + %la(a0, &c_d) %li(a1, 1) %call(&print) + + # ---- E: %ld_sh sign-extend (0x8000 -> -32768) ----------------------- + %la(s0, &buf) + %li(t0, 0x8000) + %st_h(t0, s0, 0, t1) + %ld_sh(t0, s0, 0, t1) + %li(t1, -32768) + %bne(t0, t1, &::fail) + %la(a0, &c_e) %li(a1, 1) %call(&print) + + # ---- F: %ld_sw sign-extend (0x80000000 -> -2147483648) -------------- + %la(s0, &buf) + %li(t0, 0x80000000) + %st_w(t0, s0, 0, t1) + %ld_sw(t0, s0, 0, t1) + %li(t1, -2147483648) + %bne(t0, t1, &::fail) + %la(a0, &c_f) %li(a1, 1) %call(&print) + + %la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 0) + %b(&::done) + + ::fail + %la(a0, &c_x) %li(a1, 1) %call(&print) + %la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 1) + ::done +}) + +:c_a "A" +:c_b "B" +:c_c "C" +:c_d "D" +:c_e "E" +:c_f "F" +:c_x "X" +:c_nl " +" + +:buf +%(0) %(0) + +:ELF_end diff --git
a/tests/P1/sub-word-mem.expected b/tests/P1/sub-word-mem.expected @@ -0,0 +1 @@ +ABCDEF diff --git a/tests/P1/switch-case.P1pp b/tests/P1/switch-case.P1pp @@ -0,0 +1,56 @@ +# tests/P1/switch-case.P1pp -- libp1pp %switch_case dispatch macro. +# +# %switch_case(ctrl, scratch, key, target) +# if ctrl == key, branch to target. scratch holds the key literal. +# +# A small dispatcher: select_n(n) returns 100, 200, or 300 for n==1/2/3, +# and 999 for the default. Drive it with four calls (n = 1, 2, 3, 99); +# only the first three print a letter. Output: "ABC\n". + +%fn(select_n, 0, { + %switch_case(a0, t1, 1, &::case_1) + %switch_case(a0, t1, 2, &::case_2) + %switch_case(a0, t1, 3, &::case_3) + %li(a0, 999) + %b(&::done) + ::case_1 %li(a0, 100) %b(&::done) + ::case_2 %li(a0, 200) %b(&::done) + ::case_3 %li(a0, 300) + ::done +}) + +%fn(p1_main, 0, { + %li(a0, 1) %call(&select_n) + %li(t0, 100) %bne(a0, t0, &::fail) + %la(a0, &c_a) %li(a1, 1) %call(&print) + + %li(a0, 2) %call(&select_n) + %li(t0, 200) %bne(a0, t0, &::fail) + %la(a0, &c_b) %li(a1, 1) %call(&print) + + %li(a0, 3) %call(&select_n) + %li(t0, 300) %bne(a0, t0, &::fail) + %la(a0, &c_c) %li(a1, 1) %call(&print) + + %li(a0, 99) %call(&select_n) + %li(t0, 999) %bne(a0, t0, &::fail) + + %la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 0) + %b(&::done) + + ::fail + %la(a0, &c_x) %li(a1, 1) %call(&print) + %la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 1) + ::done +}) + +:c_a "A" +:c_b "B" +:c_c "C" +:c_x "X" +:c_nl " +" + +:ELF_end diff --git a/tests/P1/switch-case.expected b/tests/P1/switch-case.expected @@ -0,0 +1 @@ +ABC diff --git a/tests/P1/unops.P1pp b/tests/P1/unops.P1pp @@ -0,0 +1,73 @@ +# tests/P1/unops.P1pp -- libp1pp unary helpers. +# +# %neg(rd, ra, scratch) rd = -ra (uses scratch for the zero literal) +# %bnot(rd, ra, scratch) rd = ~ra (uses scratch for the all-ones literal) +# %bool(rd, ra) rd = (ra != 0) ? 1 : 0 +# +# Output: "ABCDEF\n".
+ +%fn(p1_main, 0, { + # ---- A: neg positive (5 -> -5) ------------------------------------- + %li(s0, 5) + %neg(t0, s0, t1) + %li(t2, -5) + %bne(t0, t2, &::fail) + %la(a0, &c_a) %li(a1, 1) %call(&print) + + # ---- B: neg negative (-7 -> 7) ------------------------------------- + %li(s0, -7) + %neg(t0, s0, t1) + %li(t2, 7) + %bne(t0, t2, &::fail) + %la(a0, &c_b) %li(a1, 1) %call(&print) + + # ---- C: bnot 0 -> -1 ----------------------------------------------- + %li(s0, 0) + %bnot(t0, s0, t1) + %li(t2, -1) + %bne(t0, t2, &::fail) + %la(a0, &c_c) %li(a1, 1) %call(&print) + + # ---- D: bnot 0xA5 -> ~0xA5 (= -0xA6, i.e. -166 decimal) ------------- + %li(s0, 0xA5) + %bnot(t0, s0, t1) + %li(t2, -166) + %bne(t0, t2, &::fail) + %la(a0, &c_d) %li(a1, 1) %call(&print) + + # ---- E: bool zero -> 0 --------------------------------------------- + %li(s0, 0) + %bool(t0, s0) + %li(t2, 0) + %bne(t0, t2, &::fail) + %la(a0, &c_e) %li(a1, 1) %call(&print) + + # ---- F: bool nonzero -> 1 ------------------------------------------ + %li(s0, 42) + %bool(t0, s0) + %li(t2, 1) + %bne(t0, t2, &::fail) + %la(a0, &c_f) %li(a1, 1) %call(&print) + + %la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 0) + %b(&::done) + + ::fail + %la(a0, &c_x) %li(a1, 1) %call(&print) + %la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 1) + ::done +}) + +:c_a "A" +:c_b "B" +:c_c "C" +:c_d "D" +:c_e "E" +:c_f "F" +:c_x "X" +:c_nl " +" + +:ELF_end diff --git a/tests/P1/unops.expected b/tests/P1/unops.expected @@ -0,0 +1 @@ +ABCDEF