boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 0860de4c2197b1bb58350d0cde82491703ce542b
parent 339180395bd3c74be62412953cfb73b442acf17d
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun,  3 May 2026 16:28:37 -0700

backends: wide-immediate fallbacks across all three arches

Every P1 backend's small-imm encoders silently truncated when the
immediate (or memory offset) didn't fit the native instruction's
window. The portable-ISA contract is full one-word immediates, so each
backend now picks the small encoding when it fits and transparently
spills into a materialise-then-R-type (or address-staging) sequence
otherwise.

aarch64 (P1-aarch64.M1pp):
- aa64_materialize_imm_any used by p1_logi_ANDI/ORI: MOVZ/MOVN when
  the value (or its complement) fits 16 bits, otherwise the 4-insn
  MOVZ + 3*MOVK chain that p1_li already uses.
- aa64_add_imm_any / aa64_sub_imm_any: third arm materialises and
  emits the R-type ADD/SUB once magnitude exceeds 24 bits.
- aa64_mem_fallback routes through the now-correct _any variants so
  memory accesses past the unscaled-imm9 / scaled-imm12 windows no
  longer truncate the address.

riscv64 (P1-riscv64.M1pp):
- rv_logi_any for ANDI/ORI: native I-type when imm fits the 12-bit
  signed window, else materialise in scratch (t5/x30) + R-type AND/OR.
- rv_ld_any / rv_sd_any / rv_lbu_any / rv_sb_any: address-staging
  fallback when the offset exceeds the I/S-type 12-bit signed window.
  p1_mem_LD/ST/LB/SB and p1_ldarg routed through the _any variants.

amd64 (P1-amd64.M1pp):
- p1_logi_ANDI / p1_logi_ORI / p1_addi each get a third arm so values
  outside the signed-imm32 window (>0x7FFFFFFF or <-0x80000000)
  materialise via p1_li(scratch, imm) + R-type ADD/AND/OR. Without
  this the imm32 form silently sign-extends — e.g. ANDI with
  0xFFFFFFFF would mask with -1 and yield the input unchanged.

tests/P1/wide-imm.P1pp: single behavioural fixture exercising every
wide path (%andi/%ori/%addi past each arch's small-imm window;
%ld/%st at offset 40000 and %lb/%sb at offset 5000, past the aarch64
and riscv64 small-offset windows; amd64 encodes these natively as
disp32). A trap value pre-stored at offset 0 catches silent address
truncation. Same source builds and runs identically on aarch64 /
riscv64 / amd64; expected stdout is "ABCDEFGH\n".

docs/P1.md: rewrite the immediate-class and memory-offset paragraphs
to reflect the new portable contract (full one-word immediate,
backends prefer the small window for code size); refresh the
toolchain envelope to describe M1pp + hex2++ + catm.

Diffstat:
M P1/P1-aarch64.M1pp | 51 +++++++++++++++++++++++++++++++++++++++------------
M P1/P1-amd64.M1pp | 45 +++++++++++++++++++++++++++++++++------------
M P1/P1-riscv64.M1pp | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
M docs/P1.md | 41 +++++++++++++++++++++++++++--------------
A tests/P1/wide-imm.P1pp | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/P1/wide-imm.expected | 1 +
6 files changed, 320 insertions(+), 46 deletions(-)

diff --git a/P1/P1-aarch64.M1pp b/P1/P1-aarch64.M1pp @@ -169,22 +169,34 @@ %((| 0xD1400000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) %endm +# ADD/SUB immediate with arbitrary unsigned magnitude. The native imm12 +# form covers [0, 4095]; the imm12<<12 form (optionally combined with a +# second imm12 for the low bits) covers [4096, 0xFFFFFF]. Past 24 bits, +# materialize the constant in scratch and emit the R-type ADD/SUB. +# Callers must not pass `scratch` as `ra` (the materialize would +# clobber it before the R-type read). %macro aa64_add_imm_any(rd, ra, imm) %select((<= imm 4095), %aa64_add_imm(rd, ra, imm), - %select((= (& imm 0xFFF) 0), - %aa64_add_imm_lsl12(rd, ra, (>> imm 12)), - %aa64_add_imm_lsl12(rd, ra, (>> imm 12)) - %aa64_add_imm(rd, rd, (& imm 0xFFF)))) + %select((<= imm 0xFFFFFF), + %select((= (& imm 0xFFF) 0), + %aa64_add_imm_lsl12(rd, ra, (>> imm 12)), + %aa64_add_imm_lsl12(rd, ra, (>> imm 12)) + %aa64_add_imm(rd, rd, (& imm 0xFFF))), + %p1_li(scratch, imm) + %aa64_rrr(0x8B000000, rd, ra, scratch))) %endm %macro aa64_sub_imm_any(rd, ra, imm) %select((<= imm 4095), %aa64_sub_imm(rd, ra, imm), - %select((= (& imm 0xFFF) 0), - %aa64_sub_imm_lsl12(rd, ra, (>> imm 12)), - %aa64_sub_imm_lsl12(rd, ra, (>> imm 12)) - %aa64_sub_imm(rd, rd, (& imm 0xFFF)))) + %select((<= imm 0xFFFFFF), + %select((= (& imm 0xFFF) 0), + %aa64_sub_imm_lsl12(rd, ra, (>> imm 12)), + %aa64_sub_imm_lsl12(rd, ra, (>> imm 12)) + %aa64_sub_imm(rd, rd, (& imm 0xFFF))), + %p1_li(scratch, imm) + %aa64_rrr(0xCB000000, rd, ra, scratch))) %endm %macro aa64_mov_rr(dst, src) @@ -217,6 +229,21 @@ %aa64_movn(rd, (& (~ imm) 0xFFFF))) %endm +# Materialize an arbitrary 64-bit signed immediate into `rd`. Picks the +# 1-insn MOVZ / MOVN form when the value (or its complement, for +# negatives) fits 16 bits; otherwise emits the 4-insn MOVZ + 3*MOVK +# chain used by %p1_li. Used by ANDI/ORI/ADDI fallbacks below to avoid +# silently truncating to the small-imm window. 
+%macro aa64_materialize_imm_any(rd, imm) +%select((>= imm 0), + %select((<= imm 0xFFFF), + %aa64_movz(rd, imm), + %p1_li(rd, imm)), + %select((>= imm -65536), + %aa64_movn(rd, (& (~ imm) 0xFFFF)), + %p1_li(rd, imm))) +%endm + %macro aa64_ldst_uimm12(base, rt, rn, off_bytes, size_log2) %((| base (<< (>> off_bytes size_log2) 10) (<< %aa64_reg(rn) 5) %aa64_reg(rt))) %endm @@ -278,9 +305,9 @@ %macro aa64_mem_fallback(op, rt, rn, off) %select((>= off 0), - %aa64_add_imm(scratch, rn, off) + %aa64_add_imm_any(scratch, rn, off) %aa64_ldst_uimm12(%aa64_mem_uimm_base(op), rt, scratch, 0, %aa64_mem_size(op)), - %aa64_sub_imm(scratch, rn, (- 0 off)) + %aa64_sub_imm_any(scratch, rn, (- 0 off)) %aa64_ldst_uimm12(%aa64_mem_uimm_base(op), rt, scratch, 0, %aa64_mem_size(op))) %endm @@ -425,11 +452,11 @@ %endm %macro p1_logi_ANDI(rd, ra, imm) -%aa64_materialize_small_imm(scratch, imm) +%aa64_materialize_imm_any(scratch, imm) %aa64_rrr(0x8A000000, rd, ra, scratch) %endm %macro p1_logi_ORI(rd, ra, imm) -%aa64_materialize_small_imm(scratch, imm) +%aa64_materialize_imm_any(scratch, imm) %aa64_rrr(0xAA000000, rd, ra, scratch) %endm %macro p1_logi(op, rd, ra, imm) diff --git a/P1/P1-amd64.M1pp b/P1/P1-amd64.M1pp @@ -638,29 +638,50 @@ $(imm) %select((>= imm -128), %select((<= imm 127), %amd_alu_ri8(0, rd, imm), - %amd_alu_ri32(0, rd, imm)), - %amd_alu_ri32(0, rd, imm)) -%endm - -# AND/OR with imm: 83 /ext ib sign-extends imm8 to 64 bits. That works for -# imm in [-128, 127] (and for -1 as a convenient all-ones mask), but breaks -# for positive imms >= 128 — ANDI with 255 would become AND with -# 0xFFFFFFFFFFFFFFFF. Widen to the imm32 form when imm8 would misencode. + %select((<= imm 2147483647), + %amd_alu_ri32(0, rd, imm), + %p1_li(scratch, imm) + %amd_rrr_ADD(rd, rd, scratch))), + %select((>= imm -2147483648), + %amd_alu_ri32(0, rd, imm), + %p1_li(scratch, imm) + %amd_rrr_ADD(rd, rd, scratch))) +%endm + +# AND/OR with imm. 
Three windows: +# imm in [-128, 127] -> 83 /ext ib (imm8 sign-extends) +# imm in [INT32_MIN, INT32_MAX] -> 81 /ext id (imm32 sign-extends) +# else -> materialise imm in scratch, R-type AND/OR. +# The third arm covers positive imms above 0x7FFFFFFF (e.g. 0xFFFFFFFF +# or 0xDEADBEEF) where the imm32 sign-extension would silently flip the +# upper word to all-ones. %macro p1_logi_ANDI(rd, ra, imm) %amd_mov_rr(rd, ra) %select((>= imm -128), %select((<= imm 127), %amd_alu_ri8(4, rd, imm), - %amd_alu_ri32(4, rd, imm)), - %amd_alu_ri32(4, rd, imm)) + %select((<= imm 2147483647), + %amd_alu_ri32(4, rd, imm), + %p1_li(scratch, imm) + %amd_rrr_AND(rd, rd, scratch))), + %select((>= imm -2147483648), + %amd_alu_ri32(4, rd, imm), + %p1_li(scratch, imm) + %amd_rrr_AND(rd, rd, scratch))) %endm %macro p1_logi_ORI(rd, ra, imm) %amd_mov_rr(rd, ra) %select((>= imm -128), %select((<= imm 127), %amd_alu_ri8(1, rd, imm), - %amd_alu_ri32(1, rd, imm)), - %amd_alu_ri32(1, rd, imm)) + %select((<= imm 2147483647), + %amd_alu_ri32(1, rd, imm), + %p1_li(scratch, imm) + %amd_rrr_OR(rd, rd, scratch))), + %select((>= imm -2147483648), + %amd_alu_ri32(1, rd, imm), + %p1_li(scratch, imm) + %amd_rrr_OR(rd, rd, scratch))) %endm %macro p1_logi(op, rd, ra, imm) %p1_logi_##op(rd, ra, imm) diff --git a/P1/P1-riscv64.M1pp b/P1/P1-riscv64.M1pp @@ -237,6 +237,68 @@ %rv_i_type(0x00006003, rd, ra, imm12) %endm +# Load/store with arbitrary signed offset. The native I-type/S-type +# imm12 covers [-2048, 2047]; past that, materialize the offset in +# scratch (t5/x30), compute scratch = ra + scratch via R-type ADD, and +# issue the load/store with offset 0. Callers must not pass scratch as +# `ra` or `rs` — the materialize would clobber it before the address +# computation reads it. 
+%macro rv_ld_any(rd, ra, off) +%select((>= off -2048), + %select((<= off 2047), + %rv_ld(rd, ra, off), + %rv_lit64_prefix(scratch) + $(off) + %rv_r_type(0x00000033, scratch, ra, scratch) + %rv_ld(rd, scratch, 0)), + %rv_lit64_prefix(scratch) + $(off) + %rv_r_type(0x00000033, scratch, ra, scratch) + %rv_ld(rd, scratch, 0)) +%endm + +%macro rv_sd_any(rs, ra, off) +%select((>= off -2048), + %select((<= off 2047), + %rv_sd(rs, ra, off), + %rv_lit64_prefix(scratch) + $(off) + %rv_r_type(0x00000033, scratch, ra, scratch) + %rv_sd(rs, scratch, 0)), + %rv_lit64_prefix(scratch) + $(off) + %rv_r_type(0x00000033, scratch, ra, scratch) + %rv_sd(rs, scratch, 0)) +%endm + +%macro rv_lbu_any(rd, ra, off) +%select((>= off -2048), + %select((<= off 2047), + %rv_lbu(rd, ra, off), + %rv_lit64_prefix(scratch) + $(off) + %rv_r_type(0x00000033, scratch, ra, scratch) + %rv_lbu(rd, scratch, 0)), + %rv_lit64_prefix(scratch) + $(off) + %rv_r_type(0x00000033, scratch, ra, scratch) + %rv_lbu(rd, scratch, 0)) +%endm + +%macro rv_sb_any(rs, ra, off) +%select((>= off -2048), + %select((<= off 2047), + %rv_sb(rs, ra, off), + %rv_lit64_prefix(scratch) + $(off) + %rv_r_type(0x00000033, scratch, ra, scratch) + %rv_sb(rs, scratch, 0)), + %rv_lit64_prefix(scratch) + $(off) + %rv_r_type(0x00000033, scratch, ra, scratch) + %rv_sb(rs, scratch, 0)) +%endm + %macro rv_mov_rr(dst, src) %rv_addi(dst, src, 0) %endm @@ -361,11 +423,28 @@ $(imm) %rv_addi_any(rd, ra, imm) %endm +# Logical-immediate fallback: when imm fits the I-type's 12-bit signed +# field, emit the native ANDI/ORI; otherwise materialize the immediate +# in scratch (t5/x30) and use the R-type AND/OR. funct3=7 (AND) or 6 +# (OR) is shared between the I-type (opcode 0x13) and R-type +# (opcode 0x33) encodings. 
+%macro rv_logi_any(rd, ra, imm, base_i, base_r) +%select((>= imm -2048), + %select((<= imm 2047), + %rv_i_type(base_i, rd, ra, imm), + %rv_lit64_prefix(scratch) + $(imm) + %rv_r_type(base_r, rd, ra, scratch)), + %rv_lit64_prefix(scratch) + $(imm) + %rv_r_type(base_r, rd, ra, scratch)) +%endm + %macro p1_logi_ANDI(rd, ra, imm) -%rv_i_type(0x00007013, rd, ra, imm) +%rv_logi_any(rd, ra, imm, 0x00007013, 0x00007033) %endm %macro p1_logi_ORI(rd, ra, imm) -%rv_i_type(0x00006013, rd, ra, imm) +%rv_logi_any(rd, ra, imm, 0x00006013, 0x00006033) %endm %macro p1_logi(op, rd, ra, imm) %p1_logi_##op(rd, ra, imm) @@ -385,16 +464,16 @@ $(imm) %endm %macro p1_mem_LD(rt, rn, off) -%rv_ld(rt, rn, off) +%rv_ld_any(rt, rn, off) %endm %macro p1_mem_ST(rt, rn, off) -%rv_sd(rt, rn, off) +%rv_sd_any(rt, rn, off) %endm %macro p1_mem_LB(rt, rn, off) -%rv_lbu(rt, rn, off) +%rv_lbu_any(rt, rn, off) %endm %macro p1_mem_SB(rt, rn, off) -%rv_sb(rt, rn, off) +%rv_sb_any(rt, rn, off) %endm %macro p1_mem(op, rt, rn, off) %select((= %rv_is_sp(rn) 1), @@ -403,8 +482,8 @@ $(imm) %endm %macro p1_ldarg(rd, slot) -%rv_ld(scratch, sp, 8) -%rv_ld(rd, scratch, (+ 16 (* 8 slot))) +%rv_ld(rd, sp, 8) +%rv_ld_any(rd, rd, (+ 16 (* 8 slot))) %endm %macro p1_b() diff --git a/docs/P1.md b/docs/P1.md @@ -18,15 +18,21 @@ portable indirect-result convention described below. ## Toolchain envelope -P1 must be assemblable through the existing `M0` + `hex2` path, with -`catm` as the only composition primitive between source or generated fragments. -The spec therefore assumes only the following toolchain features: - -- `M0`-level `DEFINE name hex_bytes` substitution -- raw byte emission -- labels and label references supported by `hex2` +P1 source is assembled by the `M1pp → hex2++` chain, with `catm` as the +only composition primitive between source or generated fragments. 
The +spec therefore assumes only the following toolchain features: + +- `M1pp` macro expansion: function-like macros, compile-time integer + expressions, and the `!@%$` little-endian hex-emission forms used by + the per-arch backends to pack instruction words at expansion time +- labels, label references, and `.scope` / `.endscope` / `.align` / + `.fill` / `.ptrsize` directives supported by `hex2++` - file concatenation via `catm` +`hex2++` sees only contiguous bytes; all target-specific encoding +(register packing, bit-scattered immediates, native branch +displacements) lives in the per-arch M1pp backend. + ## Source notation This document describes instructions using ordinary assembly notation such as @@ -245,8 +251,11 @@ Leaf functions that need no frame-local storage may omit the frame entirely. Immediate operands appear only in instructions that explicitly admit them. Portable source has three immediate classes: -- **Inline integer immediate** — a signed 12-bit assembly-time constant in the - range `-2048..2047` +- **Inline integer immediate** — any assembly-time signed integer constant + that fits one word. Backends prefer the native instruction's small-imm + encoding when the value fits its window (e.g. signed 12-bit on + `ADDI`/`ANDI`/`ORI` and on memory offsets); larger values fall back to + a materialise-then-R-type sequence transparent to portable source. - **Materialized word value** — a full one-word assembly-time constant loaded with `LI` - **Materialized address** — the address of a label loaded with `LA` @@ -264,10 +273,11 @@ The backend may realize `LI` and `LA` using native immediates, literal pools, multi-instruction sequences, or other backend-private mechanisms. Backends may assume labels fit in 32 bits when realizing `LA` and `LA_BR`. -This reflects the stage0 image layout (`hex2-0` base `0x00600000`, programs -well under 4 GB), not a portable-ISA-level guarantee. 
Backends that target -images loaded above the 4 GB boundary must adjust their `LA` / `LA_BR` -lowering. `LI` makes no such assumption — it materializes any one-word value. +This reflects the current image layout (`hex2++` base `0x00600000`, +programs well under 4 GB), not a portable-ISA-level guarantee. Backends +that target images loaded above the 4 GB boundary must adjust their `LA` +/ `LA_BR` lowering. `LI` makes no such assumption — it materializes any +one-word value. ## Control Flow @@ -438,7 +448,10 @@ P1 defines the following memory-access operations: `LB` loads one byte and zero-extends it to a full word. `SB` stores the low 8 bits of the source value. -Memory offsets use signed 12-bit inline immediates. +Memory offsets are signed inline integer immediates and follow the same +backend-fallback policy as arithmetic immediates: backends prefer the +native instruction's small-offset encoding (typically signed 12-bit) and +transparently spill into address-staging when the offset is wider. The base address for a memory access may be any exposed general register or `sp`. diff --git a/tests/P1/wide-imm.P1pp b/tests/P1/wide-imm.P1pp @@ -0,0 +1,133 @@ +# tests/p1/wide-imm.P1pp -- backend wide-immediate behavioural test. +# +# Each subtest exercises one P1 op with an immediate or offset that +# falls outside its target instruction's small-imm window, so the +# backend's "_any" fallback (materialise + R-type / address-staging) +# must run for the result to be correct. Result is checked against +# the expected value; "X" on any mismatch. +# +# Coverage map (small-imm window per arch shown for context): +# aarch64 riscv64 +# %andi imm window 0..0xFFFF / -0x10000.. 
-2048..2047 +# %ori imm window (same) (same) +# %addi imm window 0..0xFFFFFF -2048..2047 +# %ld/%st 8B off window scaled 0..32760 + -2048..2047 +# unscaled -256..255 +# %lb/%sb 1B off window unscaled -256..255 + -2048..2047 +# scaled 0..4095 +# amd64 has native disp32/imm32, so no fallback runs there but the +# result must still be correct. +# +# Offsets chosen to land outside every arch's window: +# - 8-byte LD/ST at +40000 (past aarch64 scaled imm12) +# - 1-byte LB/SB at +5000 (past aarch64 unscaled imm12) +# +# Buffer storage: `:buf` sits just before `:ELF_end`, so &buf is in +# the BSS region the loader zero-fills past filesz (ph_memsz = 512 MB +# in the seed ELF header, so 40008 bytes past &buf is safely mapped). +# +# Expected stdout: "ABCDEFGH\n". + +%fn(p1_main, 0, { + # ---- A: %andi(rd, ra, 0xFFFFFFFF) on -1 -> 0xFFFFFFFF ---------------- + # Without the wide-andi fix: riscv64 truncates 0xFFFFFFFF to 0xFFF and + # aarch64 truncates to 0xFFFF, both giving wrong masks. + %li(t0, -1) + %andi(t0, t0, 0xFFFFFFFF) + %li(t1, 0xFFFFFFFF) + %bne(t0, t1, &.fail) + %la(a0, &c_a) %li(a1, 1) %call(&print) + + # ---- B: %ori(rd, ra, 0xDEADBEEF) on 0 -> 0xDEADBEEF ------------------ + %li(t0, 0) + %ori(t0, t0, 0xDEADBEEF) + %li(t1, 0xDEADBEEF) + %bne(t0, t1, &.fail) + %la(a0, &c_b) %li(a1, 1) %call(&print) + + # ---- C: %addi(rd, ra, 0xFFFFFFFF) on 0 -> 0xFFFFFFFF ----------------- + # Past aarch64's 24-bit add-imm window, past riscv64's 12-bit window, + # and (critically) past amd64's signed-imm32 range: the imm32 form + # would sign-extend 0xFFFFFFFF to -1 and silently subtract. + %li(t0, 0) + %addi(t0, t0, 0xFFFFFFFF) + %li(t1, 0xFFFFFFFF) + %bne(t0, t1, &.fail) + %la(a0, &c_c) %li(a1, 1) %call(&print) + + # ---- D: %addi(rd, ra, -0xFFFFFFFF) on 0xFFFFFFFF -> 0 ---------------- + # Negative magnitude past every backend's small-imm window. On amd64 + # the imm32 form would truncate -0xFFFFFFFF (low 32 bits = 0x1) + # and add 1 instead of subtracting 0xFFFFFFFF. 
+ %li(t0, 0xFFFFFFFF) + %addi(t0, t0, -0xFFFFFFFF) + %bnez(t0, &.fail) + %la(a0, &c_d) %li(a1, 1) %call(&print) + + # Stage a "trap" value at &buf+0 so a wide-offset store/load that + # silently masks its offset down to 0 is detected as the trap value + # leaking into the wide slot. + %la(s0, &buf) + %li(t0, 0xDEAD) + %st(t0, s0, 0) + + # ---- E/F: %st + %ld at offset 40000 -> roundtrip 0xCAFEBABE ---------- + # If the wide store silently truncates to offset 0, it overwrites the + # 0xDEAD trap (rather than landing at +40000), and the subsequent + # offset-0 ld below would read 0xCAFEBABE instead of 0xDEAD. If the + # wide load truncates, it reads the 0xDEAD trap instead of 0xCAFEBABE. + %li(t0, 0xCAFEBABE) + %st(t0, s0, 40000) + %ld(t1, s0, 40000) + %li(t2, 0xCAFEBABE) + %bne(t1, t2, &.fail) + %la(a0, &c_e) %li(a1, 1) %call(&print) + + %ld(t1, s0, 0) + %li(t2, 0xDEAD) + %bne(t1, t2, &.fail) + %la(a0, &c_f) %li(a1, 1) %call(&print) + + # Stage a 1-byte trap at &buf+1 (so it doesn't overlap the 8-byte + # value already at &buf+0) before the byte-level subtest. + %li(t0, 0x99) + %sb(t0, s0, 1) + + # ---- G/H: %sb + %lb at offset 5000 -> roundtrip 0x42 ----------------- + %li(t0, 0x42) + %sb(t0, s0, 5000) + %lb(t1, s0, 5000) + %li(t2, 0x42) + %bne(t1, t2, &.fail) + %la(a0, &c_g) %li(a1, 1) %call(&print) + + %lb(t1, s0, 1) + %li(t2, 0x99) + %bne(t1, t2, &.fail) + %la(a0, &c_h) %li(a1, 1) %call(&print) + + %la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 0) + %b(&.done) + + :.fail + %la(a0, &c_x) %li(a1, 1) %call(&print) + %la(a0, &c_nl) %li(a1, 1) %call(&print) + %li(a0, 1) + :.done +}) + +:c_a "A" +:c_b "B" +:c_c "C" +:c_d "D" +:c_e "E" +:c_f "F" +:c_g "G" +:c_h "H" +:c_x "X" +:c_nl " +" + +:buf +:ELF_end diff --git a/tests/P1/wide-imm.expected b/tests/P1/wide-imm.expected @@ -0,0 +1 @@ +ABCDEFGH