commit e301f7533bda6a2b139ad1771894919c12b87765
parent 29e05d332a0b41d5af518091260aeead4c1d3fca
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 5 May 2026 17:11:25 -0700
M1pp: cut wasted work in dispatch + macro-body expansion
In both the C oracle (M1pp.c) and the P1 self-host (M1pp.P1):
* First-byte gate on per-token dispatch: pass-through WORD tokens (the
vast majority — plain identifiers, hex literals, %()/!() outputs)
exit through emit_token after one byte compare instead of walking
the directive + builtin cascade.
* Second-byte dispatch within c0=='%': c1 narrows the candidates to the
directives/builtins sharing that byte (at most three, for c1=='s');
%FOO user macros with c1 outside {m,s,e,f,b,l} skip every
tok_eq_const and go straight to find_macro.
* Cache param/local-label classification at %macro define time into
parallel byte arrays indexed by body-token slot. expand_macro_tokens'
body loop reads the cached values instead of running find_param +
is_local_label_token per body token per expansion (and the redundant
re-derivation in emt_do_substitute_paste/_plain is gone).
* Skip paste_pool_range when neither the macro body nor the call's
arguments contain TOK_PASTE. Tracked via macro->has_paste (set in
define_macro) and a sticky args_have_paste flag (set in parse_args,
snapshotted into emt_saw_arg_paste).
P1-side details: macro record grew 296 -> 304 bytes for the new
has_paste field (BSS slot has plenty of headroom; no OFF_* shifts);
new BSS regions for the parallel arrays are appended past expr_frames;
proc_loop's old proc_check_struct/enum/frame/endframe/newline/builtin
cascade is replaced by proc_byte_gate -> proc_c0_pct -> proc_c1_*
chain. Removed the dead :err_not_implemented / :msg_not_implemented
pair noticed during the cleanup pass. Per-arch P1 tables regenerated
via 'make tables' for the new mnemonics (or_t0,t0,t1, shri_a0,a0,5,
etc.).
Validation:
* C: byte-identical output across every .P1pp in the tree, including
the 5 MB build/aarch64/.boot3-stage/out/tcc.flat.P1pp (~0.08s -> 0.06s).
* P1: 'make test SUITE=m1pp ARCH=aarch64' -> 30 passed, 0 failed.
Diffstat:
| M | M1pp/M1pp.P1 | | | 594 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------ |
| M | M1pp/M1pp.c | | | 185 | +++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------- |
| M | P1/P1-aarch64.M1 | | | 53 | ++++++++++++++++++++++++++++++++++++++++++++++++++--- |
| M | P1/P1-amd64.M1 | | | 53 | ++++++++++++++++++++++++++++++++++++++++++++++++++--- |
| M | P1/P1-riscv64.M1 | | | 53 | ++++++++++++++++++++++++++++++++++++++++++++++++++--- |
5 files changed, 682 insertions(+), 256 deletions(-)
diff --git a/M1pp/M1pp.P1 b/M1pp/M1pp.P1
@@ -52,13 +52,19 @@ DEFINE M1PP_TEXT_CAP 0000000400000000
## 6.5 MB tcc.flat.P1pp tokenises to ~2.5 M; 8 M leaves comfortable
## headroom for the larger TUs the harness will exercise next.
DEFINE M1PP_TOKENS_END 0000001000000000
-## Macro record is 296 bytes: name (16) + param_count (8) + params[16]*16 (256)
-## + body_start (8) + body_end (8). MACROS_CAP fits 1024 records (303104 B).
+## Macro record is 304 bytes: name (16) + param_count (8) + params[16]*16 (256)
+## + body_start (8) + body_end (8) + has_paste (8). MACROS_CAP fits 1024
+## records (311296 B). The 2x BSS slot for macros (0x94000 B) still has
+## ample headroom past the new cap. has_paste is a 0/1 flag set during
+## define_macro when the body contains TOK_PASTE; expand_macro_tokens
+## reads it (along with args_have_paste from parse_args) to skip the
+## paste_pool_range scan when neither side contributes a `##`.
## Body-token arena fits 65536 tokens (2 MB = 0x200000).
-DEFINE M1PP_MACRO_RECORD_SIZE 2801000000000000
+DEFINE M1PP_MACRO_RECORD_SIZE 3001000000000000
DEFINE M1PP_MACRO_BODY_START_OFF 1801000000000000
DEFINE M1PP_MACRO_BODY_END_OFF 2001000000000000
-DEFINE M1PP_MACROS_CAP 00A0040000000000
+DEFINE M1PP_MACRO_HAS_PASTE_OFF 2801000000000000
+DEFINE M1PP_MACROS_CAP 00C0040000000000
DEFINE M1PP_MACRO_BODY_CAP 0000200000000000
DEFINE O_WRONLY_CREAT_TRUNC 4102000000000000
DEFINE MODE_0644 A401000000000000
@@ -183,6 +189,15 @@ DEFINE OFF_expr_frames 805A291F00000000
## the macro-table linear search. Placed past expr_frames (BSS end) so
## adding it does not shift any existing OFF_*.
DEFINE OFF_local_lookup_scratch 0052850000000000
+## macro_body_param_idx / macro_body_is_local_label — 1 byte per body
+## token slot (M1PP_MACRO_BODY_CAP / 32 = 65536 slots, so each region
+## is 0x10000 bytes). Populated at %macro definition time so the
+## expand_macro_tokens body loop reads cached classifications instead
+## of re-running find_param + is_local_label_token per body token per
+## expansion. Placed past the existing expr_frames (BSS end ≈ 0x1F296380)
+## so adding them does not shift any other OFF_*.
+DEFINE OFF_macro_body_param_idx 8063291F00000000
+DEFINE OFF_macro_body_is_local_label 80632A1F00000000
## --- Runtime shell: argv, read input, call pipeline, write output, exit ------
@@ -990,6 +1005,18 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_br &push_stream_span
call
+## proc_loop dispatch refactor:
+##
+## A — first-byte gate: most pass-through tokens (plain identifiers,
+## hex literals, the synthetic WORDs emitted by !@%$ evaluation)
+## don't begin with %, !, @, or $ and exit through proc_emit after
+## a single byte compare. The old cascade ran 5 directive + up to
+## 7 builtin tok_eq_const probes for *every* WORD.
+## B — second-byte dispatch: within the c0=='%' branch, a single
+## c1-byte switch narrows the candidates to the directives/builtins
+## that share that second byte, so at most three tok_eq_const probes
+## run (e.g. c1='s' selects %struct/%select/%str only).
+## A user macro %FOO with c1 outside {m,s,e,f,b,l} skips every
+## tok_eq_const and goes straight to find_macro.
:proc_loop
# s = current_stream(); if (s == 0) done
la_br &current_stream
@@ -1007,26 +1034,121 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
# tok = s->pos
st_t0,sp,8
- # ---- tok->kind == TOK_WORD && tok eq "%macro" ----
- # Directives are recognized anywhere in the input, not only at line-start.
+ # ---- TOK_NEWLINE fast path ----
ld_a1,t0,0
+ li_a2 TOK_NEWLINE
+ la_br &proc_handle_newline
+ beq_a1,a2
+
+ # Non-WORD tokens (LPAREN, RPAREN, COMMA, LBRACE, RBRACE, STRING,
+ # PASTE) skip the whole dispatch and emit literally.
li_a2 TOK_WORD
- la_br &proc_check_newline
+ la_br &proc_emit
bne_a1,a2
- mov_a0,t0
+
+ # tok->text.len, ptr — needed for the byte gate.
+ ld_t1,t0,16
+ la_br &proc_emit
+ beqz_t1
+ ld_t2,t0,8
+ lb_a3,t2,0
+
+ # has_paren = (tok+1 < s->end && (tok+1)->kind == TOK_LPAREN
+ # && (tok+1)->tight). Stash to proc_has_paren.
+ li_a0 %0 %0
+ la_a1 &proc_has_paren
+ st_a0,a1,0
+ addi_a2,t0,32
+ ld_a1,sp,0
+ ld_a1,a1,8
+ la_br &proc_byte_gate
+ blt_a1,a2
+ la_br &proc_byte_gate
+ beq_a2,a1
+ ld_a0,a2,0
+ li_a1 TOK_LPAREN
+ la_br &proc_byte_gate
+ bne_a0,a1
+ ld_a0,a2,24
+ la_br &proc_byte_gate
+ beqz_a0
+ li_a0 %1 %0
+ la_a1 &proc_has_paren
+ st_a0,a1,0
+
+:proc_byte_gate
+ # c0 == '%' (37) -> percent branch
+ li_a0 %37 %0
+ la_br &proc_c0_pct
+ beq_a3,a0
+ # Arith builtins ! @ $ require len == 1 + has_paren. All other
+ # first-byte values fall through to proc_emit.
+ li_a0 %1 %0
+ la_br &proc_emit
+ bne_t1,a0
+ la_a1 &proc_has_paren
+ ld_a1,a1,0
+ la_br &proc_emit
+ beqz_a1
+ li_a0 %33 %0
+ la_br &proc_do_builtin
+ beq_a3,a0
+ li_a0 %64 %0
+ la_br &proc_do_builtin
+ beq_a3,a0
+ li_a0 %36 %0
+ la_br &proc_do_builtin
+ beq_a3,a0
+ la_br &proc_emit
+ b
+
+:proc_c0_pct
+ # c0 == '%'. Bare '%' (len == 1) + tight-paren -> arith builtin.
+ li_a0 %1 %0
+ la_br &proc_c0_pct_one
+ beq_t1,a0
+ # len >= 2: dispatch on c1 (text[1]).
+ lb_a3,t2,1
+
+ li_a0 %109 %0
+ la_br &proc_c1_m
+ beq_a3,a0
+ li_a0 %115 %0
+ la_br &proc_c1_s
+ beq_a3,a0
+ li_a0 %101 %0
+ la_br &proc_c1_e
+ beq_a3,a0
+ li_a0 %102 %0
+ la_br &proc_c1_f
+ beq_a3,a0
+ li_a0 %98 %0
+ la_br &proc_c1_b
+ beq_a3,a0
+ li_a0 %108 %0
+ la_br &proc_c1_l
+ beq_a3,a0
+ # No directive/builtin candidate — try user macro.
+ la_br &proc_check_macro
+ b
+
+:proc_c0_pct_one
+ la_a1 &proc_has_paren
+ ld_a1,a1,0
+ la_br &proc_emit
+ beqz_a1
+ la_br &proc_do_builtin
+ b
+
+:proc_c1_m
+ # %macro
+ ld_a0,sp,8
la_a1 &const_macro
li_a2 %6 %0
la_br &tok_eq_const
call
- la_br &proc_check_struct
+ la_br &proc_check_macro
beqz_a0
-
- # %macro: shim into define_macro through proc_pos / proc_line_start.
- # define_macro reads/writes proc_pos against source_end, so it only
- # behaves correctly when s is the source stream — which holds in
- # practice (line_start in expansion streams is cleared before any
- # %macro could matter). proc_line_start lets define_macro decide
- # whether to consume the trailing newline after %endm.
ld_a0,sp,0
ld_a1,sp,8
la_br &proc_save_pos_and_ls
@@ -1036,19 +1158,14 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_br &proc_restore_and_loop
b
-## ---- tok eq "%struct" ----
-## The %macro guard above already proved kind == TOK_WORD; if we reach here
-## via a %macro non-match, that gate still holds. Each handler:
-## 1. tok_eq_const(tok, NAME, len)
-## 2. miss -> branch to next check
-## 3. hit -> proc_save_pos_and_ls(s, tok); shim; proc_restore_and_loop
-:proc_check_struct
+:proc_c1_s
+ # %struct (no paren); %select / %str (paren).
ld_a0,sp,8
la_a1 &const_struct
li_a2 %7 %0
la_br &tok_eq_const
call
- la_br &proc_check_enum
+ la_br &proc_c1_s_not_struct
beqz_a0
ld_a0,sp,0
ld_a1,sp,8
@@ -1061,15 +1178,39 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
call
la_br &proc_restore_and_loop
b
+:proc_c1_s_not_struct
+ la_a1 &proc_has_paren
+ ld_a1,a1,0
+ la_br &proc_check_macro
+ beqz_a1
+ ld_a0,sp,8
+ la_a1 &const_select
+ li_a2 %7 %0
+ la_br &tok_eq_const
+ call
+ la_br &proc_c1_s_not_select
+ beqz_a0
+ la_br &proc_do_builtin
+ b
+:proc_c1_s_not_select
+ ld_a0,sp,8
+ la_a1 &const_str
+ li_a2 %4 %0
+ la_br &tok_eq_const
+ call
+ la_br &proc_check_macro
+ beqz_a0
+ la_br &proc_do_builtin
+ b
-## ---- tok eq "%enum" ----
-:proc_check_enum
+:proc_c1_e
+ # %enum (no paren); %endframe (no paren).
ld_a0,sp,8
la_a1 &const_enum
li_a2 %5 %0
la_br &tok_eq_const
call
- la_br &proc_check_frame
+ la_br &proc_c1_e_not_enum
beqz_a0
ld_a0,sp,0
ld_a1,sp,8
@@ -1082,15 +1223,13 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
call
la_br &proc_restore_and_loop
b
-
-## ---- tok eq "%frame" ----
-:proc_check_frame
+:proc_c1_e_not_enum
ld_a0,sp,8
- la_a1 &const_frame
- li_a2 %6 %0
+ la_a1 &const_endframe
+ li_a2 %9 %0
la_br &tok_eq_const
call
- la_br &proc_check_endframe
+ la_br &proc_check_macro
beqz_a0
ld_a0,sp,0
ld_a1,sp,8
@@ -1098,19 +1237,19 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
call
ld_a0,sp,0
ld_a0,a0,8
- la_br &push_frame
+ la_br &pop_frame
call
la_br &proc_restore_and_loop
b
-## ---- tok eq "%endframe" ----
-:proc_check_endframe
+:proc_c1_f
+ # %frame (no paren).
ld_a0,sp,8
- la_a1 &const_endframe
- li_a2 %9 %0
+ la_a1 &const_frame
+ li_a2 %6 %0
la_br &tok_eq_const
call
- la_br &proc_check_newline
+ la_br &proc_check_macro
beqz_a0
ld_a0,sp,0
ld_a1,sp,8
@@ -1118,107 +1257,53 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
call
ld_a0,sp,0
ld_a0,a0,8
- la_br &pop_frame
+ la_br &push_frame
call
la_br &proc_restore_and_loop
b
-:proc_check_newline
- # reload s, tok
- ld_a0,sp,0
- ld_t0,sp,8
- ld_a1,t0,0
- li_a2 TOK_NEWLINE
- la_br &proc_check_builtin
- bne_a1,a2
-
- # newline: s->pos += 24; s->line_start = 1; emit_newline()
- addi_t0,t0,32
- st_t0,a0,16
- li_t1 %1 %0
- st_t1,a0,24
- la_br &emit_newline
- call
- la_br &proc_loop
- b
-
-:proc_check_builtin
- # tok->kind == TOK_WORD && tok+1 < s->end && (tok+1)->kind == TOK_LPAREN ?
- ld_a0,sp,0
- ld_t0,sp,8
- ld_a1,t0,0
- li_a2 TOK_WORD
- la_br &proc_check_macro
- bne_a1,a2
- addi_t1,t0,32
- ld_a1,a0,8
- la_br &proc_check_builtin_has_next
- blt_t1,a1
- la_br &proc_check_macro
- b
-:proc_check_builtin_has_next
- ld_a1,t1,0
- li_a2 TOK_LPAREN
- la_br &proc_check_macro
- bne_a1,a2
-
- # require (tok+1)->tight — `! ( ... )` with whitespace before `(` is
- # NOT a builtin paren-call; emit `!` literally then handle `(` later.
- ld_a1,t1,24
+:proc_c1_b
+ # %bytes (paren).
+ la_a1 &proc_has_paren
+ ld_a1,a1,0
la_br &proc_check_macro
beqz_a1
-
- # try the seven builtin names: ! @ % $ %select %str %local
- mov_a0,t0
- la_a1 &const_bang
- li_a2 %1 %0
- la_br &tok_eq_const
- call
- la_br &proc_do_builtin
- bnez_a0
- ld_a0,sp,8
- la_a1 &const_at
- li_a2 %1 %0
- la_br &tok_eq_const
- call
- la_br &proc_do_builtin
- bnez_a0
ld_a0,sp,8
- la_a1 &const_pct
- li_a2 %1 %0
- la_br &tok_eq_const
- call
- la_br &proc_do_builtin
- bnez_a0
- ld_a0,sp,8
- la_a1 &const_dlr
- li_a2 %1 %0
- la_br &tok_eq_const
- call
- la_br &proc_do_builtin
- bnez_a0
- ld_a0,sp,8
- la_a1 &const_select
- li_a2 %7 %0
- la_br &tok_eq_const
- call
- la_br &proc_do_builtin
- bnez_a0
- ld_a0,sp,8
- la_a1 &const_str
- li_a2 %4 %0
+ la_a1 &const_bytes
+ li_a2 %6 %0
la_br &tok_eq_const
call
+ la_br &proc_check_macro
+ beqz_a0
la_br &proc_do_builtin
- bnez_a0
+ b
+
+:proc_c1_l
+ # %local (paren).
+ la_a1 &proc_has_paren
+ ld_a1,a1,0
+ la_br &proc_check_macro
+ beqz_a1
ld_a0,sp,8
la_a1 &const_local
li_a2 %6 %0
la_br &tok_eq_const
call
- la_br &proc_do_builtin
- bnez_a0
la_br &proc_check_macro
+ beqz_a0
+ la_br &proc_do_builtin
+ b
+
+:proc_handle_newline
+ ld_a0,sp,0
+ ld_t0,sp,8
+ addi_t0,t0,32
+ st_t0,a0,16
+ li_t1 %1 %0
+ st_t1,a0,24
+ la_br &emit_newline
+ call
+ la_br &proc_loop
b
:proc_do_builtin
@@ -1677,6 +1762,91 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_br &err_macro_body_overflow
blt_t2,a3
+ # === C: cache classification for this body token, so expand_macro_tokens
+ # === doesn't re-run find_param + is_local_label_token per body token per
+ # === expansion. Computed once at definition; written to byte-stride
+ # === parallel arrays indexed by (slot - macro_body_tokens_ptr) / 32.
+ # offset = (t1 - macro_body_tokens_ptr) / 32 (t1 still = macro_body_end)
+ la_a0 &macro_body_tokens_ptr
+ ld_a0,a0,0
+ sub_a0,t1,a0
+ shri_a0,a0,5
+ la_a1 &def_body_meta_idx
+ st_a0,a1,0
+
+ # macro_body_param_idx[idx] = find_param(def_m_ptr, proc_pos)
+ la_a0 &def_m_ptr
+ ld_a0,a0,0
+ la_a1 &proc_pos
+ ld_a1,a1,0
+ la_br &find_param
+ call
+ la_a1 &def_body_meta_idx
+ ld_a1,a1,0
+ la_a2 &macro_body_param_idx_ptr
+ ld_a2,a2,0
+ add_a2,a2,a1
+ sb_a0,a2,0
+
+ # macro_body_is_local_label[idx] = (kind==WORD && len>=3 && p[0] in ':&' && p[1]=='@')
+ # Inlined — there's no separate is_local_label_token function in P1; the
+ # full predicate is replicated here in def-time, replacing the per-expansion
+ # emt_check_local_label sequence.
+ la_a0 &proc_pos
+ ld_t0,a0,0
+ li_a3 %0 %0
+ ld_a1,t0,0
+ la_br &def_body_copy_ill_store
+ bnez_a1
+ ld_a1,t0,16
+ li_a2 %3 %0
+ la_br &def_body_copy_ill_store
+ blt_a1,a2
+ ld_a2,t0,8
+ lb_a1,a2,0
+ li_a0 %58 %0
+ la_br &def_body_copy_ill_at
+ beq_a1,a0
+ li_a0 %38 %0
+ la_br &def_body_copy_ill_store
+ bne_a1,a0
+:def_body_copy_ill_at
+ lb_a1,a2,1
+ li_a0 %64 %0
+ la_br &def_body_copy_ill_store
+ bne_a1,a0
+ li_a3 %1 %0
+:def_body_copy_ill_store
+ la_a1 &def_body_meta_idx
+ ld_a1,a1,0
+ la_a2 &macro_body_is_local_label_ptr
+ ld_a2,a2,0
+ add_a2,a2,a1
+ sb_a3,a2,0
+ # === end C ===
+
+ # If the body token is TOK_PASTE, set m->has_paste = 1. expand_macro_tokens
+ # uses this (combined with args_have_paste) to skip paste_pool_range when
+ # neither the body nor the call's args contribute a `##`. Defaulted to 0
+ # by BSS init when macros_end advanced into this slot.
+ la_a0 &proc_pos
+ ld_t0,a0,0
+ ld_a1,t0,0
+ li_a2 TOK_PASTE
+ la_br &def_body_copy_after_paste_chk
+ bne_a1,a2
+ la_a0 &def_m_ptr
+ ld_a2,a0,0
+ li_a3 M1PP_MACRO_HAS_PASTE_OFF
+ add_a2,a2,a3
+ li_a3 %1 %0
+ st_a3,a2,0
+
+:def_body_copy_after_paste_chk
+ # Reload t1 = macro_body_end — clobbered by find_param above.
+ la_a0 &macro_body_end
+ ld_t1,a0,0
+
# copy 32 bytes from *proc_pos to *macro_body_end (preserves tight at +24)
la_a0 &proc_pos
ld_t0,a0,0
@@ -2349,6 +2519,15 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_a3 &pa_brace_depth
st_a2,a3,0
+ # args_have_paste = 0 — set to 1 below if any TOK_PASTE appears in the
+ # call's argument span. expand_macro_tokens snapshots this right after
+ # parse_args returns; bare arg copies preserve embedded ## tokens, and
+ # the snapshot tells us whether we still have to run paste_pool_range
+ # even when the body itself contains no ##.
+ li_a2 %0 %0
+ la_a3 &args_have_paste
+ st_a2,a3,0
+
:pa_loop
# if (tok >= limit) fatal unterminated
la_a0 &pa_pos
@@ -2361,6 +2540,15 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
# kind = tok->kind
ld_a2,t0,0
+ # if (kind == TOK_PASTE) { args_have_paste = 1; fall through to default-advance }
+ li_a3 TOK_PASTE
+ la_br &pa_kind_check
+ bne_a2,a3
+ li_a3 %1 %0
+ la_a0 &args_have_paste
+ st_a3,a0,0
+
+:pa_kind_check
# if (kind == TOK_LPAREN) { depth++; tok++; loop }
li_a3 TOK_LPAREN
la_br &pa_lparen
@@ -2906,6 +3094,15 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_br &parse_args
call
+ # Snapshot args_have_paste -> emt_saw_arg_paste BEFORE the body loop
+ # potentially runs nested expansions that would clobber the global. This
+ # snapshot is OR'd with macro->has_paste at emt_done to decide whether
+ # to run paste_pool_range.
+ la_a0 &args_have_paste
+ ld_t0,a0,0
+ la_a1 &emt_saw_arg_paste
+ st_t0,a1,0
+
# Check arg_count == macro->param_count
la_a0 &arg_count
ld_t0,a0,0
@@ -2941,6 +3138,13 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_br &err_bad_macro_header
bnez_t1
+ # No parse_args ran in this branch; args_have_paste from a stale earlier
+ # call MUST NOT leak into emt_saw_arg_paste. Force it to 0 so emt_done's
+ # paste-gate uses only macro->has_paste here.
+ li_t0 %0 %0
+ la_a1 &emt_saw_arg_paste
+ st_t0,a1,0
+
# arg_count = 0
la_a0 &arg_count
li_t0 %0 %0
@@ -2986,22 +3190,27 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_br &emt_done
beq_t0,t1
- # param_idx = find_param(macro, body_tok)
- la_a0 &emt_macro
- ld_a0,a0,0
- mov_a1,t0
- la_br &find_param
- call
+ # Cached param_idx = macro_body_param_idx[(body_pos - macro_body_tokens) / 32].
+ # Set at %macro define time so the body loop never has to call find_param.
+ la_a1 &macro_body_tokens_ptr
+ ld_a1,a1,0
+ sub_a0,t0,a1
+ shri_a0,a0,5
+ la_a1 &macro_body_param_idx_ptr
+ ld_a1,a1,0
+ add_a1,a1,a0
+ lb_a0,a1,0
+ # Spill for emt_do_substitute_paste / _plain (no need to re-derive).
+ la_a1 &emt_cached_param_idx
+ st_a0,a1,0
# if (param_idx == 0) body-native token: check for local-label rewrite,
# else fall through to substitute logic.
la_br &emt_check_local_label
beqz_a0
- # param_idx != 0: substitute. emt_do_substitute_* will re-derive
- # the arg span (calls find_param again to recover the index) after
- # the "pasted" classification below. This saves us from spilling idx
- # across the body_pos +/- TOK_PASTE peek.
+ # param_idx != 0: substitute. The emt_do_substitute_* paths read
+ # emt_cached_param_idx (no re-call to find_param).
# Reload body_pos for the pasted-classification loads.
la_a0 &emt_body_pos
@@ -3049,40 +3258,25 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_br &emt_do_substitute_plain
b
-## emt_check_local_label: body-native token at body_pos. If it's a
-## TOK_WORD whose text starts with ":@" or "&@" and has at least one
-## char after the '@', rewrite it to ":name__NN" / "&name__NN" (NN =
-## emt_expansion_id) and push as TOK_WORD. Otherwise fall through to
-## emt_copy_literal, which copies the body token verbatim.
+## emt_check_local_label: read the cached macro_body_is_local_label[]
+## flag (set at %macro define time). 0 -> emt_copy_literal copies the
+## body token verbatim; 1 -> falls through to emt_rewrite_local_label.
+## Replaces the per-expansion ':@' / '&@' / '@' predicate that used to
+## live inline here.
:emt_check_local_label
- # t0 = body_tok ptr
la_a0 &emt_body_pos
ld_t0,a0,0
- # kind must be TOK_WORD (== 0)
- ld_a1,t0,0
- la_br &emt_copy_literal
- bnez_a1
- # len must be >= 3 (sigil + '@' + >=1 tail char)
- ld_a2,t0,16
- li_a3 %3 %0
- la_br &emt_copy_literal
- blt_a2,a3
- # first byte must be ':' (58) or '&' (38)
- ld_a3,t0,8
- lb_a1,a3,0
- li_a2 %58 %0
- la_br &emt_check_local_label_at
- beq_a1,a2
- li_a2 %38 %0
- la_br &emt_copy_literal
- bne_a1,a2
-:emt_check_local_label_at
- # second byte must be '@' (64)
- lb_a1,a3,1
- li_a2 %64 %0
+ la_a1 &macro_body_tokens_ptr
+ ld_a1,a1,0
+ sub_a0,t0,a1
+ shri_a0,a0,5
+ la_a1 &macro_body_is_local_label_ptr
+ ld_a1,a1,0
+ add_a1,a1,a0
+ lb_a0,a1,0
la_br &emt_copy_literal
- bne_a1,a2
- # Local label! Fall through to rewrite.
+ beqz_a0
+ # Cached flag is 1: fall through to rewrite.
## emt_rewrite_local_label: build "sigil + tail + __ + decimal(NN)" in
## local_label_scratch, stash it into text_buf via append_text, and push
@@ -3305,13 +3499,10 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
b
:emt_do_substitute_paste
- # Re-derive arg span for current body_pos.
- la_a0 &emt_macro
+ # Use the cached param_idx (set at the top of emt_loop) instead of
+ # re-running find_param.
+ la_a0 &emt_cached_param_idx
ld_a0,a0,0
- la_a1 &emt_body_pos
- ld_a1,a1,0
- la_br &find_param
- call
addi_a0,a0,neg1
shli_a0,a0,3
la_a1 &arg_starts_ptr
@@ -3335,13 +3526,10 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
b
:emt_do_substitute_plain
- # Re-derive arg span for current body_pos.
- la_a0 &emt_macro
+ # Use the cached param_idx (set at the top of emt_loop) instead of
+ # re-running find_param.
+ la_a0 &emt_cached_param_idx
ld_a0,a0,0
- la_a1 &emt_body_pos
- ld_a1,a1,0
- la_br &find_param
- call
addi_a0,a0,neg1
shli_a0,a0,3
la_a1 &arg_starts_ptr
@@ -3365,12 +3553,26 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
b
:emt_done
- # paste_pool_range(mark). emt_after_pos and emt_mark are already published.
+ # Gate paste_pool_range(mark) on (macro->has_paste OR emt_saw_arg_paste).
+ # When neither side contains TOK_PASTE the pool sweep is wasted work — the
+ # whole point of has_paste / args_have_paste is to skip it for the common
+ # case of a `##`-free expansion.
+ la_a0 &emt_macro
+ ld_t0,a0,0
+ li_a1 M1PP_MACRO_HAS_PASTE_OFF
+ add_t0,t0,a1
+ ld_t0,t0,0
+ la_a0 &emt_saw_arg_paste
+ ld_t1,a0,0
+ or_t0,t0,t1
+ la_br &emt_done_skip_paste
+ beqz_t0
la_a0 &emt_mark
ld_a0,a0,0
la_br &paste_pool_range
call
+:emt_done_skip_paste
eret
## expand_call(a0=stream_ptr, a1=macro_ptr) -> void (fatal on bad call)
@@ -6229,10 +6431,6 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_a0 &msg_macro_body_overflow
la_br &fatal
b
-:err_not_implemented
- la_a0 &msg_not_implemented
- la_br &fatal
- b
:err_unbalanced_braces
la_a0 &msg_unbalanced_braces
la_br &fatal
@@ -6354,6 +6552,7 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
:const_frame "%frame"
:const_endframe "%endframe"
:const_local "%local"
+:const_bytes "%bytes"
## Suffix appended to the frame name when looking up <frame>_FRAME.<field>.
:const_frame_suffix "_FRAME."
@@ -6467,6 +6666,8 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
&expand_pool_ptr ZERO4 OFF_expand_pool
&expr_frames_ptr ZERO4 OFF_expr_frames
&local_lookup_scratch_ptr ZERO4 OFF_local_lookup_scratch
+&macro_body_param_idx_ptr ZERO4 OFF_macro_body_param_idx
+&macro_body_is_local_label_ptr ZERO4 OFF_macro_body_is_local_label
:bss_init_tbl_end
:msg_prefix "m1pp: "
@@ -6487,7 +6688,6 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
:msg_bad_macro_header "bad macro header" '00'
:msg_too_many_macros "too many macros" '00'
:msg_macro_body_overflow "macro body overflow" '00'
-:msg_not_implemented "not implemented" '00'
:msg_unbalanced_braces "unbalanced braces" '00'
:msg_bad_directive "bad %struct/%enum directive" '00'
:msg_unterminated_directive "unterminated %struct/%enum directive" '00'
@@ -6544,6 +6744,12 @@ ZERO8
ZERO8
:proc_line_start
ZERO8
+## proc_has_paren — set per iteration of proc_loop to 1 when the next
+## token is a tight TOK_LPAREN (i.e. the current token is a paren-call
+## form). Read by directive/builtin sub-handlers that gate on paren form
+## (%select, %str, %bytes, %local, !@$% arith, user-macro paren call).
+:proc_has_paren
+ZERO8
:macros_end
ZERO8
:macro_body_end
@@ -6554,6 +6760,12 @@ ZERO8
ZERO8
:def_body_line_start
ZERO8
+## def_body_meta_idx — slot index of the body token currently being copied
+## by def_body_copy, i.e. (macro_body_end - macro_body_tokens) / 32. Used
+## as the parallel-array index for macro_body_param_idx[] / _is_local_label[]
+## across the find_param call, which clobbers caller-saved registers.
+:def_body_meta_idx
+ZERO8
:pf_stream_end
ZERO8
@@ -6668,12 +6880,26 @@ ZERO8
ZERO8
:pa_brace_depth
ZERO8
+## args_have_paste — sticky 0/1 set by parse_args when the call's argument
+## span contains TOK_PASTE. expand_macro_tokens snapshots it into
+## emt_saw_arg_paste right after parse_args, then ORs it with
+## macro->has_paste to decide whether to run paste_pool_range. Lets us
+## skip the pool sweep when neither body nor args contribute a `##`.
+:args_have_paste
+ZERO8
:emt_call_tok
ZERO8
:emt_limit
ZERO8
:emt_macro
ZERO8
+:emt_saw_arg_paste
+ZERO8
+## emt_cached_param_idx — body token's param index (0 = not a param) read
+## once from macro_body_param_idx[] at the top of emt_loop, then reused in
+## emt_do_substitute_paste / _plain instead of re-running find_param.
+:emt_cached_param_idx
+ZERO8
:emt_body_pos
ZERO8
:emt_body_end
@@ -6913,5 +7139,9 @@ ZERO8
ZERO8
:local_lookup_scratch_ptr
ZERO8
+:macro_body_param_idx_ptr
+ZERO8
+:macro_body_is_local_label_ptr
+ZERO8
:ELF_end
diff --git a/M1pp/M1pp.c b/M1pp/M1pp.c
@@ -171,6 +171,7 @@ struct TokenSpan {
struct Macro {
struct TextSpan name;
int param_count;
+ int has_paste;
struct TextSpan params[MAX_PARAMS];
struct Token *body_start;
struct Token *body_end;
@@ -196,6 +197,11 @@ static char text_buf[MAX_TEXT];
static struct Token source_tokens[MAX_TOKENS];
static struct Token macro_body_tokens[MAX_MACRO_BODY_TOKENS];
+/* Per-body-token classification cached at %macro definition time, so
+ * expand_macro_tokens never re-runs find_param / is_local_label_token in
+ * its hot loop. param_idx: 0 = not a param, k = params[k-1]. */
+static unsigned char macro_body_param_idx[MAX_MACRO_BODY_TOKENS];
+static unsigned char macro_body_is_local_label[MAX_MACRO_BODY_TOKENS];
static struct Token expand_pool[MAX_EXPAND];
static struct Macro macros[MAX_MACROS];
static struct Stream streams[MAX_STACK];
@@ -219,6 +225,7 @@ static struct Token *arg_starts[MAX_PARAMS];
static struct Token *arg_ends[MAX_PARAMS];
static int arg_count;
static struct Token *call_end_pos;
+static int args_have_paste;
static const char *error_msg;
@@ -496,6 +503,7 @@ static int emit_newline(void)
static int emit_string_as_bytes(const struct Token *tok);
static int emit_hex_value(unsigned long long value, int bytes);
+static int is_local_label_token(const struct Token *tok);
static int emit_token(const struct Token *tok)
{
@@ -727,6 +735,8 @@ static int define_fielded_macro(struct TextSpan base, const char *suffix,
return 0;
}
m->body_start = macro_body_tokens + macro_body_used;
+ macro_body_param_idx[macro_body_used] = 0;
+ macro_body_is_local_label[macro_body_used] = 0;
macro_body_tokens[macro_body_used++] = body_tok;
m->body_end = macro_body_tokens + macro_body_used;
macro_count++;
@@ -864,6 +874,8 @@ static int define_macro(struct Stream *s)
m->body_start = macro_body_tokens + macro_body_used;
while (s->pos < s->end) {
+ int idx;
+
if (s->pos->kind == TOK_WORD && token_text_eq(s->pos, "%endm")) {
s->pos++;
if (s->pos >= s->end || s->pos->kind != TOK_NEWLINE) {
@@ -880,7 +892,15 @@ static int define_macro(struct Stream *s)
if (macro_body_used >= MAX_MACRO_BODY_TOKENS) {
return fail("macro body overflow");
}
- macro_body_tokens[macro_body_used++] = *s->pos;
+ idx = macro_body_used;
+ macro_body_tokens[idx] = *s->pos;
+ macro_body_param_idx[idx] = (unsigned char)find_param(m, s->pos);
+ macro_body_is_local_label[idx] =
+ is_local_label_token(s->pos) ? 1 : 0;
+ if (s->pos->kind == TOK_PASTE) {
+ m->has_paste = 1;
+ }
+ macro_body_used++;
s->pos++;
}
@@ -895,7 +915,12 @@ static int parse_args(struct Token *lparen, struct Token *limit)
int brace_depth = 0;
int arg_index = 0;
+ args_have_paste = 0;
+
while (tok < limit) {
+ if (tok->kind == TOK_PASTE) {
+ args_have_paste = 1;
+ }
if (tok->kind == TOK_LPAREN) {
depth++;
tok++;
@@ -1154,6 +1179,7 @@ static int expand_macro_tokens(struct Token *call_tok, struct Token *limit,
struct Token *end_pos;
int mark;
int expansion_id;
+ int saw_arg_paste = 0;
if (call_tok + 1 < limit && (call_tok + 1)->kind == TOK_LPAREN &&
(call_tok + 1)->tight) {
@@ -1164,6 +1190,7 @@ static int expand_macro_tokens(struct Token *call_tok, struct Token *limit,
return fail("wrong arg count");
}
end_pos = call_end_pos;
+ saw_arg_paste = args_have_paste;
} else if (m->param_count == 0) {
arg_count = 0;
end_pos = call_tok + 1;
@@ -1174,7 +1201,8 @@ static int expand_macro_tokens(struct Token *call_tok, struct Token *limit,
expansion_id = ++next_expansion_id;
mark = pool_used;
for (body_tok = m->body_start; body_tok < m->body_end; body_tok++) {
- int param_idx = find_param(m, body_tok);
+ int idx = (int)(body_tok - macro_body_tokens);
+ int param_idx = macro_body_param_idx[idx];
int pasted = 0;
int ok;
@@ -1189,7 +1217,7 @@ static int expand_macro_tokens(struct Token *call_tok, struct Token *limit,
}
continue;
}
- if (is_local_label_token(body_tok)) {
+ if (macro_body_is_local_label[idx]) {
if (!push_local_label_token(body_tok, expansion_id)) {
pool_used = mark;
return 0;
@@ -1202,7 +1230,7 @@ static int expand_macro_tokens(struct Token *call_tok, struct Token *limit,
}
}
- if (!paste_pool_range(mark)) {
+ if ((m->has_paste || saw_arg_paste) && !paste_pool_range(mark)) {
return 0;
}
*after_out = end_pos;
@@ -1891,10 +1919,15 @@ static int process_tokens(void)
return 0;
}
+ /* Per-token dispatch is gated on the first byte of WORD tokens.
+ * Plain pass-through tokens (e.g. hex literals, bare identifiers)
+ * fail the c0=='%' / c0 in {!,@,$} test in one byte compare and go
+ * straight to emit_token. Within the c0=='%' branch we dispatch on
+ * the second byte to pick the matching directive/builtin without
+ * walking ~9 token_text_eq probes. */
for (;;) {
struct Stream *s;
struct Token *tok;
- const struct Macro *macro;
s = current_stream();
if (s == NULL) {
@@ -1910,41 +1943,6 @@ static int process_tokens(void)
current_line = tok->line;
}
- if (tok->kind == TOK_WORD && token_text_eq(tok, "%macro")) {
- if (!define_macro(s)) {
- return 0;
- }
- continue;
- }
-
- if (tok->kind == TOK_WORD && token_text_eq(tok, "%struct")) {
- if (!define_fielded(s, 8, "SIZE", 4)) {
- return 0;
- }
- continue;
- }
-
- if (tok->kind == TOK_WORD && token_text_eq(tok, "%enum")) {
- if (!define_fielded(s, 1, "COUNT", 5)) {
- return 0;
- }
- continue;
- }
-
- if (tok->kind == TOK_WORD && token_text_eq(tok, "%frame")) {
- if (!push_frame(s)) {
- return 0;
- }
- continue;
- }
-
- if (tok->kind == TOK_WORD && token_text_eq(tok, "%endframe")) {
- if (!pop_frame(s)) {
- return 0;
- }
- continue;
- }
-
if (tok->kind == TOK_NEWLINE) {
s->pos++;
s->line_start = 1;
@@ -1954,33 +1952,90 @@ static int process_tokens(void)
continue;
}
- if (tok->kind == TOK_WORD &&
- tok + 1 < s->end &&
- (tok + 1)->kind == TOK_LPAREN &&
- (tok + 1)->tight &&
- (token_text_eq(tok, "!") ||
- token_text_eq(tok, "@") ||
- token_text_eq(tok, "%") ||
- token_text_eq(tok, "$") ||
- token_text_eq(tok, "%select") ||
- token_text_eq(tok, "%str") ||
- token_text_eq(tok, "%bytes") ||
- token_text_eq(tok, "%local"))) {
- if (!expand_builtin_call(s, tok)) {
- return 0;
- }
- continue;
- }
+ if (tok->kind == TOK_WORD && tok->text.len >= 1) {
+ const char *p = tok->text.ptr;
+ int len = tok->text.len;
+ char c0 = p[0];
+ int has_paren = (tok + 1 < s->end &&
+ (tok + 1)->kind == TOK_LPAREN &&
+ (tok + 1)->tight);
+
+ if (c0 == '%' && len >= 2) {
+ char c1 = p[1];
+ const struct Macro *macro;
+ int handled = 0;
+
+ switch (c1) {
+ case 'm':
+ if (len == 6 && memcmp(p + 2, "acro", 4) == 0) {
+ if (!define_macro(s)) return 0;
+ handled = 1;
+ }
+ break;
+ case 's':
+ if (len == 7 && memcmp(p + 2, "truct", 5) == 0) {
+ if (!define_fielded(s, 8, "SIZE", 4)) return 0;
+ handled = 1;
+ } else if (has_paren && len == 7 &&
+ memcmp(p + 2, "elect", 5) == 0) {
+ if (!expand_builtin_call(s, tok)) return 0;
+ handled = 1;
+ } else if (has_paren && len == 4 &&
+ memcmp(p + 2, "tr", 2) == 0) {
+ if (!expand_builtin_call(s, tok)) return 0;
+ handled = 1;
+ }
+ break;
+ case 'e':
+ if (len == 5 && memcmp(p + 2, "num", 3) == 0) {
+ if (!define_fielded(s, 1, "COUNT", 5)) return 0;
+ handled = 1;
+ } else if (len == 9 &&
+ memcmp(p + 2, "ndframe", 7) == 0) {
+ if (!pop_frame(s)) return 0;
+ handled = 1;
+ }
+ break;
+ case 'f':
+ if (len == 6 && memcmp(p + 2, "rame", 4) == 0) {
+ if (!push_frame(s)) return 0;
+ handled = 1;
+ }
+ break;
+ case 'b':
+ if (has_paren && len == 6 &&
+ memcmp(p + 2, "ytes", 4) == 0) {
+ if (!expand_builtin_call(s, tok)) return 0;
+ handled = 1;
+ }
+ break;
+ case 'l':
+ if (has_paren && len == 6 &&
+ memcmp(p + 2, "ocal", 4) == 0) {
+ if (!expand_builtin_call(s, tok)) return 0;
+ handled = 1;
+ }
+ break;
+ }
- macro = find_macro(tok);
- if (macro != NULL &&
- ((tok + 1 < s->end && (tok + 1)->kind == TOK_LPAREN &&
- (tok + 1)->tight) ||
- macro->param_count == 0)) {
- if (!expand_call(s, macro)) {
- return 0;
+ if (handled) {
+ continue;
+ }
+
+ macro = find_macro(tok);
+ if (macro != NULL &&
+ (has_paren || macro->param_count == 0)) {
+ if (!expand_call(s, macro)) return 0;
+ continue;
+ }
+ } else if (len == 1 &&
+ (c0 == '!' || c0 == '@' ||
+ c0 == '$' || c0 == '%')) {
+ if (has_paren) {
+ if (!expand_builtin_call(s, tok)) return 0;
+ continue;
+ }
}
- continue;
}
s->pos++;
diff --git a/P1/P1-aarch64.M1 b/P1/P1-aarch64.M1
@@ -67,9 +67,11 @@ DEFINE add_a1,t2,t1 61010A8B
DEFINE add_a2,a1,a3 2200038B
DEFINE add_a2,a1,t0 2200098B
DEFINE add_a2,a2,a0 4200008B
+DEFINE add_a2,a2,a1 4200018B
DEFINE add_a2,a2,a3 4200038B
DEFINE add_a2,a2,t0 4200098B
DEFINE add_a2,a2,t1 42000A8B
+DEFINE add_a2,a2,t2 42000B8B
DEFINE add_a2,a3,a1 6200018B
DEFINE add_a2,a3,a2 6200028B
DEFINE add_a2,t0,t1 22010A8B
@@ -78,6 +80,7 @@ DEFINE add_a2,t2,a2 6201028B
DEFINE add_a3,a0,a2 0300028B
DEFINE add_a3,a1,t1 23000A8B
DEFINE add_a3,a1,t2 23000B8B
+DEFINE add_a3,a2,t1 43000A8B
DEFINE add_a3,a3,a0 6300008B
DEFINE add_a3,a3,a1 6300018B
DEFINE add_a3,a3,a2 6300028B
@@ -97,23 +100,28 @@ DEFINE add_t0,t0,t1 29010A8B
DEFINE add_t0,t2,a3 6901038B
DEFINE add_t1,a0,t0 0A00098B
DEFINE add_t1,a3,t1 6A000A8B
+DEFINE add_t1,t0,a0 2A01008B
DEFINE add_t1,t0,t1 2A010A8B
DEFINE add_t1,t1,a2 4A01028B
+DEFINE add_t1,t1,t0 4A01098B
DEFINE add_t1,t1,t2 4A010B8B
DEFINE add_t2,a0,t0 0B00098B
DEFINE add_t2,a0,t1 0B000A8B
DEFINE add_t2,a1,t2 2B000B8B
DEFINE add_t2,a2,t1 4B000A8B
DEFINE add_t2,t0,t1 2B010A8B
+DEFINE add_t2,t1,t0 4B01098B
DEFINE add_t2,t1,t2 4B010B8B
DEFINE add_t2,t2,a0 6B01008B
DEFINE add_t2,t2,a3 6B01038B
DEFINE add_t2,t2,t1 6B010A8B
-DEFINE sub_a0,a0,a1 000001CB
DEFINE sub_a0,a0,t1 00000ACB
DEFINE sub_a0,a1,a0 200000CB
DEFINE sub_a0,a1,t2 20000BCB
DEFINE sub_a0,a3,a0 600000CB
+DEFINE sub_a0,t0,a1 200101CB
+DEFINE sub_a0,t0,t1 20010ACB
+DEFINE sub_a0,t1,a0 400100CB
DEFINE sub_a1,t0,a0 210100CB
DEFINE sub_a2,a1,a0 220000CB
DEFINE sub_a2,a2,a2 420002CB
@@ -137,6 +145,7 @@ DEFINE sub_t2,t1,t0 4B0109CB
DEFINE and_a3,a3,a2 6300028A
DEFINE or_a0,a0,a2 000002AA
DEFINE or_a3,a3,a2 630002AA
+DEFINE or_t0,t0,t1 29010AAA
DEFINE xor_a2,a2,a3 420003CA
DEFINE xor_a3,a3,a2 630002CA
DEFINE shl_a2,a2,a3 4220C39A
@@ -144,6 +153,7 @@ DEFINE sar_a2,a2,a3 4228C39A
DEFINE mul_a0,a0,a3 007C039B
DEFINE mul_a0,a0,t0 007C099B
DEFINE mul_a0,t1,t2 407D0B9B
+DEFINE mul_a1,t0,t1 217D0A9B
DEFINE mul_a2,a2,t1 427C0A9B
DEFINE mul_a3,a3,a2 637C029B
DEFINE mul_t0,t0,a1 297D019B
@@ -155,6 +165,7 @@ DEFINE div_a2,a2,a3 420CC39A
DEFINE rem_a2,a0,a1 100CC19A0282019B
DEFINE rem_a2,a0,t1 100CCA9A02820A9B
DEFINE rem_a2,a2,a3 500CC39A028A039B
+DEFINE rem_a2,t0,a1 300DC19A02A6019B
## ---- Immediate Arithmetic
DEFINE addi_a0,a0,neg1 000400D1
@@ -175,6 +186,7 @@ DEFINE addi_a2,a2,8 42200091
DEFINE addi_a2,a2,24 42600091
DEFINE addi_a2,a2,48 42C00091
DEFINE addi_a2,t0,1 22050091
+DEFINE addi_a2,t0,32 22810091
DEFINE addi_a2,t2,neg48 62C100D1
DEFINE addi_a3,a3,neg1 630400D1
DEFINE addi_a3,a3,1 63040091
@@ -184,6 +196,7 @@ DEFINE addi_a3,t1,32 43810091
DEFINE addi_t0,a0,32 09800091
DEFINE addi_t0,a1,neg32 298000D1
DEFINE addi_t0,a1,32 29800091
+DEFINE addi_t0,a3,neg1 690400D1
DEFINE addi_t0,t0,neg1 290500D1
DEFINE addi_t0,t0,1 29050091
DEFINE addi_t0,t0,2 29090091
@@ -212,12 +225,15 @@ DEFINE andi_a3,t2,255 F01F80D26301108A
DEFINE shli_a0,a0,3 00F07DD3
DEFINE shli_a0,a0,4 00EC7CD3
DEFINE shli_a1,a1,1 21F87FD3
+DEFINE shli_a2,a2,3 42F07DD3
DEFINE shli_a2,t1,3 42F17DD3
DEFINE shli_a3,t0,1 23F97FD3
DEFINE shli_a3,t0,4 23ED7CD3
DEFINE shli_t0,t0,4 29ED7CD3
DEFINE shli_t1,a2,3 4AF07DD3
+DEFINE shli_t1,t1,3 4AF17DD3
DEFINE shli_t2,t0,3 2BF17DD3
+DEFINE shri_a0,a0,5 00FC45D3
DEFINE shri_a2,a3,4 62FC44D3
DEFINE shri_t2,t2,8 6BFD48D3
@@ -226,7 +242,9 @@ DEFINE ld_a0,a0,0 000040F9
DEFINE ld_a0,a0,8 000440F9
DEFINE ld_a0,a0,16 000840F9
DEFINE ld_a0,a1,0 200040F9
+DEFINE ld_a0,a1,16 200840F9
DEFINE ld_a0,a2,0 400040F9
+DEFINE ld_a0,a2,24 400C40F9
DEFINE ld_a0,a3,0 600040F9
DEFINE ld_a0,a3,8 600440F9
DEFINE ld_a0,a3,16 600840F9
@@ -241,6 +259,7 @@ DEFINE ld_a1,a0,0 010040F9
DEFINE ld_a1,a0,8 010440F9
DEFINE ld_a1,a0,16 010840F9
DEFINE ld_a1,a1,0 210040F9
+DEFINE ld_a1,a1,8 210440F9
DEFINE ld_a1,a2,8 410440F9
DEFINE ld_a1,a3,8 610440F9
DEFINE ld_a1,t0,0 210140F9
@@ -250,6 +269,7 @@ DEFINE ld_a1,t0,24 210D40F9
DEFINE ld_a1,t1,0 410140F9
DEFINE ld_a1,t1,24 410D40F9
DEFINE ld_a1,t2,16 610940F9
+DEFINE ld_a1,sp,0 E10B40F9
DEFINE ld_a1,sp,8 E10F40F9
DEFINE ld_a2,a0,0 020040F9
DEFINE ld_a2,a0,16 020840F9
@@ -283,7 +303,11 @@ DEFINE ld_t0,a0,24 090C40F9
DEFINE ld_t0,a1,0 290040F9
DEFINE ld_t0,a1,8 290440F9
DEFINE ld_t0,a1,16 290840F9
+DEFINE ld_t0,a1,24 290C40F9
DEFINE ld_t0,a1,32 291040F9
+DEFINE ld_t0,a1,40 291440F9
+DEFINE ld_t0,a1,48 291840F9
+DEFINE ld_t0,a1,64 292040F9
DEFINE ld_t0,a2,0 490040F9
DEFINE ld_t0,a3,0 690040F9
DEFINE ld_t0,t0,0 290140F9
@@ -302,6 +326,7 @@ DEFINE ld_t1,a0,24 0A0C40F9
DEFINE ld_t1,a1,0 2A0040F9
DEFINE ld_t1,a1,8 2A0440F9
DEFINE ld_t1,a1,16 2A0840F9
+DEFINE ld_t1,a1,56 2A1C40F9
DEFINE ld_t1,a2,0 4A0040F9
DEFINE ld_t1,a2,8 4A0440F9
DEFINE ld_t1,a2,16 4A0840F9
@@ -316,8 +341,10 @@ DEFINE ld_t1,sp,32 EA1B40F9
DEFINE ld_t1,sp,48 EA2340F9
DEFINE ld_t2,a0,0 0B0040F9
DEFINE ld_t2,a1,0 2B0040F9
+DEFINE ld_t2,a3,0 6B0040F9
DEFINE ld_t2,a3,16 6B0840F9
DEFINE ld_t2,t0,0 2B0140F9
+DEFINE ld_t2,t0,8 2B0540F9
DEFINE ld_t2,t0,24 2B0D40F9
DEFINE ld_t2,t1,0 4B0140F9
DEFINE ld_t2,t1,8 4B0540F9
@@ -382,6 +409,14 @@ DEFINE st_t0,a0,8 090400F9
DEFINE st_t0,a0,16 090800F9
DEFINE st_t0,a0,24 090C00F9
DEFINE st_t0,a1,0 290000F9
+DEFINE st_t0,a1,8 290400F9
+DEFINE st_t0,a1,16 290800F9
+DEFINE st_t0,a1,24 290C00F9
+DEFINE st_t0,a1,32 291000F9
+DEFINE st_t0,a1,40 291400F9
+DEFINE st_t0,a1,48 291800F9
+DEFINE st_t0,a1,56 291C00F9
+DEFINE st_t0,a1,64 292000F9
DEFINE st_t0,a2,0 490000F9
DEFINE st_t0,a3,0 690000F9
DEFINE st_t0,t1,0 490100F9
@@ -395,7 +430,9 @@ DEFINE st_t0,sp,40 E91F00F9
DEFINE st_t0,sp,48 E92300F9
DEFINE st_t1,a0,0 0A0000F9
DEFINE st_t1,a0,24 0A0C00F9
+DEFINE st_t1,a0,56 0A1C00F9
DEFINE st_t1,a1,0 2A0000F9
+DEFINE st_t1,a2,0 4A0000F9
DEFINE st_t1,a3,0 6A0000F9
DEFINE st_t1,a3,8 6A0400F9
DEFINE st_t1,a3,16 6A0800F9
@@ -408,11 +445,12 @@ DEFINE st_t2,a3,0 6B0000F9
DEFINE st_t2,t0,0 2B0100F9
DEFINE st_t2,t1,0 4B0100F9
DEFINE lb_a0,a0,0 00004039
+DEFINE lb_a0,a1,0 20004039
DEFINE lb_a0,a2,0 40004039
DEFINE lb_a0,t0,0 20014039
DEFINE lb_a1,a1,0 21004039
-DEFINE lb_a1,a3,0 61004039
-DEFINE lb_a1,a3,1 61044039
+DEFINE lb_a1,a2,0 41004039
+DEFINE lb_a1,a2,1 41044039
DEFINE lb_a2,a1,0 22004039
DEFINE lb_a2,a2,0 42004039
DEFINE lb_a2,t0,0 22014039
@@ -420,6 +458,8 @@ DEFINE lb_a3,a0,0 03004039
DEFINE lb_a3,a1,0 23004039
DEFINE lb_a3,a2,0 43004039
DEFINE lb_a3,a3,0 63004039
+DEFINE lb_a3,t2,0 63014039
+DEFINE lb_a3,t2,1 63054039
DEFINE lb_t0,a0,0 09004039
DEFINE lb_t0,a0,1 09044039
DEFINE lb_t0,t0,0 29014039
@@ -429,6 +469,7 @@ DEFINE lb_t2,t0,0 2B014039
DEFINE lb_t2,t2,0 6B014039
DEFINE sb_a0,a1,0 20000039
DEFINE sb_a0,a2,0 40000039
+DEFINE sb_a0,a3,0 60000039
DEFINE sb_a0,t2,0 60010039
DEFINE sb_a1,a2,0 41000039
DEFINE sb_a1,t0,0 21010039
@@ -439,6 +480,7 @@ DEFINE sb_a3,a0,0 03000039
DEFINE sb_a3,a1,0 23000039
DEFINE sb_a3,a2,0 43000039
DEFINE sb_a3,t2,0 63010039
+DEFINE sb_t0,a3,0 69000039
DEFINE sb_t1,a0,0 0A000039
DEFINE sb_t1,a2,0 4A000039
DEFINE sb_t2,a1,0 2B000039
@@ -451,9 +493,11 @@ DEFINE b 20021FD6
DEFINE beq_a0,a1 1F0001EB4100005420021FD6
DEFINE beq_a0,t0 1F0009EB4100005420021FD6
DEFINE beq_a0,t1 1F000AEB4100005420021FD6
+DEFINE beq_a1,a0 3F0000EB4100005420021FD6
DEFINE beq_a1,a2 3F0002EB4100005420021FD6
DEFINE beq_a2,a1 5F0001EB4100005420021FD6
DEFINE beq_a2,a3 5F0003EB4100005420021FD6
+DEFINE beq_a3,a0 7F0000EB4100005420021FD6
DEFINE beq_a3,a1 7F0001EB4100005420021FD6
DEFINE beq_a3,a2 7F0002EB4100005420021FD6
DEFINE beq_a3,t0 7F0009EB4100005420021FD6
@@ -471,8 +515,10 @@ DEFINE beq_t1,t2 5F010BEB4100005420021FD6
DEFINE beq_t2,a2 7F0102EB4100005420021FD6
DEFINE beq_t2,a3 7F0103EB4100005420021FD6
DEFINE beq_t2,t1 7F010AEB4100005420021FD6
+DEFINE bne_a0,a1 1F0001EB4000005420021FD6
DEFINE bne_a0,t0 1F0009EB4000005420021FD6
DEFINE bne_a0,t1 1F000AEB4000005420021FD6
+DEFINE bne_a1,a0 3F0000EB4000005420021FD6
DEFINE bne_a1,a2 3F0002EB4000005420021FD6
DEFINE bne_a1,t0 3F0009EB4000005420021FD6
DEFINE bne_a2,a1 5F0001EB4000005420021FD6
@@ -485,6 +531,7 @@ DEFINE bne_t0,a0 3F0100EB4000005420021FD6
DEFINE bne_t0,a2 3F0102EB4000005420021FD6
DEFINE bne_t0,t1 3F010AEB4000005420021FD6
DEFINE bne_t0,t2 3F010BEB4000005420021FD6
+DEFINE bne_t1,a0 5F0100EB4000005420021FD6
DEFINE bne_t1,a2 5F0102EB4000005420021FD6
DEFINE bne_t1,a3 5F0103EB4000005420021FD6
DEFINE bne_t1,t2 5F010BEB4000005420021FD6
diff --git a/P1/P1-amd64.M1 b/P1/P1-amd64.M1
@@ -67,9 +67,11 @@ DEFINE add_a1,t2,t1 4C89C64C01DE
DEFINE add_a2,a1,a3 4889F24801CA
DEFINE add_a2,a1,t0 4889F24C01D2
DEFINE add_a2,a2,a0 4889D24801FA
+DEFINE add_a2,a2,a1 4889D24801F2
DEFINE add_a2,a2,a3 4889D24801CA
DEFINE add_a2,a2,t0 4889D24C01D2
DEFINE add_a2,a2,t1 4889D24C01DA
+DEFINE add_a2,a2,t2 4889D24C01C2
DEFINE add_a2,a3,a1 4889CA4801F2
DEFINE add_a2,a3,a2 4989D14889CA4C01CA
DEFINE add_a2,t0,t1 4C89D24C01DA
@@ -78,6 +80,7 @@ DEFINE add_a2,t2,a2 4989D14C89C24C01CA
DEFINE add_a3,a0,a2 4889F94801D1
DEFINE add_a3,a1,t1 4889F14C01D9
DEFINE add_a3,a1,t2 4889F14C01C1
+DEFINE add_a3,a2,t1 4889D14C01D9
DEFINE add_a3,a3,a0 4889C94801F9
DEFINE add_a3,a3,a1 4889C94801F1
DEFINE add_a3,a3,a2 4889C94801D1
@@ -97,23 +100,28 @@ DEFINE add_t0,t0,t1 4D89D24D01DA
DEFINE add_t0,t2,a3 4D89C24901CA
DEFINE add_t1,a0,t0 4989FB4D01D3
DEFINE add_t1,a3,t1 4D89D94989CB4D01CB
+DEFINE add_t1,t0,a0 4D89D34901FB
DEFINE add_t1,t0,t1 4D89D94D89D34D01CB
DEFINE add_t1,t1,a2 4D89DB4901D3
+DEFINE add_t1,t1,t0 4D89DB4D01D3
DEFINE add_t1,t1,t2 4D89DB4D01C3
DEFINE add_t2,a0,t0 4989F84D01D0
DEFINE add_t2,a0,t1 4989F84D01D8
DEFINE add_t2,a1,t2 4D89C14989F04D01C8
DEFINE add_t2,a2,t1 4989D04D01D8
DEFINE add_t2,t0,t1 4D89D04D01D8
+DEFINE add_t2,t1,t0 4D89D84D01D0
DEFINE add_t2,t1,t2 4D89C14D89D84D01C8
DEFINE add_t2,t2,a0 4D89C04901F8
DEFINE add_t2,t2,a3 4D89C04901C8
DEFINE add_t2,t2,t1 4D89C04D01D8
-DEFINE sub_a0,a0,a1 4889FF4829F7
DEFINE sub_a0,a0,t1 4889FF4C29DF
DEFINE sub_a0,a1,a0 4989F94889F74C29CF
DEFINE sub_a0,a1,t2 4889F74C29C7
DEFINE sub_a0,a3,a0 4989F94889CF4C29CF
+DEFINE sub_a0,t0,a1 4C89D74829F7
+DEFINE sub_a0,t0,t1 4C89D74C29DF
+DEFINE sub_a0,t1,a0 4989F94C89DF4C29CF
DEFINE sub_a1,t0,a0 4C89D64829FE
DEFINE sub_a2,a1,a0 4889F24829FA
DEFINE sub_a2,a2,a2 4989D14889D24C29CA
@@ -137,6 +145,7 @@ DEFINE sub_t2,t1,t0 4D89D84D29D0
DEFINE and_a3,a3,a2 4889C94821D1
DEFINE or_a0,a0,a2 4889FF4809D7
DEFINE or_a3,a3,a2 4889C94809D1
+DEFINE or_t0,t0,t1 4D89D24D09DA
DEFINE xor_a2,a2,a3 4889D24831CA
DEFINE xor_a3,a3,a2 4889C94831D1
DEFINE shl_a2,a2,a3 4889CD4989D14889C949D3E14889E94C89CA
@@ -144,6 +153,7 @@ DEFINE sar_a2,a2,a3 4889CD4989D14889C949D3F94889E94C89CA
DEFINE mul_a0,a0,a3 4889FF480FAFF9
DEFINE mul_a0,a0,t0 4889FF490FAFFA
DEFINE mul_a0,t1,t2 4C89DF490FAFF8
+DEFINE mul_a1,t0,t1 4C89D6490FAFF3
DEFINE mul_a2,a2,t1 4889D2490FAFD3
DEFINE mul_a3,a3,a2 4889C9480FAFCA
DEFINE mul_t0,t0,a1 4D89D24C0FAFD6
@@ -155,6 +165,7 @@ DEFINE div_a2,a2,a3 4889D54989C94889D0489949F7F94889EA4889C2
DEFINE rem_a2,a0,a1 4889D54989F14889F8489949F7F94889D04889EA4889C2
DEFINE rem_a2,a0,t1 4889D54D89D94889F8489949F7F94889D04889EA4889C2
DEFINE rem_a2,a2,a3 4889D54989C94889D0489949F7F94889D04889EA4889C2
+DEFINE rem_a2,t0,a1 4889D54989F14C89D0489949F7F94889D04889EA4889C2
## ---- Immediate Arithmetic
DEFINE addi_a0,a0,neg1 4889FF4883C7FF
@@ -175,6 +186,7 @@ DEFINE addi_a2,a2,8 4889D24883C208
DEFINE addi_a2,a2,24 4889D24883C218
DEFINE addi_a2,a2,48 4889D24883C230
DEFINE addi_a2,t0,1 4C89D24883C201
+DEFINE addi_a2,t0,32 4C89D24883C220
DEFINE addi_a2,t2,neg48 4C89C24883C2D0
DEFINE addi_a3,a3,neg1 4889C94883C1FF
DEFINE addi_a3,a3,1 4889C94883C101
@@ -184,6 +196,7 @@ DEFINE addi_a3,t1,32 4C89D94883C120
DEFINE addi_t0,a0,32 4989FA4983C220
DEFINE addi_t0,a1,neg32 4989F24983C2E0
DEFINE addi_t0,a1,32 4989F24983C220
+DEFINE addi_t0,a3,neg1 4989CA4983C2FF
DEFINE addi_t0,t0,neg1 4D89D24983C2FF
DEFINE addi_t0,t0,1 4D89D24983C201
DEFINE addi_t0,t0,2 4D89D24983C202
@@ -212,12 +225,15 @@ DEFINE andi_a3,t2,255 4C89C14881E1FF000000
DEFINE shli_a0,a0,3 4889FF48C1E703
DEFINE shli_a0,a0,4 4889FF48C1E704
DEFINE shli_a1,a1,1 4889F648C1E601
+DEFINE shli_a2,a2,3 4889D248C1E203
DEFINE shli_a2,t1,3 4C89DA48C1E203
DEFINE shli_a3,t0,1 4C89D148C1E101
DEFINE shli_a3,t0,4 4C89D148C1E104
DEFINE shli_t0,t0,4 4D89D249C1E204
DEFINE shli_t1,a2,3 4989D349C1E303
+DEFINE shli_t1,t1,3 4D89DB49C1E303
DEFINE shli_t2,t0,3 4D89D049C1E003
+DEFINE shri_a0,a0,5 4889FF48C1EF05
DEFINE shri_a2,a3,4 4889CA48C1EA04
DEFINE shri_t2,t2,8 4D89C049C1E808
@@ -226,7 +242,9 @@ DEFINE ld_a0,a0,0 488B7F00
DEFINE ld_a0,a0,8 488B7F08
DEFINE ld_a0,a0,16 488B7F10
DEFINE ld_a0,a1,0 488B7E00
+DEFINE ld_a0,a1,16 488B7E10
DEFINE ld_a0,a2,0 488B7A00
+DEFINE ld_a0,a2,24 488B7A18
DEFINE ld_a0,a3,0 488B7900
DEFINE ld_a0,a3,8 488B7908
DEFINE ld_a0,a3,16 488B7910
@@ -241,6 +259,7 @@ DEFINE ld_a1,a0,0 488B7700
DEFINE ld_a1,a0,8 488B7708
DEFINE ld_a1,a0,16 488B7710
DEFINE ld_a1,a1,0 488B7600
+DEFINE ld_a1,a1,8 488B7608
DEFINE ld_a1,a2,8 488B7208
DEFINE ld_a1,a3,8 488B7108
DEFINE ld_a1,t0,0 498B7200
@@ -250,6 +269,7 @@ DEFINE ld_a1,t0,24 498B7218
DEFINE ld_a1,t1,0 498B7300
DEFINE ld_a1,t1,24 498B7318
DEFINE ld_a1,t2,16 498B7010
+DEFINE ld_a1,sp,0 488B742410
DEFINE ld_a1,sp,8 488B742418
DEFINE ld_a2,a0,0 488B5700
DEFINE ld_a2,a0,16 488B5710
@@ -283,7 +303,11 @@ DEFINE ld_t0,a0,24 4C8B5718
DEFINE ld_t0,a1,0 4C8B5600
DEFINE ld_t0,a1,8 4C8B5608
DEFINE ld_t0,a1,16 4C8B5610
+DEFINE ld_t0,a1,24 4C8B5618
DEFINE ld_t0,a1,32 4C8B5620
+DEFINE ld_t0,a1,40 4C8B5628
+DEFINE ld_t0,a1,48 4C8B5630
+DEFINE ld_t0,a1,64 4C8B5640
DEFINE ld_t0,a2,0 4C8B5200
DEFINE ld_t0,a3,0 4C8B5100
DEFINE ld_t0,t0,0 4D8B5200
@@ -302,6 +326,7 @@ DEFINE ld_t1,a0,24 4C8B5F18
DEFINE ld_t1,a1,0 4C8B5E00
DEFINE ld_t1,a1,8 4C8B5E08
DEFINE ld_t1,a1,16 4C8B5E10
+DEFINE ld_t1,a1,56 4C8B5E38
DEFINE ld_t1,a2,0 4C8B5A00
DEFINE ld_t1,a2,8 4C8B5A08
DEFINE ld_t1,a2,16 4C8B5A10
@@ -316,8 +341,10 @@ DEFINE ld_t1,sp,32 4C8B5C2430
DEFINE ld_t1,sp,48 4C8B5C2440
DEFINE ld_t2,a0,0 4C8B4700
DEFINE ld_t2,a1,0 4C8B4600
+DEFINE ld_t2,a3,0 4C8B4100
DEFINE ld_t2,a3,16 4C8B4110
DEFINE ld_t2,t0,0 4D8B4200
+DEFINE ld_t2,t0,8 4D8B4208
DEFINE ld_t2,t0,24 4D8B4218
DEFINE ld_t2,t1,0 4D8B4300
DEFINE ld_t2,t1,8 4D8B4308
@@ -382,6 +409,14 @@ DEFINE st_t0,a0,8 4C895708
DEFINE st_t0,a0,16 4C895710
DEFINE st_t0,a0,24 4C895718
DEFINE st_t0,a1,0 4C895600
+DEFINE st_t0,a1,8 4C895608
+DEFINE st_t0,a1,16 4C895610
+DEFINE st_t0,a1,24 4C895618
+DEFINE st_t0,a1,32 4C895620
+DEFINE st_t0,a1,40 4C895628
+DEFINE st_t0,a1,48 4C895630
+DEFINE st_t0,a1,56 4C895638
+DEFINE st_t0,a1,64 4C895640
DEFINE st_t0,a2,0 4C895200
DEFINE st_t0,a3,0 4C895100
DEFINE st_t0,t1,0 4D895300
@@ -395,7 +430,9 @@ DEFINE st_t0,sp,40 4C89542438
DEFINE st_t0,sp,48 4C89542440
DEFINE st_t1,a0,0 4C895F00
DEFINE st_t1,a0,24 4C895F18
+DEFINE st_t1,a0,56 4C895F38
DEFINE st_t1,a1,0 4C895E00
+DEFINE st_t1,a2,0 4C895A00
DEFINE st_t1,a3,0 4C895900
DEFINE st_t1,a3,8 4C895908
DEFINE st_t1,a3,16 4C895910
@@ -408,11 +445,12 @@ DEFINE st_t2,a3,0 4C894100
DEFINE st_t2,t0,0 4D894200
DEFINE st_t2,t1,0 4D894300
DEFINE lb_a0,a0,0 480FB67F00
+DEFINE lb_a0,a1,0 480FB67E00
DEFINE lb_a0,a2,0 480FB67A00
DEFINE lb_a0,t0,0 490FB67A00
DEFINE lb_a1,a1,0 480FB67600
-DEFINE lb_a1,a3,0 480FB67100
-DEFINE lb_a1,a3,1 480FB67101
+DEFINE lb_a1,a2,0 480FB67200
+DEFINE lb_a1,a2,1 480FB67201
DEFINE lb_a2,a1,0 480FB65600
DEFINE lb_a2,a2,0 480FB65200
DEFINE lb_a2,t0,0 490FB65200
@@ -420,6 +458,8 @@ DEFINE lb_a3,a0,0 480FB64F00
DEFINE lb_a3,a1,0 480FB64E00
DEFINE lb_a3,a2,0 480FB64A00
DEFINE lb_a3,a3,0 480FB64900
+DEFINE lb_a3,t2,0 490FB64800
+DEFINE lb_a3,t2,1 490FB64801
DEFINE lb_t0,a0,0 4C0FB65700
DEFINE lb_t0,a0,1 4C0FB65701
DEFINE lb_t0,t0,0 4D0FB65200
@@ -429,6 +469,7 @@ DEFINE lb_t2,t0,0 4D0FB64200
DEFINE lb_t2,t2,0 4D0FB64000
DEFINE sb_a0,a1,0 48887E00
DEFINE sb_a0,a2,0 48887A00
+DEFINE sb_a0,a3,0 48887900
DEFINE sb_a0,t2,0 49887800
DEFINE sb_a1,a2,0 48887200
DEFINE sb_a1,t0,0 49887200
@@ -439,6 +480,7 @@ DEFINE sb_a3,a0,0 48884F00
DEFINE sb_a3,a1,0 48884E00
DEFINE sb_a3,a2,0 48884A00
DEFINE sb_a3,t2,0 49884800
+DEFINE sb_t0,a3,0 4C885100
DEFINE sb_t1,a0,0 4C885F00
DEFINE sb_t1,a2,0 4C885A00
DEFINE sb_t2,a1,0 4C884600
@@ -451,9 +493,11 @@ DEFINE b 41FFE7
DEFINE beq_a0,a1 4839F7750341FFE7
DEFINE beq_a0,t0 4C39D7750341FFE7
DEFINE beq_a0,t1 4C39DF750341FFE7
+DEFINE beq_a1,a0 4839FE750341FFE7
DEFINE beq_a1,a2 4839D6750341FFE7
DEFINE beq_a2,a1 4839F2750341FFE7
DEFINE beq_a2,a3 4839CA750341FFE7
+DEFINE beq_a3,a0 4839F9750341FFE7
DEFINE beq_a3,a1 4839F1750341FFE7
DEFINE beq_a3,a2 4839D1750341FFE7
DEFINE beq_a3,t0 4C39D1750341FFE7
@@ -471,8 +515,10 @@ DEFINE beq_t1,t2 4D39C3750341FFE7
DEFINE beq_t2,a2 4939D0750341FFE7
DEFINE beq_t2,a3 4939C8750341FFE7
DEFINE beq_t2,t1 4D39D8750341FFE7
+DEFINE bne_a0,a1 4839F7740341FFE7
DEFINE bne_a0,t0 4C39D7740341FFE7
DEFINE bne_a0,t1 4C39DF740341FFE7
+DEFINE bne_a1,a0 4839FE740341FFE7
DEFINE bne_a1,a2 4839D6740341FFE7
DEFINE bne_a1,t0 4C39D6740341FFE7
DEFINE bne_a2,a1 4839F2740341FFE7
@@ -485,6 +531,7 @@ DEFINE bne_t0,a0 4939FA740341FFE7
DEFINE bne_t0,a2 4939D2740341FFE7
DEFINE bne_t0,t1 4D39DA740341FFE7
DEFINE bne_t0,t2 4D39C2740341FFE7
+DEFINE bne_t1,a0 4939FB740341FFE7
DEFINE bne_t1,a2 4939D3740341FFE7
DEFINE bne_t1,a3 4939CB740341FFE7
DEFINE bne_t1,t2 4D39C3740341FFE7
diff --git a/P1/P1-riscv64.M1 b/P1/P1-riscv64.M1
@@ -67,9 +67,11 @@ DEFINE add_a1,t2,t1 B3856300
DEFINE add_a2,a1,a3 3386D500
DEFINE add_a2,a1,t0 33865500
DEFINE add_a2,a2,a0 3306A600
+DEFINE add_a2,a2,a1 3306B600
DEFINE add_a2,a2,a3 3306D600
DEFINE add_a2,a2,t0 33065600
DEFINE add_a2,a2,t1 33066600
+DEFINE add_a2,a2,t2 33067600
DEFINE add_a2,a3,a1 3386B600
DEFINE add_a2,a3,a2 3386C600
DEFINE add_a2,t0,t1 33866200
@@ -78,6 +80,7 @@ DEFINE add_a2,t2,a2 3386C300
DEFINE add_a3,a0,a2 B306C500
DEFINE add_a3,a1,t1 B3866500
DEFINE add_a3,a1,t2 B3867500
+DEFINE add_a3,a2,t1 B3066600
DEFINE add_a3,a3,a0 B386A600
DEFINE add_a3,a3,a1 B386B600
DEFINE add_a3,a3,a2 B386C600
@@ -97,23 +100,28 @@ DEFINE add_t0,t0,t1 B3826200
DEFINE add_t0,t2,a3 B382D300
DEFINE add_t1,a0,t0 33035500
DEFINE add_t1,a3,t1 33836600
+DEFINE add_t1,t0,a0 3383A200
DEFINE add_t1,t0,t1 33836200
DEFINE add_t1,t1,a2 3303C300
+DEFINE add_t1,t1,t0 33035300
DEFINE add_t1,t1,t2 33037300
DEFINE add_t2,a0,t0 B3035500
DEFINE add_t2,a0,t1 B3036500
DEFINE add_t2,a1,t2 B3837500
DEFINE add_t2,a2,t1 B3036600
DEFINE add_t2,t0,t1 B3836200
+DEFINE add_t2,t1,t0 B3035300
DEFINE add_t2,t1,t2 B3037300
DEFINE add_t2,t2,a0 B383A300
DEFINE add_t2,t2,a3 B383D300
DEFINE add_t2,t2,t1 B3836300
-DEFINE sub_a0,a0,a1 3305B540
DEFINE sub_a0,a0,t1 33056540
DEFINE sub_a0,a1,a0 3385A540
DEFINE sub_a0,a1,t2 33857540
DEFINE sub_a0,a3,a0 3385A640
+DEFINE sub_a0,t0,a1 3385B240
+DEFINE sub_a0,t0,t1 33856240
+DEFINE sub_a0,t1,a0 3305A340
DEFINE sub_a1,t0,a0 B385A240
DEFINE sub_a2,a1,a0 3386A540
DEFINE sub_a2,a2,a2 3306C640
@@ -137,6 +145,7 @@ DEFINE sub_t2,t1,t0 B3035340
DEFINE and_a3,a3,a2 B3F6C600
DEFINE or_a0,a0,a2 3365C500
DEFINE or_a3,a3,a2 B3E6C600
+DEFINE or_t0,t0,t1 B3E26200
DEFINE xor_a2,a2,a3 3346D600
DEFINE xor_a3,a3,a2 B3C6C600
DEFINE shl_a2,a2,a3 3316D600
@@ -144,6 +153,7 @@ DEFINE sar_a2,a2,a3 3356D640
DEFINE mul_a0,a0,a3 3305D502
DEFINE mul_a0,a0,t0 33055502
DEFINE mul_a0,t1,t2 33057302
+DEFINE mul_a1,t0,t1 B3856202
DEFINE mul_a2,a2,t1 33066602
DEFINE mul_a3,a3,a2 B386C602
DEFINE mul_t0,t0,a1 B382B202
@@ -155,6 +165,7 @@ DEFINE div_a2,a2,a3 3346D602
DEFINE rem_a2,a0,a1 3366B502
DEFINE rem_a2,a0,t1 33666502
DEFINE rem_a2,a2,a3 3366D602
+DEFINE rem_a2,t0,a1 33E6B202
## ---- Immediate Arithmetic
DEFINE addi_a0,a0,neg1 1305F5FF
@@ -175,6 +186,7 @@ DEFINE addi_a2,a2,8 13068600
DEFINE addi_a2,a2,24 13068601
DEFINE addi_a2,a2,48 13060603
DEFINE addi_a2,t0,1 13861200
+DEFINE addi_a2,t0,32 13860202
DEFINE addi_a2,t2,neg48 138603FD
DEFINE addi_a3,a3,neg1 9386F6FF
DEFINE addi_a3,a3,1 93861600
@@ -184,6 +196,7 @@ DEFINE addi_a3,t1,32 93060302
DEFINE addi_t0,a0,32 93020502
DEFINE addi_t0,a1,neg32 938205FE
DEFINE addi_t0,a1,32 93820502
+DEFINE addi_t0,a3,neg1 9382F6FF
DEFINE addi_t0,t0,neg1 9382F2FF
DEFINE addi_t0,t0,1 93821200
DEFINE addi_t0,t0,2 93822200
@@ -212,12 +225,15 @@ DEFINE andi_a3,t2,255 93F6F30F
DEFINE shli_a0,a0,3 13153500
DEFINE shli_a0,a0,4 13154500
DEFINE shli_a1,a1,1 93951500
+DEFINE shli_a2,a2,3 13163600
DEFINE shli_a2,t1,3 13163300
DEFINE shli_a3,t0,1 93961200
DEFINE shli_a3,t0,4 93964200
DEFINE shli_t0,t0,4 93924200
DEFINE shli_t1,a2,3 13133600
+DEFINE shli_t1,t1,3 13133300
DEFINE shli_t2,t0,3 93933200
+DEFINE shri_a0,a0,5 13555500
DEFINE shri_a2,a3,4 13D64600
DEFINE shri_t2,t2,8 93D38300
@@ -226,7 +242,9 @@ DEFINE ld_a0,a0,0 03350500
DEFINE ld_a0,a0,8 03358500
DEFINE ld_a0,a0,16 03350501
DEFINE ld_a0,a1,0 03B50500
+DEFINE ld_a0,a1,16 03B50501
DEFINE ld_a0,a2,0 03350600
+DEFINE ld_a0,a2,24 03358601
DEFINE ld_a0,a3,0 03B50600
DEFINE ld_a0,a3,8 03B58600
DEFINE ld_a0,a3,16 03B50601
@@ -241,6 +259,7 @@ DEFINE ld_a1,a0,0 83350500
DEFINE ld_a1,a0,8 83358500
DEFINE ld_a1,a0,16 83350501
DEFINE ld_a1,a1,0 83B50500
+DEFINE ld_a1,a1,8 83B58500
DEFINE ld_a1,a2,8 83358600
DEFINE ld_a1,a3,8 83B58600
DEFINE ld_a1,t0,0 83B50200
@@ -250,6 +269,7 @@ DEFINE ld_a1,t0,24 83B58201
DEFINE ld_a1,t1,0 83350300
DEFINE ld_a1,t1,24 83358301
DEFINE ld_a1,t2,16 83B50301
+DEFINE ld_a1,sp,0 83350101
DEFINE ld_a1,sp,8 83358101
DEFINE ld_a2,a0,0 03360500
DEFINE ld_a2,a0,16 03360501
@@ -283,7 +303,11 @@ DEFINE ld_t0,a0,24 83328501
DEFINE ld_t0,a1,0 83B20500
DEFINE ld_t0,a1,8 83B28500
DEFINE ld_t0,a1,16 83B20501
+DEFINE ld_t0,a1,24 83B28501
DEFINE ld_t0,a1,32 83B20502
+DEFINE ld_t0,a1,40 83B28502
+DEFINE ld_t0,a1,48 83B20503
+DEFINE ld_t0,a1,64 83B20504
DEFINE ld_t0,a2,0 83320600
DEFINE ld_t0,a3,0 83B20600
DEFINE ld_t0,t0,0 83B20200
@@ -302,6 +326,7 @@ DEFINE ld_t1,a0,24 03338501
DEFINE ld_t1,a1,0 03B30500
DEFINE ld_t1,a1,8 03B38500
DEFINE ld_t1,a1,16 03B30501
+DEFINE ld_t1,a1,56 03B38503
DEFINE ld_t1,a2,0 03330600
DEFINE ld_t1,a2,8 03338600
DEFINE ld_t1,a2,16 03330601
@@ -316,8 +341,10 @@ DEFINE ld_t1,sp,32 03330103
DEFINE ld_t1,sp,48 03330104
DEFINE ld_t2,a0,0 83330500
DEFINE ld_t2,a1,0 83B30500
+DEFINE ld_t2,a3,0 83B30600
DEFINE ld_t2,a3,16 83B30601
DEFINE ld_t2,t0,0 83B30200
+DEFINE ld_t2,t0,8 83B38200
DEFINE ld_t2,t0,24 83B38201
DEFINE ld_t2,t1,0 83330300
DEFINE ld_t2,t1,8 83338300
@@ -382,6 +409,14 @@ DEFINE st_t0,a0,8 23345500
DEFINE st_t0,a0,16 23385500
DEFINE st_t0,a0,24 233C5500
DEFINE st_t0,a1,0 23B05500
+DEFINE st_t0,a1,8 23B45500
+DEFINE st_t0,a1,16 23B85500
+DEFINE st_t0,a1,24 23BC5500
+DEFINE st_t0,a1,32 23B05502
+DEFINE st_t0,a1,40 23B45502
+DEFINE st_t0,a1,48 23B85502
+DEFINE st_t0,a1,56 23BC5502
+DEFINE st_t0,a1,64 23B05504
DEFINE st_t0,a2,0 23305600
DEFINE st_t0,a3,0 23B05600
DEFINE st_t0,t1,0 23305300
@@ -395,7 +430,9 @@ DEFINE st_t0,sp,40 233C5102
DEFINE st_t0,sp,48 23305104
DEFINE st_t1,a0,0 23306500
DEFINE st_t1,a0,24 233C6500
+DEFINE st_t1,a0,56 233C6502
DEFINE st_t1,a1,0 23B06500
+DEFINE st_t1,a2,0 23306600
DEFINE st_t1,a3,0 23B06600
DEFINE st_t1,a3,8 23B46600
DEFINE st_t1,a3,16 23B86600
@@ -408,11 +445,12 @@ DEFINE st_t2,a3,0 23B07600
DEFINE st_t2,t0,0 23B07200
DEFINE st_t2,t1,0 23307300
DEFINE lb_a0,a0,0 03450500
+DEFINE lb_a0,a1,0 03C50500
DEFINE lb_a0,a2,0 03450600
DEFINE lb_a0,t0,0 03C50200
DEFINE lb_a1,a1,0 83C50500
-DEFINE lb_a1,a3,0 83C50600
-DEFINE lb_a1,a3,1 83C51600
+DEFINE lb_a1,a2,0 83450600
+DEFINE lb_a1,a2,1 83451600
DEFINE lb_a2,a1,0 03C60500
DEFINE lb_a2,a2,0 03460600
DEFINE lb_a2,t0,0 03C60200
@@ -420,6 +458,8 @@ DEFINE lb_a3,a0,0 83460500
DEFINE lb_a3,a1,0 83C60500
DEFINE lb_a3,a2,0 83460600
DEFINE lb_a3,a3,0 83C60600
+DEFINE lb_a3,t2,0 83C60300
+DEFINE lb_a3,t2,1 83C61300
DEFINE lb_t0,a0,0 83420500
DEFINE lb_t0,a0,1 83421500
DEFINE lb_t0,t0,0 83C20200
@@ -429,6 +469,7 @@ DEFINE lb_t2,t0,0 83C30200
DEFINE lb_t2,t2,0 83C30300
DEFINE sb_a0,a1,0 2380A500
DEFINE sb_a0,a2,0 2300A600
+DEFINE sb_a0,a3,0 2380A600
DEFINE sb_a0,t2,0 2380A300
DEFINE sb_a1,a2,0 2300B600
DEFINE sb_a1,t0,0 2380B200
@@ -439,6 +480,7 @@ DEFINE sb_a3,a0,0 2300D500
DEFINE sb_a3,a1,0 2380D500
DEFINE sb_a3,a2,0 2300D600
DEFINE sb_a3,t2,0 2380D300
+DEFINE sb_t0,a3,0 23805600
DEFINE sb_t1,a0,0 23006500
DEFINE sb_t1,a2,0 23006600
DEFINE sb_t2,a1,0 23807500
@@ -451,9 +493,11 @@ DEFINE b 67800F00
DEFINE beq_a0,a1 6314B50067800F00
DEFINE beq_a0,t0 6314550067800F00
DEFINE beq_a0,t1 6314650067800F00
+DEFINE beq_a1,a0 6394A50067800F00
DEFINE beq_a1,a2 6394C50067800F00
DEFINE beq_a2,a1 6314B60067800F00
DEFINE beq_a2,a3 6314D60067800F00
+DEFINE beq_a3,a0 6394A60067800F00
DEFINE beq_a3,a1 6394B60067800F00
DEFINE beq_a3,a2 6394C60067800F00
DEFINE beq_a3,t0 6394560067800F00
@@ -471,8 +515,10 @@ DEFINE beq_t1,t2 6314730067800F00
DEFINE beq_t2,a2 6394C30067800F00
DEFINE beq_t2,a3 6394D30067800F00
DEFINE beq_t2,t1 6394630067800F00
+DEFINE bne_a0,a1 6304B50067800F00
DEFINE bne_a0,t0 6304550067800F00
DEFINE bne_a0,t1 6304650067800F00
+DEFINE bne_a1,a0 6384A50067800F00
DEFINE bne_a1,a2 6384C50067800F00
DEFINE bne_a1,t0 6384550067800F00
DEFINE bne_a2,a1 6304B60067800F00
@@ -485,6 +531,7 @@ DEFINE bne_t0,a0 6384A20067800F00
DEFINE bne_t0,a2 6384C20067800F00
DEFINE bne_t0,t1 6384620067800F00
DEFINE bne_t0,t2 6384720067800F00
+DEFINE bne_t1,a0 6304A30067800F00
DEFINE bne_t1,a2 6304C30067800F00
DEFINE bne_t1,a3 6304D30067800F00
DEFINE bne_t1,t2 6304730067800F00