boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs

commit b67850fbf7de470673af34368fe0342900082d26
parent c34eadeb5a6d1e48435c97d722fcc15a677a6a54
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 22 Apr 2026 21:19:56 -0700

p1.m1m

Diffstat:
Ap1/P1.M1M | 224+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ap1/aarch64.M1M | 502+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/m1macro.c | 402+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
3 files changed, 1122 insertions(+), 6 deletions(-)

diff --git a/p1/P1.M1M b/p1/P1.M1M @@ -0,0 +1,224 @@ +# P1.M1M -- portable P1v2 macro front-end. +# +# This file assumes an arch-specific backend M1M has already been prepended. +# The backend must provide the target hooks used below: +# %p1_li, %p1_la, %p1_labr, %p1_mov, %p1_rrr, %p1_addi, %p1_logi, +# %p1_shifti, %p1_mem, %p1_ldarg, %p1_b, %p1_br, %p1_call, %p1_callr, +# %p1_ret, %p1_leave, %p1_tail, %p1_tailr, %p1_condb, %p1_condbz, +# %p1_enter, %p1_syscall, and %p1_sys_*. + +# ---- Materialization ------------------------------------------------------ + +%macro li(rd) +%p1_li(rd) +%endm + +%macro la(rd) +%p1_la(rd) +%endm + +%macro la_br() +%p1_labr() +%endm + +# ---- Moves ---------------------------------------------------------------- + +%macro mov(rd, rs) +%p1_mov(rd, rs) +%endm + +# ---- Register arithmetic -------------------------------------------------- + +%macro add(rd, ra, rb) +%p1_rrr(ADD, rd, ra, rb) +%endm + +%macro sub(rd, ra, rb) +%p1_rrr(SUB, rd, ra, rb) +%endm + +%macro and(rd, ra, rb) +%p1_rrr(AND, rd, ra, rb) +%endm + +%macro or(rd, ra, rb) +%p1_rrr(OR, rd, ra, rb) +%endm + +%macro xor(rd, ra, rb) +%p1_rrr(XOR, rd, ra, rb) +%endm + +%macro shl(rd, ra, rb) +%p1_rrr(SHL, rd, ra, rb) +%endm + +%macro shr(rd, ra, rb) +%p1_rrr(SHR, rd, ra, rb) +%endm + +%macro sar(rd, ra, rb) +%p1_rrr(SAR, rd, ra, rb) +%endm + +%macro mul(rd, ra, rb) +%p1_rrr(MUL, rd, ra, rb) +%endm + +%macro div(rd, ra, rb) +%p1_rrr(DIV, rd, ra, rb) +%endm + +%macro rem(rd, ra, rb) +%p1_rrr(REM, rd, ra, rb) +%endm + +# ---- Immediate arithmetic ------------------------------------------------- + +%macro addi(rd, ra, imm) +%p1_addi(rd, ra, imm) +%endm + +%macro andi(rd, ra, imm) +%p1_logi(ANDI, rd, ra, imm) +%endm + +%macro ori(rd, ra, imm) +%p1_logi(ORI, rd, ra, imm) +%endm + +%macro shli(rd, ra, imm) +%p1_shifti(SHLI, rd, ra, imm) +%endm + +%macro shri(rd, ra, imm) +%p1_shifti(SHRI, rd, ra, imm) +%endm + +%macro sari(rd, ra, imm) +%p1_shifti(SARI, rd, ra, imm) +%endm + +# ---- 
Memory and ABI access ------------------------------------------------ + +%macro ld(rt, rn, off) +%p1_mem(LD, rt, rn, off) +%endm + +%macro st(rt, rn, off) +%p1_mem(ST, rt, rn, off) +%endm + +%macro lb(rt, rn, off) +%p1_mem(LB, rt, rn, off) +%endm + +%macro sb(rt, rn, off) +%p1_mem(SB, rt, rn, off) +%endm + +%macro ldarg(rd, slot) +%p1_ldarg(rd, slot) +%endm + +# ---- Branching ------------------------------------------------------------ + +%macro b() +%p1_b() +%endm + +%macro br(rs) +%p1_br(rs) +%endm + +%macro beq(ra, rb) +%p1_condb(BEQ, ra, rb) +%endm + +%macro bne(ra, rb) +%p1_condb(BNE, ra, rb) +%endm + +%macro blt(ra, rb) +%p1_condb(BLT, ra, rb) +%endm + +%macro beqz(ra) +%p1_condbz(BEQZ, ra) +%endm + +%macro bnez(ra) +%p1_condbz(BNEZ, ra) +%endm + +%macro bltz(ra) +%p1_condbz(BLTZ, ra) +%endm + +# ---- Calls, returns, and frames ------------------------------------------ + +%macro call() +%p1_call() +%endm + +%macro callr(rs) +%p1_callr(rs) +%endm + +%macro ret() +%p1_ret() +%endm + +%macro tail() +%p1_tail() +%endm + +%macro tailr(rs) +%p1_tailr(rs) +%endm + +%macro enter(size) +%p1_enter(size) +%endm + +%macro leave() +%p1_leave() +%endm + +# ---- System --------------------------------------------------------------- + +%macro syscall() +%p1_syscall() +%endm + +%macro sys_read() +%p1_sys_read() +%endm + +%macro sys_write() +%p1_sys_write() +%endm + +%macro sys_close() +%p1_sys_close() +%endm + +%macro sys_openat() +%p1_sys_openat() +%endm + +%macro sys_exit() +%p1_sys_exit() +%endm + +%macro sys_clone() +%p1_sys_clone() +%endm + +%macro sys_execve() +%p1_sys_execve() +%endm + +%macro sys_waitid() +%p1_sys_waitid() +%endm diff --git a/p1/aarch64.M1M b/p1/aarch64.M1M @@ -0,0 +1,502 @@ +# aarch64.M1M -- P1v2 aarch64 backend expressed in m1macro. +# +# This mirrors p1/aarch64.py using the m1macro integer builtins: +# %le32(sexpr), %le64(sexpr), and %select(cond, then, else). 
+ +# ---- Native register numbers -------------------------------------------- + +%macro aa64_reg_a0() +0 +%endm +%macro aa64_reg_a1() +1 +%endm +%macro aa64_reg_a2() +2 +%endm +%macro aa64_reg_a3() +3 +%endm +%macro aa64_reg_x4() +4 +%endm +%macro aa64_reg_x5() +5 +%endm +%macro aa64_reg_t0() +9 +%endm +%macro aa64_reg_s0() +19 +%endm +%macro aa64_reg_s1() +20 +%endm +%macro aa64_reg_sp() +31 +%endm +%macro aa64_reg_xzr() +31 +%endm +%macro aa64_reg_lr() +30 +%endm +%macro aa64_reg_br() +17 +%endm +%macro aa64_reg_scratch() +16 +%endm +%macro aa64_reg_x8() +8 +%endm +%macro aa64_reg_save0() +21 +%endm +%macro aa64_reg_save1() +22 +%endm +%macro aa64_reg_save2() +23 +%endm + +%macro aa64_reg(r) +%aa64_reg_##r() +%endm + +%macro aa64_is_sp_a0() +0 +%endm +%macro aa64_is_sp_a1() +0 +%endm +%macro aa64_is_sp_a2() +0 +%endm +%macro aa64_is_sp_a3() +0 +%endm +%macro aa64_is_sp_x4() +0 +%endm +%macro aa64_is_sp_x5() +0 +%endm +%macro aa64_is_sp_t0() +0 +%endm +%macro aa64_is_sp_s0() +0 +%endm +%macro aa64_is_sp_s1() +0 +%endm +%macro aa64_is_sp_sp() +1 +%endm +%macro aa64_is_sp_xzr() +0 +%endm +%macro aa64_is_sp_lr() +0 +%endm +%macro aa64_is_sp_br() +0 +%endm +%macro aa64_is_sp_scratch() +0 +%endm +%macro aa64_is_sp_x8() +0 +%endm +%macro aa64_is_sp_save0() +0 +%endm +%macro aa64_is_sp_save1() +0 +%endm +%macro aa64_is_sp_save2() +0 +%endm + +%macro aa64_is_sp(r) +%aa64_is_sp_##r() +%endm + +# ---- Low-level instruction encoders -------------------------------------- + +%macro aa64_rrr(base, rd, ra, rb) +%le32((| base (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%endm + +%macro aa64_add_imm(rd, ra, imm12) +%le32((| 0x91000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%endm + +%macro aa64_sub_imm(rd, ra, imm12) +%le32((| 0xD1000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%endm + +%macro aa64_mov_rr(dst, src) +%select((= %aa64_is_sp(dst) 1), + %aa64_add_imm(sp, src, 0), + %select((= %aa64_is_sp(src) 1), + 
%aa64_add_imm(dst, sp, 0), + %le32((| 0xAA000000 (<< %aa64_reg(src) 16) (<< 31 5) %aa64_reg(dst))))) +%endm + +%macro aa64_ubfm(rd, ra, immr, imms) +%le32((| 0xD3400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%endm + +%macro aa64_sbfm(rd, ra, immr, imms) +%le32((| 0x93400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%endm + +%macro aa64_movz(rd, imm16) +%le32((| 0xD2800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd))) +%endm + +%macro aa64_movn(rd, imm16) +%le32((| 0x92800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd))) +%endm + +%macro aa64_materialize_small_imm(rd, imm) +%select((>= imm 0), + %aa64_movz(rd, imm), + %aa64_movn(rd, (& (~ imm) 0xFFFF))) +%endm + +%macro aa64_ldst_uimm12(base, rt, rn, off_bytes, size_log2) +%le32((| base (<< (>> off_bytes size_log2) 10) (<< %aa64_reg(rn) 5) %aa64_reg(rt))) +%endm + +%macro aa64_ldst_unscaled(base, rt, rn, off) +%le32((| base (<< (& off 0x1FF) 12) (<< %aa64_reg(rn) 5) %aa64_reg(rt))) +%endm + +%macro aa64_mem_uimm_base_LD() +0xF9400000 +%endm +%macro aa64_mem_uimm_base_ST() +0xF9000000 +%endm +%macro aa64_mem_uimm_base_LB() +0x39400000 +%endm +%macro aa64_mem_uimm_base_SB() +0x39000000 +%endm + +%macro aa64_mem_unscaled_base_LD() +0xF8400000 +%endm +%macro aa64_mem_unscaled_base_ST() +0xF8000000 +%endm +%macro aa64_mem_unscaled_base_LB() +0x38400000 +%endm +%macro aa64_mem_unscaled_base_SB() +0x38000000 +%endm + +%macro aa64_mem_size_LD() +3 +%endm +%macro aa64_mem_size_ST() +3 +%endm +%macro aa64_mem_size_LB() +0 +%endm +%macro aa64_mem_size_SB() +0 +%endm + +%macro aa64_mem_uimm_base(op) +%aa64_mem_uimm_base_##op() +%endm + +%macro aa64_mem_unscaled_base(op) +%aa64_mem_unscaled_base_##op() +%endm + +%macro aa64_mem_size(op) +%aa64_mem_size_##op() +%endm + +%macro aa64_mem_fallback(op, rt, rn, off) +%select((>= off 0), + %aa64_add_imm(scratch, rn, off) + %aa64_ldst_uimm12(%aa64_mem_uimm_base(op), rt, scratch, 0, %aa64_mem_size(op)), + %aa64_sub_imm(scratch, rn, (- 0 off)) + 
%aa64_ldst_uimm12(%aa64_mem_uimm_base(op), rt, scratch, 0, %aa64_mem_size(op))) +%endm + +%macro aa64_mem_after_uimm(op, rt, rn, off) +%select((>= off -256), + %select((<= off 255), + %aa64_ldst_unscaled(%aa64_mem_unscaled_base(op), rt, rn, off), + %aa64_mem_fallback(op, rt, rn, off)), + %aa64_mem_fallback(op, rt, rn, off)) +%endm + +%macro aa64_mem_after_nonneg(op, rt, rn, off) +%select((= (& off (- (<< 1 %aa64_mem_size(op)) 1)) 0), + %select((< off (<< 4096 %aa64_mem_size(op))), + %aa64_ldst_uimm12(%aa64_mem_uimm_base(op), rt, rn, off, %aa64_mem_size(op)), + %aa64_mem_after_uimm(op, rt, rn, off)), + %aa64_mem_after_uimm(op, rt, rn, off)) +%endm + +%macro aa64_mem(op, rt, rn, off) +%select((>= off 0), + %aa64_mem_after_nonneg(op, rt, rn, off), + %aa64_mem_after_uimm(op, rt, rn, off)) +%endm + +%macro aa64_cmp_skip(cond, ra, rb) +%le32((| 0xEB000000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) 31)) +%le32((| 0x54000040 cond)) +%endm + +%macro aa64_br(reg) +%le32((| 0xD61F0000 (<< %aa64_reg(reg) 5))) +%endm + +%macro aa64_blr(reg) +%le32((| 0xD63F0000 (<< %aa64_reg(reg) 5))) +%endm + +%macro aa64_ret() +%le32(0xD65F03C0) +%endm + +%macro aa64_lit64_prefix(rd) +%le32((| 0x58000040 %aa64_reg(rd))) +%le32(0x14000002) +%endm + +# ---- P1 register-register op lowering ----------------------------------- + +%macro aa64_rrr_ADD(rd, ra, rb) +%aa64_rrr(0x8B000000, rd, ra, rb) +%endm +%macro aa64_rrr_SUB(rd, ra, rb) +%aa64_rrr(0xCB000000, rd, ra, rb) +%endm +%macro aa64_rrr_AND(rd, ra, rb) +%aa64_rrr(0x8A000000, rd, ra, rb) +%endm +%macro aa64_rrr_OR(rd, ra, rb) +%aa64_rrr(0xAA000000, rd, ra, rb) +%endm +%macro aa64_rrr_XOR(rd, ra, rb) +%aa64_rrr(0xCA000000, rd, ra, rb) +%endm +%macro aa64_rrr_SHL(rd, ra, rb) +%aa64_rrr(0x9AC02000, rd, ra, rb) +%endm +%macro aa64_rrr_SHR(rd, ra, rb) +%aa64_rrr(0x9AC02400, rd, ra, rb) +%endm +%macro aa64_rrr_SAR(rd, ra, rb) +%aa64_rrr(0x9AC02800, rd, ra, rb) +%endm +%macro aa64_rrr_DIV(rd, ra, rb) +%aa64_rrr(0x9AC00C00, rd, ra, rb) +%endm 
+%macro aa64_rrr_MUL(rd, ra, rb) +%le32((| 0x9B000000 (<< %aa64_reg(rb) 16) (<< 31 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%endm +%macro aa64_rrr_REM(rd, ra, rb) +%le32((| 0x9AC00C00 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(scratch))) +%le32((| 0x9B008000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 10) (<< %aa64_reg(scratch) 5) %aa64_reg(rd))) +%endm + +%macro aa64_rrr_op(op, rd, ra, rb) +%aa64_rrr_##op(rd, ra, rb) +%endm + +# ---- P1 operation lowering ----------------------------------------------- + +%macro p1_li(rd) +%aa64_lit64_prefix(rd) +%endm + +%macro p1_la(rd) +%aa64_lit64_prefix(rd) +%endm + +%macro p1_labr() +%aa64_lit64_prefix(br) +%endm + +%macro p1_mov(rd, rs) +%aa64_mov_rr(rd, rs) +%endm + +%macro p1_rrr(op, rd, ra, rb) +%aa64_rrr_op(op, rd, ra, rb) +%endm + +%macro p1_addi(rd, ra, imm) +%select((>= imm 0), + %aa64_add_imm(rd, ra, imm), + %aa64_sub_imm(rd, ra, (- 0 imm))) +%endm + +%macro p1_logi_ANDI(rd, ra, imm) +%aa64_materialize_small_imm(scratch, imm) +%aa64_rrr(0x8A000000, rd, ra, scratch) +%endm +%macro p1_logi_ORI(rd, ra, imm) +%aa64_materialize_small_imm(scratch, imm) +%aa64_rrr(0xAA000000, rd, ra, scratch) +%endm +%macro p1_logi(op, rd, ra, imm) +%p1_logi_##op(rd, ra, imm) +%endm + +%macro p1_shifti_SHLI(rd, ra, imm) +%aa64_ubfm(rd, ra, (& (- 0 imm) 63), (- 63 imm)) +%endm +%macro p1_shifti_SHRI(rd, ra, imm) +%aa64_ubfm(rd, ra, imm, 63) +%endm +%macro p1_shifti_SARI(rd, ra, imm) +%aa64_sbfm(rd, ra, imm, 63) +%endm +%macro p1_shifti(op, rd, ra, imm) +%p1_shifti_##op(rd, ra, imm) +%endm + +%macro p1_mem(op, rt, rn, off) +%aa64_mem(op, rt, rn, off) +%endm + +%macro p1_ldarg(rd, slot) +%aa64_mem(LD, scratch, sp, 8) +%aa64_mem(LD, rd, scratch, (+ 16 (* 8 slot))) +%endm + +%macro p1_b() +%aa64_br(br) +%endm + +%macro p1_br(rs) +%aa64_br(rs) +%endm + +%macro p1_call() +%aa64_blr(br) +%endm + +%macro p1_callr(rs) +%aa64_blr(rs) +%endm + +%macro p1_ret() +%aa64_ret() +%endm + +%macro p1_leave() +%aa64_mem(LD, lr, sp, 0) +%aa64_mem(LD, x8, 
sp, 8) +%aa64_mov_rr(sp, x8) +%endm + +%macro p1_tail() +%p1_leave() +%aa64_br(br) +%endm + +%macro p1_tailr(rs) +%p1_leave() +%aa64_br(rs) +%endm + +%macro p1_condb_BEQ(ra, rb) +%aa64_cmp_skip(1, ra, rb) +%aa64_br(br) +%endm +%macro p1_condb_BNE(ra, rb) +%aa64_cmp_skip(0, ra, rb) +%aa64_br(br) +%endm +%macro p1_condb_BLT(ra, rb) +%aa64_cmp_skip(10, ra, rb) +%aa64_br(br) +%endm +%macro p1_condb(op, ra, rb) +%p1_condb_##op(ra, rb) +%endm + +%macro p1_condbz_BEQZ(ra) +%le32((| 0xB5000000 (<< 2 5) %aa64_reg(ra))) +%aa64_br(br) +%endm +%macro p1_condbz_BNEZ(ra) +%le32((| 0xB4000000 (<< 2 5) %aa64_reg(ra))) +%aa64_br(br) +%endm +%macro p1_condbz_BLTZ(ra) +%le32((| 0xEB1F001F (<< %aa64_reg(ra) 5))) +%le32((| 0x54000040 10)) +%aa64_br(br) +%endm +%macro p1_condbz(op, ra) +%p1_condbz_##op(ra) +%endm + +%macro p1_enter(size) +%aa64_sub_imm(sp, sp, (& (+ (+ 16 size) 15) -16)) +%aa64_mem(ST, lr, sp, 0) +%aa64_add_imm(x8, sp, (& (+ (+ 16 size) 15) -16)) +%aa64_mem(ST, x8, sp, 8) +%endm + +%macro p1_syscall() +%aa64_mov_rr(x8, a0) +%aa64_mov_rr(save0, a1) +%aa64_mov_rr(save1, a2) +%aa64_mov_rr(save2, a3) +%aa64_mov_rr(a0, save0) +%aa64_mov_rr(a1, save1) +%aa64_mov_rr(a2, save2) +%aa64_mov_rr(a3, t0) +%aa64_mov_rr(x4, s0) +%aa64_mov_rr(x5, s1) +%le32(0xD4000001) +%aa64_mov_rr(a1, save0) +%aa64_mov_rr(a2, save1) +%aa64_mov_rr(a3, save2) +%endm + +# ---- Linux syscall number data words ------------------------------------- + +%macro p1_sys_read() +%le64(63) +%endm +%macro p1_sys_write() +%le64(64) +%endm +%macro p1_sys_close() +%le64(57) +%endm +%macro p1_sys_openat() +%le64(56) +%endm +%macro p1_sys_exit() +%le64(93) +%endm +%macro p1_sys_clone() +%le64(220) +%endm +%macro p1_sys_execve() +%le64(221) +%endm +%macro p1_sys_waitid() +%le64(95) +%endm diff --git a/src/m1macro.c b/src/m1macro.c @@ -1,5 +1,7 @@ #include <stdio.h> #include <string.h> +#include <stdlib.h> +#include <errno.h> /* * Tiny single-pass M1 macro expander. 
@@ -14,6 +16,15 @@ * @loop local token inside a macro body * :@loop / &@loop local label / reference shorthand * :param / &param prefix a single-token parameter with ':' or '&' + * %le32(expr) evaluate an integer S-expression, emit LE 32-bit hex + * %le64(expr) evaluate an integer S-expression, emit LE 64-bit hex + * %select(c,t,e) evaluate condition S-expression; expand t if nonzero else e + * + * Expression syntax is intentionally Lisp-shaped: + * atoms: decimal or 0x-prefixed integer literals + * calls: (+ a b), (- a b), (* a b), (/ a b), (% a b), (<< a b), (>> a b) + * (& a b), (| a b), (^ a b), (~ a), (= a b), (!= a b), + * (< a b), (<= a b), (> a b), (>= a b) * * Notes: * - Macros are define-before-use. There is no prescan. @@ -687,7 +698,8 @@ static int paste_range(int start, int *count) return 1; } -static int expand_call(struct Stream *s, int macro_idx) +static int expand_macro_at(struct Stream *s, int call_pos, int macro_idx, + int *end_pos_out, int *mark_out, int *count_out) { struct Macro *m = &macros[macro_idx]; int arg_starts[MAX_PARAMS]; @@ -699,23 +711,21 @@ static int expand_call(struct Stream *s, int macro_idx) int local_id; int i; - if (s->pos + 1 >= s->count || s->toks[s->pos + 1].kind != TOK_LPAREN) { + if (call_pos + 1 >= s->count || s->toks[call_pos + 1].kind != TOK_LPAREN) { snprintf(error_buf, sizeof(error_buf), "internal macro call error"); return 0; } - if (!parse_args(s, s->pos + 1, arg_starts, arg_ends, &arg_count, &end_pos)) { + if (!parse_args(s, call_pos + 1, arg_starts, arg_ends, &arg_count, &end_pos)) { return 0; } if (arg_count != m->param_count) { snprintf(error_buf, sizeof(error_buf), "macro '%.*s' expects %d args, got %d at line %d", m->name_len, text_buf + m->name_start, - m->param_count, arg_count, s->toks[s->pos].line); + m->param_count, arg_count, s->toks[call_pos].line); return 0; } - s->pos = end_pos; - s->line_start = 0; mark = pool_used; local_id = local_id_next++; @@ -768,7 +778,375 @@ static int expand_call(struct 
Stream *s, int macro_idx) return 0; } pool_used = mark + count; + *end_pos_out = end_pos; + *mark_out = mark; + *count_out = count; + return 1; +} + +static void skip_expr_newlines(struct Token *toks, int count, int *pos) +{ + while (*pos < count && toks[*pos].kind == TOK_NEWLINE) { + *pos += 1; + } +} + +static int parse_int_token(const struct Token *tok, long long *out) +{ + char tmp[128]; + char *end; + unsigned long long uv; + long long sv; + + if (tok->kind != TOK_WORD || tok->len <= 0 || tok->len >= (int)sizeof(tmp)) { + snprintf(error_buf, sizeof(error_buf), "expected integer atom at line %d", tok->line); + return 0; + } + memcpy(tmp, text_buf + tok->start, (size_t)tok->len); + tmp[tok->len] = '\0'; + + errno = 0; + if (tmp[0] == '-') { + sv = strtoll(tmp, &end, 0); + if (errno != 0 || *end != '\0') { + snprintf(error_buf, sizeof(error_buf), "bad integer literal '%s' at line %d", tmp, tok->line); + return 0; + } + *out = sv; + return 1; + } + + uv = strtoull(tmp, &end, 0); + if (errno != 0 || *end != '\0') { + snprintf(error_buf, sizeof(error_buf), "bad integer literal '%s' at line %d", tmp, tok->line); + return 0; + } + *out = (long long)uv; + return 1; +} + +static int eval_expr_tokens(struct Token *toks, int count, int *pos, long long *out); + +static int eval_macro_expr(struct Token *toks, int count, int *pos, long long *out) +{ + struct Stream tmp; + int macro_idx; + int end_pos; + int mark; + int expanded_count; + int expanded_pos = 0; + int ok; + + macro_idx = find_macro(&toks[*pos]); + if (macro_idx < 0 || *pos + 1 >= count || toks[*pos + 1].kind != TOK_LPAREN) { + return parse_int_token(&toks[(*pos)++], out); + } + + tmp.toks = toks; + tmp.count = count; + tmp.pos = *pos; + tmp.line_start = 0; + tmp.pool_mark = -1; + + if (!expand_macro_at(&tmp, *pos, macro_idx, &end_pos, &mark, &expanded_count)) { + return 0; + } + if (expanded_count == 0) { + pool_used = mark; + snprintf(error_buf, sizeof(error_buf), "macro in expression expanded to nothing"); + 
return 0; + } + + ok = eval_expr_tokens(expand_pool + mark, expanded_count, &expanded_pos, out); + if (ok) { + skip_expr_newlines(expand_pool + mark, expanded_count, &expanded_pos); + if (expanded_pos != expanded_count) { + snprintf(error_buf, sizeof(error_buf), "macro in expression expanded to extra tokens"); + ok = 0; + } + } + pool_used = mark; + if (!ok) { + return 0; + } + *pos = end_pos; + return 1; +} + +static int apply_expr_op(const struct Token *op, long long *args, int argc, long long *out) +{ + int i; + + if (token_text_eq(op, "+")) { + if (argc < 1) { + snprintf(error_buf, sizeof(error_buf), "'+' needs at least one argument"); + return 0; + } + *out = args[0]; + for (i = 1; i < argc; i++) { + *out += args[i]; + } + return 1; + } + if (token_text_eq(op, "-")) { + if (argc < 1) { + snprintf(error_buf, sizeof(error_buf), "'-' needs at least one argument"); + return 0; + } + *out = (argc == 1) ? -args[0] : args[0]; + for (i = 1; i < argc; i++) { + *out -= args[i]; + } + return 1; + } + if (token_text_eq(op, "*")) { + if (argc < 1) { + snprintf(error_buf, sizeof(error_buf), "'*' needs at least one argument"); + return 0; + } + *out = args[0]; + for (i = 1; i < argc; i++) { + *out *= args[i]; + } + return 1; + } + if (token_text_eq(op, "/") || token_text_eq(op, "%") || + token_text_eq(op, "<<") || token_text_eq(op, ">>") || + token_text_eq(op, "=") || token_text_eq(op, "==") || + token_text_eq(op, "!=") || token_text_eq(op, "<") || + token_text_eq(op, "<=") || token_text_eq(op, ">") || + token_text_eq(op, ">=")) { + if (argc != 2) { + snprintf(error_buf, sizeof(error_buf), "binary expression operator needs two arguments"); + return 0; + } + if (token_text_eq(op, "/")) { + if (args[1] == 0) { + snprintf(error_buf, sizeof(error_buf), "division by zero"); + return 0; + } + *out = args[0] / args[1]; + } else if (token_text_eq(op, "%")) { + if (args[1] == 0) { + snprintf(error_buf, sizeof(error_buf), "modulo by zero"); + return 0; + } + *out = args[0] % args[1]; 
+ } else if (token_text_eq(op, "<<")) { + *out = (long long)((unsigned long long)args[0] << args[1]); + } else if (token_text_eq(op, ">>")) { + *out = args[0] >> args[1]; + } else if (token_text_eq(op, "=") || token_text_eq(op, "==")) { + *out = args[0] == args[1]; + } else if (token_text_eq(op, "!=")) { + *out = args[0] != args[1]; + } else if (token_text_eq(op, "<")) { + *out = args[0] < args[1]; + } else if (token_text_eq(op, "<=")) { + *out = args[0] <= args[1]; + } else if (token_text_eq(op, ">")) { + *out = args[0] > args[1]; + } else { + *out = args[0] >= args[1]; + } + return 1; + } + if (token_text_eq(op, "&") || token_text_eq(op, "|") || token_text_eq(op, "^")) { + if (argc < 1) { + snprintf(error_buf, sizeof(error_buf), "bitwise expression operator needs at least one argument"); + return 0; + } + *out = args[0]; + for (i = 1; i < argc; i++) { + if (token_text_eq(op, "&")) { + *out &= args[i]; + } else if (token_text_eq(op, "|")) { + *out |= args[i]; + } else { + *out ^= args[i]; + } + } + return 1; + } + if (token_text_eq(op, "~")) { + if (argc != 1) { + snprintf(error_buf, sizeof(error_buf), "'~' needs one argument"); + return 0; + } + *out = ~args[0]; + return 1; + } + + snprintf(error_buf, sizeof(error_buf), "unknown expression operator '%.*s'", + op->len, text_buf + op->start); + return 0; +} + +static int eval_expr_tokens(struct Token *toks, int count, int *pos, long long *out) +{ + struct Token op; + long long args[MAX_PARAMS]; + int argc = 0; + + skip_expr_newlines(toks, count, pos); + if (*pos >= count) { + snprintf(error_buf, sizeof(error_buf), "expected expression"); + return 0; + } + + if (toks[*pos].kind != TOK_LPAREN) { + return eval_macro_expr(toks, count, pos, out); + } + + *pos += 1; + skip_expr_newlines(toks, count, pos); + if (*pos >= count || toks[*pos].kind != TOK_WORD) { + snprintf(error_buf, sizeof(error_buf), "expected expression operator"); + return 0; + } + op = toks[*pos]; + *pos += 1; + + while (1) { + skip_expr_newlines(toks, 
count, pos); + if (*pos >= count) { + snprintf(error_buf, sizeof(error_buf), "unterminated expression"); + return 0; + } + if (toks[*pos].kind == TOK_RPAREN) { + *pos += 1; + return apply_expr_op(&op, args, argc, out); + } + if (argc >= MAX_PARAMS) { + snprintf(error_buf, sizeof(error_buf), "too many expression arguments"); + return 0; + } + if (!eval_expr_tokens(toks, count, pos, &args[argc])) { + return 0; + } + argc++; + } +} + +static int eval_expr_range(struct Token *toks, int start, int end, long long *out) +{ + int pos = start; + + if (!eval_expr_tokens(toks, end, &pos, out)) { + return 0; + } + skip_expr_newlines(toks, end, &pos); + if (pos != end) { + snprintf(error_buf, sizeof(error_buf), "extra tokens after expression"); + return 0; + } + return 1; +} + +static int emit_hex_value(unsigned long long value, int bytes) +{ + char tmp[17]; + static const char hex[] = "0123456789ABCDEF"; + int i; + int text_start; + struct Token tok; + + for (i = 0; i < bytes; i++) { + unsigned int b = (unsigned int)((value >> (8 * i)) & 0xFF); + tmp[2 * i] = hex[b >> 4]; + tmp[2 * i + 1] = hex[b & 0xF]; + } + tmp[2 * bytes] = '\0'; + + text_start = append_text_len(tmp, 2 * bytes); + if (text_start < 0) { + return 0; + } + tok.kind = TOK_WORD; + tok.start = text_start; + tok.len = 2 * bytes; + tok.line = 0; + return emit_token(&tok); +} + +static int expand_builtin_call(struct Stream *s, const char *name) +{ + int arg_starts[MAX_PARAMS]; + int arg_ends[MAX_PARAMS]; + int arg_count; + int end_pos; + long long value; + + if (s->pos + 1 >= s->count || s->toks[s->pos + 1].kind != TOK_LPAREN) { + snprintf(error_buf, sizeof(error_buf), "expected '(' after %s", name); + return 0; + } + if (!parse_args(s, s->pos + 1, arg_starts, arg_ends, &arg_count, &end_pos)) { + return 0; + } + + if (strcmp(name, "%le32") == 0 || strcmp(name, "%le64") == 0) { + if (arg_count != 1) { + snprintf(error_buf, sizeof(error_buf), "%s expects one argument", name); + return 0; + } + if 
(!eval_expr_range(s->toks, arg_starts[0], arg_ends[0], &value)) { + return 0; + } + s->pos = end_pos; + s->line_start = 0; + return emit_hex_value((unsigned long long)value, strcmp(name, "%le32") == 0 ? 4 : 8); + } + + if (strcmp(name, "%select") == 0) { + int selected_start; + int selected_end; + int mark; + int i; + + if (arg_count != 3) { + snprintf(error_buf, sizeof(error_buf), "%%select expects three arguments"); + return 0; + } + if (!eval_expr_range(s->toks, arg_starts[0], arg_ends[0], &value)) { + return 0; + } + selected_start = value != 0 ? arg_starts[1] : arg_starts[2]; + selected_end = value != 0 ? arg_ends[1] : arg_ends[2]; + + s->pos = end_pos; + s->line_start = 0; + if (selected_start == selected_end) { + return 1; + } + + mark = pool_used; + for (i = selected_start; i < selected_end; i++) { + if (!push_pool_token(s->toks[i])) { + pool_used = mark; + return 0; + } + } + return push_stream(expand_pool + mark, selected_end - selected_start, mark); + } + + snprintf(error_buf, sizeof(error_buf), "unknown builtin '%s'", name); + return 0; +} + +static int expand_call(struct Stream *s, int macro_idx) +{ + int end_pos; + int mark; + int count; + + if (!expand_macro_at(s, s->pos, macro_idx, &end_pos, &mark, &count)) { + return 0; + } + s->pos = end_pos; + s->line_start = 0; if (count == 0) { + pool_used = mark; return 1; } return push_stream(expand_pool + mark, count, mark); @@ -814,6 +1192,18 @@ static int process_tokens(void) continue; } + if (tok->kind == TOK_WORD && + (token_text_eq(tok, "%le32") || + token_text_eq(tok, "%le64") || + token_text_eq(tok, "%select")) && + s->pos + 1 < s->count && + s->toks[s->pos + 1].kind == TOK_LPAREN) { + if (!expand_builtin_call(s, text_buf + tok->start)) { + return 0; + } + continue; + } + macro_idx = find_macro(tok); if (macro_idx >= 0 && s->pos + 1 < s->count && s->toks[s->pos + 1].kind == TOK_LPAREN) { if (!expand_call(s, macro_idx)) {