boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 339180395bd3c74be62412953cfb73b442acf17d
parent ea3e075a6aab78970bf2b1726b72fb6688bf70b2
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun,  3 May 2026 16:28:08 -0700

libp1pp: migrate to hex2++ .scope and scoped-loop family

P1pp.P1pp drops the M1pp %scope/%endscope idiom in favour of hex2++'s
.scope/.endscope (which nest natively in the assembler), and replaces
all `::label` / `&::label` definitions and references with dotted
.label / &.label scope-locals. %fn / %fn2 now open a `.scope` around
the function body; the libp1pp__ prefix continues to mark file-scope
symbols only.

The tagged-loop family (%loop_tag, %while_tag_<cc>, %for_lt_tag, plus
%break(tag) / %continue(tag), all of which built their scope-local
labels by `##` paste) is replaced by %loop_scoped /
%while_scoped_<cc> / %for_lt_scoped: each opens its own .scope and
defines :.top / :.end inside it, so the generic `%break` /
`%continue` resolve through hex2++'s innermost-out scope walk.

The trace-message data block stops relying on M0-style implicit NUL
termination and pads with `.align 8` directives so following
data labels stay 8-aligned for aarch64 LDR.

tests/P1/*.P1pp: same `::` -> `:.` rewrite across every P1pp fixture.
loop-tag-scoping.P1pp is rewritten to verify the scoped-loop variant
(its original tag-collision regression is structurally impossible
under .scope nesting). cmpset.P1pp gets its print length corrected by
inspection; the old value relied on M0's implicit NUL termination
padding the print buffer.

docs/LIBP1PP.md: re-document the unscoped/scoped split, the new
%break / %continue contract, and the function-definition scope rules.

Diffstat:
M P1/P1pp.P1pp | 591 ++++++++++++++++++++++++++++++++++++++++---------------------------------------
M docs/LIBP1PP.md | 206 ++++++++++++++++++++++++++++++++++++++-----------------------------------------
M tests/P1/cmpset.P1pp | 60 ++++++++++++++++++++++++++++++------------------------------
M tests/P1/ext-macros.P1pp | 30 +++++++++++++++---------------
M tests/P1/lea-slot.P1pp | 10 +++++-----
M tests/P1/loop-tag-scoping.P1pp | 62 +++++++++++++++-----------------------------------------------
M tests/P1/memcpy-call.P1pp | 12 ++++++------
M tests/P1/ptr-arith.P1pp | 18 +++++++++---------
M tests/P1/sub-word-mem.P1pp | 26 +++++++++++++-------------
M tests/P1/switch-case.P1pp | 30 +++++++++++++++---------------
M tests/P1/sys_calls.P1pp | 30 +++++++++++++++---------------
M tests/P1/unops.P1pp | 18 +++++++++---------
12 files changed, 533 insertions(+), 560 deletions(-)

diff --git a/P1/P1pp.P1pp b/P1/P1pp.P1pp @@ -383,7 +383,7 @@ # %switch_case(ctrl, scratch, key, target) # If `ctrl == key`, branch to `target`. `scratch` is used to # materialize the key as a register operand. `target` is the full -# branch target (e.g. `&::case_3`). +# branch target (e.g. `&.case_3`). # # A code generator emitting a switch dispatcher emits one # %switch_case per case, then an unconditional branch to the default. @@ -651,125 +651,144 @@ %b(&@top) %endm -# ---- Tagged loops ------------------------------------------------------- +# ---- Scoped loops ------------------------------------------------------- # -# Each tagged form emits two scope-local labels `tag_top` and `tag_end`, -# built by `##` paste so references cross every macro boundary cleanly. -# `%break(tag)` jumps to `tag_end`; `%continue(tag)` jumps to `tag_top`. +# Each scoped form opens a hex2++ `.scope` and defines two dotted labels +# inside it: `.top` (where `%continue` should land) and `.end` +# (immediately after the loop, where `%break` should land). The generic +# `%break` and `%continue` macros below emit branches to `&.end` / +# `&.top`; hex2++'s innermost-out scope walk binds those references to +# the nearest enclosing scoped loop. # -# The labels use `::` so M1pp mangles them with the enclosing %scope -# (which `%fn` opens automatically). Without that, two TUs catm'd into -# one binary — e.g. libc.P1pp + tcc.flat.P1pp — would both define -# `:L0_top` ... `:LN_top` from cc.scm's per-TU label counter, and the -# late definitions would silently steal earlier branches. Outside any -# %scope (hand-written libp1pp callers, libp1pp's own internals at file -# scope) `::tag_top` degrades to `:tag_top` — backwards compatible. - -%macro loop_tag(tag, body) - :: ## tag ## _top +# Nested scoped loops shadow each other: a `%break` inside an inner loop +# targets the inner loop's `.end`. 
Non-loop control-flow macros +# (`%if_<cc>`, `%ifelse_<cc>`) do not open a `.scope`, so `%break` / +# `%continue` inside them passes through to the enclosing scoped loop. + +%macro loop_scoped(body) + .scope + :.top body - %b(&:: ## tag ## _top) - :: ## tag ## _end + %b(&.top) + :.end + .endscope %endm -%macro while_tag_eq(tag, ra, rb, body) - %b(&:: ## tag ## _top) - :@body +%macro while_scoped_eq(ra, rb, body) + .scope + %b(&.top) + :.body body - :: ## tag ## _top - %beq(ra, rb, &@body) - :: ## tag ## _end + :.top + %beq(ra, rb, &.body) + :.end + .endscope %endm -%macro while_tag_ne(tag, ra, rb, body) - %b(&:: ## tag ## _top) - :@body +%macro while_scoped_ne(ra, rb, body) + .scope + %b(&.top) + :.body body - :: ## tag ## _top - %bne(ra, rb, &@body) - :: ## tag ## _end + :.top + %bne(ra, rb, &.body) + :.end + .endscope %endm -%macro while_tag_lt(tag, ra, rb, body) - %b(&:: ## tag ## _top) - :@body +%macro while_scoped_lt(ra, rb, body) + .scope + %b(&.top) + :.body body - :: ## tag ## _top - %blt(ra, rb, &@body) - :: ## tag ## _end + :.top + %blt(ra, rb, &.body) + :.end + .endscope %endm -%macro while_tag_ltu(tag, ra, rb, body) - %b(&:: ## tag ## _top) - :@body +%macro while_scoped_ltu(ra, rb, body) + .scope + %b(&.top) + :.body body - :: ## tag ## _top - %bltu(ra, rb, &@body) - :: ## tag ## _end + :.top + %bltu(ra, rb, &.body) + :.end + .endscope %endm -%macro while_tag_eqz(tag, ra, body) - %b(&:: ## tag ## _top) - :@body +%macro while_scoped_eqz(ra, body) + .scope + %b(&.top) + :.body body - :: ## tag ## _top - %beqz(ra, &@body) - :: ## tag ## _end + :.top + %beqz(ra, &.body) + :.end + .endscope %endm -%macro while_tag_nez(tag, ra, body) - %b(&:: ## tag ## _top) - :@body +%macro while_scoped_nez(ra, body) + .scope + %b(&.top) + :.body body - :: ## tag ## _top - %bnez(ra, &@body) - :: ## tag ## _end + :.top + %bnez(ra, &.body) + :.end + .endscope %endm -%macro while_tag_ltz(tag, ra, body) - %b(&:: ## tag ## _top) - :@body +%macro while_scoped_ltz(ra, body) + 
.scope + %b(&.top) + :.body body - :: ## tag ## _top - %bltz(ra, &@body) - :: ## tag ## _end + :.top + %bltz(ra, &.body) + :.end + .endscope %endm -%macro for_lt_tag(tag, i_reg, n_reg, body) +%macro for_lt_scoped(i_reg, n_reg, body) + .scope %li(i_reg, 0) - %b(&:: ## tag ## _test) - :@body + %b(&.test) + :.body body - :: ## tag ## _top + :.top %addi(i_reg, i_reg, 1) - :: ## tag ## _test - %blt(i_reg, n_reg, &@body) - :: ## tag ## _end + :.test + %blt(i_reg, n_reg, &.body) + :.end + .endscope %endm -%macro break(tag) - %b(&:: ## tag ## _end) +%macro break() + %b(&.end) %endm -%macro continue(tag) - %b(&:: ## tag ## _top) +%macro continue() + %b(&.top) %endm # ========================================================================= # %fn -- scope-introducing function definition # ========================================================================= # -# Pushes a scope named after the function so `::foo` inside the body -# mangles to `name__foo`. The body is bracketed by %enter(size) and -# %eret, so functions defined with %fn always carry a standard frame. +# Opens a hex2++ `.scope` around the body so dotted local labels (`:.foo`, +# `&.foo`) are private to this function. The body is bracketed by +# %enter(size) and %eret, so functions defined with %fn always carry a +# standard frame. %macro fn(name, size, body) : ## name - %scope name + .scope %enter(size) body %eret - %endscope + .endscope %endm # ========================================================================= @@ -778,19 +797,19 @@ # # Like %fn, but the second argument is a braced list of local names # instead of a byte frame size. Synthesizes a `name_FRAME` %struct -# (one 8-byte slot per local), opens both an m1pp scope and an m1pp -# frame named after the function, and sizes the stack frame from -# %name_FRAME.SIZE. +# (one 8-byte slot per local), opens both a hex2++ `.scope` and an +# m1pp `%frame` named after the function, and sizes the stack frame +# from %name_FRAME.SIZE. 
# # Inside the body these helpers resolve against the enclosing frame: # %local(slot) byte offset of local `slot` # %stl(reg, slot) store reg into local `slot` # %ldl(reg, slot) load local `slot` into reg # -# Because m1pp tracks frames in a single slot independent of the -# %scope stack, %local / %stl / %ldl keep resolving against the -# function even when the body opens nested %scope blocks (e.g. from -# a control-flow macro). +# m1pp tracks the active frame in a single slot independent of hex2++ +# scope nesting, so %local / %stl / %ldl keep resolving against the +# function even when the body opens nested `.scope` blocks (e.g. from +# a scoped control-flow macro). # # Locals follow the same braces convention as `body`: a multi-local # list must be braced (`{a, b, c}`); a zero-local function uses `{}`. @@ -798,13 +817,13 @@ %macro fn2(name, locals, body) %struct name ## _FRAME { locals } : ## name - %scope name + .scope %frame name %enter(% ## name ## _FRAME.SIZE) body %eret %endframe - %endscope + .endscope %endm %macro stl(reg, slot) %st(reg, sp, %local(slot)) %endm @@ -875,118 +894,118 @@ # Leaf. Copies n bytes from src to dst. No overlap support where # dst > src && dst < src + n. 
:memcpy -%scope memcpy +.scope %mov(a3, a0) %li(t0, 0) - ::loop - %beq(t0, a2, &::done) + :.loop + %beq(t0, a2, &.done) %add(t1, a1, t0) %lb(t1, t1, 0) %add(t2, a3, t0) %sb(t1, t2, 0) %addi(t0, t0, 1) - %b(&::loop) - ::done + %b(&.loop) + :.done %mov(a0, a3) %ret -%endscope +.endscope # memset(dst=a0, byte=a1, n=a2) -> dst (a0) :memset -%scope memset +.scope %mov(a3, a0) %li(t0, 0) - ::loop - %beq(t0, a2, &::done) + :.loop + %beq(t0, a2, &.done) %add(t1, a3, t0) %sb(a1, t1, 0) %addi(t0, t0, 1) - %b(&::loop) - ::done + %b(&.loop) + :.done %mov(a0, a3) %ret -%endscope +.endscope # memcmp(a=a0, b=a1, n=a2) -> -1/0/1 (a0) :memcmp -%scope memcmp +.scope %li(t0, 0) - ::loop - %beq(t0, a2, &::eq) + :.loop + %beq(t0, a2, &.eq) %add(t1, a0, t0) %lb(t1, t1, 0) %add(t2, a1, t0) %lb(t2, t2, 0) - %bltu(t1, t2, &::lt) - %bltu(t2, t1, &::gt) + %bltu(t1, t2, &.lt) + %bltu(t2, t1, &.gt) %addi(t0, t0, 1) - %b(&::loop) - ::lt + %b(&.loop) + :.lt %li(a0, -1) %ret - ::gt + :.gt %li(a0, 1) %ret - ::eq + :.eq %li(a0, 0) %ret -%endscope +.endscope # strlen(cstr=a0) -> n (a0) :strlen -%scope strlen +.scope %mov(a1, a0) - ::loop + :.loop %lb(t0, a1, 0) - %beqz(t0, &::done) + %beqz(t0, &.done) %addi(a1, a1, 1) - %b(&::loop) - ::done + %b(&.loop) + :.done %sub(a0, a1, a0) %ret -%endscope +.endscope # streq(a=a0, b=a1) -> 0 or 1 :streq -%scope streq - ::loop +.scope + :.loop %lb(t0, a0, 0) %lb(t1, a1, 0) - %bne(t0, t1, &::ne) - %beqz(t0, &::eq) + %bne(t0, t1, &.ne) + %beqz(t0, &.eq) %addi(a0, a0, 1) %addi(a1, a1, 1) - %b(&::loop) - ::ne + %b(&.loop) + :.ne %li(a0, 0) %ret - ::eq + :.eq %li(a0, 1) %ret -%endscope +.endscope # strcmp(a=a0, b=a1) -> -1/0/1 :strcmp -%scope strcmp - ::loop +.scope + :.loop %lb(t0, a0, 0) %lb(t1, a1, 0) - %bltu(t0, t1, &::lt) - %bltu(t1, t0, &::gt) - %beqz(t0, &::eq) + %bltu(t0, t1, &.lt) + %bltu(t1, t0, &.gt) + %beqz(t0, &.eq) %addi(a0, a0, 1) %addi(a1, a1, 1) - %b(&::loop) - ::lt + %b(&.loop) + :.lt %li(a0, -1) %ret - ::gt + :.gt %li(a0, 1) %ret - ::eq + :.eq 
%li(a0, 0) %ret -%endscope +.endscope # ========================================================================= # Integer parsing and formatting @@ -996,7 +1015,7 @@ # Uses an 8-byte frame slot to save buf_start; all hot-loop state lives # in caller-saved registers. :parse_dec -%scope parse_dec +.scope %enter(8) %st(a0, sp, 0) %add(a3, a0, a1) @@ -1004,53 +1023,53 @@ %li(t0, 0) %li(t1, 0) - %beq(a2, a3, &::after_sign) + %beq(a2, a3, &.after_sign) %lb(t2, a2, 0) %addi(t2, t2, -45) - %bnez(t2, &::after_sign) + %bnez(t2, &.after_sign) %li(t0, 1) %addi(a2, a2, 1) - ::after_sign + :.after_sign %mov(a1, a2) - ::digit_loop - %beq(a2, a3, &::digits_done) + :.digit_loop + %beq(a2, a3, &.digits_done) %lb(t2, a2, 0) %addi(t2, t2, -48) - %bltz(t2, &::digits_done) + %bltz(t2, &.digits_done) %li(a0, 9) - %bltu(a0, t2, &::digits_done) + %bltu(a0, t2, &.digits_done) %li(a0, 10) %mul(t1, t1, a0) %add(t1, t1, t2) %addi(a2, a2, 1) - %b(&::digit_loop) + %b(&.digit_loop) - ::digits_done - %beq(a2, a1, &::no_digits) + :.digits_done + %beq(a2, a1, &.no_digits) - %bnez(t0, &::apply_sign) - %b(&::compute_return) - ::apply_sign + %bnez(t0, &.apply_sign) + %b(&.compute_return) + :.apply_sign %li(a0, 0) %sub(t1, a0, t1) - ::compute_return + :.compute_return %ld(a0, sp, 0) %sub(a1, a2, a0) %mov(a0, t1) %eret - ::no_digits + :.no_digits %li(a0, 0) %li(a1, 0) %eret -%endscope +.endscope # parse_hex(buf=a0, len=a1) -> (value=a0, consumed=a1) :parse_hex -%scope parse_hex +.scope %enter(8) %st(a0, sp, 0) %add(a3, a0, a1) @@ -1058,49 +1077,49 @@ %li(t1, 0) %mov(a1, a2) - ::loop - %beq(a2, a3, &::done) + :.loop + %beq(a2, a3, &.done) %lb(t2, a2, 0) %addi(t0, t2, -48) - %bltz(t0, &::check_lower) + %bltz(t0, &.check_lower) %li(a0, 9) - %bltu(a0, t0, &::check_lower) - %b(&::accept) + %bltu(a0, t0, &.check_lower) + %b(&.accept) - ::check_lower + :.check_lower %addi(t0, t2, -97) - %bltz(t0, &::check_upper) + %bltz(t0, &.check_upper) %li(a0, 5) - %bltu(a0, t0, &::check_upper) + %bltu(a0, t0, 
&.check_upper) %addi(t0, t0, 10) - %b(&::accept) + %b(&.accept) - ::check_upper + :.check_upper %addi(t0, t2, -65) - %bltz(t0, &::done) + %bltz(t0, &.done) %li(a0, 5) - %bltu(a0, t0, &::done) + %bltu(a0, t0, &.done) %addi(t0, t0, 10) - ::accept + :.accept %shli(t1, t1, 4) %or(t1, t1, t0) %addi(a2, a2, 1) - %b(&::loop) + %b(&.loop) - ::done - %beq(a2, a1, &::no_digits) + :.done + %beq(a2, a1, &.no_digits) %ld(a0, sp, 0) %sub(a1, a2, a0) %mov(a0, t1) %eret - ::no_digits + :.no_digits %li(a0, 0) %li(a1, 0) %eret -%endscope +.endscope # fmt_dec(buf=a0, value=a1) -> n_bytes (a0) # @@ -1108,92 +1127,92 @@ # `value % 10`, negated when value is negative. This avoids the # INT_MIN-overflow trap that `value = -value` would hit. :fmt_dec -%scope fmt_dec +.scope %enter(8) %st(a0, sp, 0) - %bltz(a1, &::is_neg) - %b(&::count) - ::is_neg + %bltz(a1, &.is_neg) + %b(&.count) + :.is_neg %li(t0, 45) %sb(t0, a0, 0) %addi(a0, a0, 1) - ::count + :.count %mov(t0, a1) %li(a2, 1) %li(t1, 10) - ::count_loop + :.count_loop %div(t0, t0, t1) - %beqz(t0, &::count_done) + %beqz(t0, &.count_done) %addi(a2, a2, 1) - %b(&::count_loop) - ::count_done + %b(&.count_loop) + :.count_done %add(a3, a0, a2) - ::dig_loop + :.dig_loop %addi(a3, a3, -1) %rem(t0, a1, t1) - %bltz(t0, &::neg_digit) - %b(&::write_digit) - ::neg_digit + %bltz(t0, &.neg_digit) + %b(&.write_digit) + :.neg_digit %li(t2, 0) %sub(t0, t2, t0) - ::write_digit + :.write_digit %addi(t0, t0, 48) %sb(t0, a3, 0) %div(a1, a1, t1) - %bnez(a1, &::dig_loop) + %bnez(a1, &.dig_loop) %ld(t2, sp, 0) %add(a0, a0, a2) %sub(a0, a0, t2) %eret -%endscope +.endscope # fmt_hex(buf=a0, value=a1) -> n_bytes (a0) :fmt_hex -%scope fmt_hex +.scope %enter(8) %st(a0, sp, 0) - %bnez(a1, &::nonzero) + %bnez(a1, &.nonzero) %li(t0, 48) %sb(t0, a0, 0) %li(a0, 1) %eret - ::nonzero + :.nonzero %mov(t0, a1) %li(a2, 0) - ::count_loop + :.count_loop %addi(a2, a2, 1) %shri(t0, t0, 4) - %bnez(t0, &::count_loop) + %bnez(t0, &.count_loop) %add(a3, a0, a2) - ::dig_loop + 
:.dig_loop %addi(a3, a3, -1) %andi(t0, a1, 15) %li(t1, 10) - %bltu(t0, t1, &::is_letter) + %bltu(t0, t1, &.is_letter) %addi(t0, t0, -10) %addi(t0, t0, 97) - %b(&::write_digit) - ::is_letter + %b(&.write_digit) + :.is_letter %addi(t0, t0, 48) - ::write_digit + :.write_digit %sb(t0, a3, 0) %shri(a1, a1, 4) - %bnez(a1, &::dig_loop) + %bnez(a1, &.dig_loop) %ld(t2, sp, 0) %add(a0, a0, a2) %sub(a0, a0, t2) %eret -%endscope +.endscope # ========================================================================= # Character predicates @@ -1201,81 +1220,81 @@ # is_digit(c=a0) -> 0 or 1 :is_digit -%scope is_digit +.scope %addi(t0, a0, -48) %li(t1, 10) %li(a0, 1) - %bltu(t0, t1, &::done) + %bltu(t0, t1, &.done) %li(a0, 0) - ::done + :.done %ret -%endscope +.endscope # is_hex_digit(c=a0) -> 0 or 1 :is_hex_digit -%scope is_hex_digit +.scope %li(t2, 1) %addi(t0, a0, -48) %li(t1, 10) - %bltu(t0, t1, &::done) + %bltu(t0, t1, &.done) %addi(t0, a0, -97) %li(t1, 6) - %bltu(t0, t1, &::done) + %bltu(t0, t1, &.done) %addi(t0, a0, -65) - %bltu(t0, t1, &::done) + %bltu(t0, t1, &.done) %li(t2, 0) - ::done + :.done %mov(a0, t2) %ret -%endscope +.endscope # is_space(c=a0) -> 0 or 1 :is_space -%scope is_space +.scope %li(t2, 1) %addi(t0, a0, -32) - %beqz(t0, &::done) + %beqz(t0, &.done) %addi(t0, a0, -9) %li(t1, 5) - %bltu(t0, t1, &::done) + %bltu(t0, t1, &.done) %li(t2, 0) - ::done + :.done %mov(a0, t2) %ret -%endscope +.endscope # is_alpha(c=a0) -> 0 or 1 :is_alpha -%scope is_alpha +.scope %li(t2, 1) %addi(t0, a0, -97) %li(t1, 26) - %bltu(t0, t1, &::done) + %bltu(t0, t1, &.done) %addi(t0, a0, -65) - %bltu(t0, t1, &::done) + %bltu(t0, t1, &.done) %li(t2, 0) - ::done + :.done %mov(a0, t2) %ret -%endscope +.endscope # is_alnum(c=a0) -> 0 or 1 :is_alnum -%scope is_alnum +.scope %li(t2, 1) %addi(t0, a0, -48) %li(t1, 10) - %bltu(t0, t1, &::done) + %bltu(t0, t1, &.done) %addi(t0, a0, -97) %li(t1, 26) - %bltu(t0, t1, &::done) + %bltu(t0, t1, &.done) %addi(t0, a0, -65) - %bltu(t0, t1, &::done) + 
%bltu(t0, t1, &.done) %li(t2, 0) - ::done + :.done %mov(a0, t2) %ret -%endscope +.endscope # ========================================================================= # Raw syscall wrappers @@ -1350,13 +1369,13 @@ # sys_exit(code=a0) -> never returns :sys_exit -%scope sys_exit +.scope %mov(a1, a0) %li(a0, %p1_sys_exit) %syscall - ::spin - %b(&::spin) -%endscope + :.spin + %b(&.spin) +.endscope # ========================================================================= # Print helpers @@ -1372,20 +1391,20 @@ %mov(s0, a0) %mov(s1, a1) - ::loop - %beqz(s1, &::done_ok) + :.loop + %beqz(s1, &.done_ok) %li(a0, 1) %mov(a1, s0) %mov(a2, s1) %call(&sys_write) - %bltz(a0, &::done) + %bltz(a0, &.done) %add(s0, s0, a0) %sub(s1, s1, a0) - %b(&::loop) + %b(&.loop) - ::done_ok + :.done_ok %li(a0, 0) - ::done + :.done %ld(s0, sp, 0) %ld(s1, sp, 8) }) @@ -1396,20 +1415,20 @@ %mov(s0, a0) %mov(s1, a1) - ::loop - %beqz(s1, &::done_ok) + :.loop + %beqz(s1, &.done_ok) %li(a0, 2) %mov(a1, s0) %mov(a2, s1) %call(&sys_write) - %bltz(a0, &::done) + %bltz(a0, &.done) %add(s0, s0, a0) %sub(s1, s1, a0) - %b(&::loop) + %b(&.loop) - ::done_ok + :.done_ok %li(a0, 0) - ::done + :.done %ld(s0, sp, 0) %ld(s1, sp, 8) }) @@ -1419,14 +1438,14 @@ %call(&print) %mov(s0, a0) - %bltz(s0, &::done) + %bltz(s0, &.done) %la(a0, &libp1pp__newline) %li(a1, 1) %call(&print) %mov(s0, a0) - ::done + :.done %mov(a0, s0) %ld(s0, sp, 0) }) @@ -1436,14 +1455,14 @@ %call(&eprint) %mov(s0, a0) - %bltz(s0, &::done) + %bltz(s0, &.done) %la(a0, &libp1pp__newline) %li(a1, 1) %call(&eprint) %mov(s0, a0) - ::done + :.done %mov(a0, s0) %ld(s0, sp, 0) }) @@ -1503,7 +1522,7 @@ %li(a1, 0) %li(a2, 0) %call(&sys_open) - %bltz(a0, &::open_fail) + %bltz(a0, &.open_fail) %mov(s3, a0) %mov(a0, s3) @@ -1516,17 +1535,17 @@ %call(&sys_close) %mov(a0, s0) - %bltz(a0, &::read_fail) - %b(&::done) + %bltz(a0, &.read_fail) + %b(&.done) - ::read_fail + :.read_fail %li(a0, -1) - %b(&::done) + %b(&.done) - ::open_fail + :.open_fail %li(a0, -1) - 
::done + :.done %ld(s0, sp, 0) %ld(s1, sp, 8) %ld(s2, sp, 16) @@ -1547,20 +1566,20 @@ %mov(s1, a1) %mov(s2, a2) - ::loop - %beqz(s2, &::done_ok) + :.loop + %beqz(s2, &.done_ok) %mov(a0, s0) %mov(a1, s1) %mov(a2, s2) %call(&sys_write) - %bltz(a0, &::done) + %bltz(a0, &.done) %add(s1, s1, a0) %sub(s2, s2, a0) - %b(&::loop) + %b(&.loop) - ::done_ok + :.done_ok %li(a0, 0) - ::done + :.done %ld(s0, sp, 0) %ld(s1, sp, 8) %ld(s2, sp, 16) @@ -1581,7 +1600,7 @@ %li(a1, 0x241) %li(a2, 0x1A4) %call(&sys_open) - %bltz(a0, &::open_fail) + %bltz(a0, &.open_fail) %mov(s2, a0) %mov(a0, s2) @@ -1594,18 +1613,18 @@ %call(&sys_close) %mov(a0, s0) - %bltz(a0, &::fail_ret) + %bltz(a0, &.fail_ret) %li(a0, 0) - %b(&::done) + %b(&.done) - ::fail_ret + :.fail_ret %li(a0, -1) - %b(&::done) + %b(&.done) - ::open_fail + :.open_fail %li(a0, -1) - ::done + :.done %ld(s0, sp, 0) %ld(s1, sp, 8) %ld(s2, sp, 16) @@ -1632,21 +1651,21 @@ # offset starting at 0. For each entry: *slot = base + offset, then # offset += size. Leaf. :init_arenas -%scope init_arenas +.scope %li(t0, 0) - ::loop - %beq(a1, a2, &::done) + :.loop + %beq(a1, a2, &.done) %ld(t1, a1, 0) %ld(t2, a1, 8) %add(a3, a0, t0) %st(a3, t1, 0) %add(t0, t0, t2) %addi(a1, a1, 16) - %b(&::loop) - ::done + %b(&.loop) + :.done %li(a0, 0) %ret -%endscope +.endscope # ========================================================================= # Bump allocator @@ -1672,7 +1691,7 @@ # not exceed base + cap. On success, advance the cursor and return the # pre-advance value; on failure, leave the cursor untouched and return 0. 
:bump_alloc -%scope bump_alloc +.scope %addi(a0, a0, 7) %li(t0, -8) %and(a0, a0, t0) @@ -1684,14 +1703,14 @@ %la(a1, &libp1pp__bump_cap) %ld(a3, a1, 0) %add(a3, a2, a3) - %bltu(a3, t2, &::fail) + %bltu(a3, t2, &.fail) %st(t2, t0, 0) %mov(a0, t1) %ret - ::fail + :.fail %li(a0, 0) %ret -%endscope +.endscope # bump_mark() -> saved :bump_mark @@ -1727,8 +1746,8 @@ %call(&eprint) %li(a0, 1) %call(&sys_exit) - ::spin - %b(&::spin) + :.spin + %b(&.spin) }) # ========================================================================= @@ -1830,24 +1849,16 @@ %ld(s3, sp, 24) }) -# Tracepoint message fragments. M0 implicitly NUL-terminates every -# `"..."` literal, so each blob is (visible bytes + 1 NUL) + hex -# padding to land on a 16- or 8-byte boundary, keeping the next -# file-scope label 8-aligned (aarch64 LDR / 4-byte inline-data loads -# fault otherwise. -# eprint reads only the leading visible-byte count (8, 1, 2); the -# trailing NUL + hex zeros are pad. -# -# trace_pre = "[trace @" 8 visible + 1 NUL + 7 pad = 16 bytes -# trace_sep = " " 1 visible + 1 NUL + 6 pad = 8 bytes -# trace_post = ']' '\n' 2 visible 6 pad = 8 bytes -# -# trace_post avoids `"]"` because that would inject a NUL between -# the `]` and the `\n` we need adjacent — raw hex bytes give us -# exact placement. -:libp1pp__trace_pre "[trace @" '00000000000000' -:libp1pp__trace_sep " " '000000000000' -:libp1pp__trace_post '5d' '0a' '000000000000' +# Tracepoint message fragments. eprint reads only the leading +# visible-byte count (8, 1, 2); .align 8 keeps each fragment and the +# data labels that follow 8-byte aligned (aarch64 LDR / 4-byte +# inline-data loads fault otherwise). +:libp1pp__trace_pre "[trace @" +.align 8 +:libp1pp__trace_sep " " +.align 8 +:libp1pp__trace_post "]\n" +.align 8 # ========================================================================= # Internal data diff --git a/docs/LIBP1PP.md b/docs/LIBP1PP.md @@ -25,11 +25,9 @@ definitions. ### Width -libp1pp v1 targets **P1-64 only**. 
Word size is 8 bytes. Pointer values, +libp1pp targets **P1-64 only**. Word size is 8 bytes. Pointer values, integer results, and syscall arguments are all one word. -Porting libp1pp to P1-32 is out of scope for this document. - ### Syscall numbers libp1pp does not hard-code syscall numbers. It relies on the backend header @@ -70,12 +68,17 @@ region the caller explicitly installed. ### Internal label namespace -libp1pp reserves the label prefix `libp1pp__` for all internal state and -helper labels — bump allocator cursor/base/cap words, internal scratch -buffers used by `print_int` / `print_hex`, private helper routines. -User code must not define labels beginning with `libp1pp__`, and must -not reference them directly; everything libp1pp exposes is reachable -through its documented functions and macros. +libp1pp reserves the **global** label prefix `libp1pp__` for all internal +state and helper labels — bump allocator cursor/base/cap words, internal +scratch buffers used by `print_int` / `print_hex`, private helper routines. +User code must not define globals beginning with `libp1pp__`, and must not +reference them directly; everything libp1pp exposes is reachable through +its documented functions and macros. + +Labels *inside* libp1pp functions are scope-local hex2++ dotted labels +(`:.loop`, `&.done`) and never appear in the global namespace, so they +cannot collide with user labels. The `libp1pp__` prefix matters only for +file-scope data and helper functions. Public entry points (the functions and macros listed in this document, such as `memcpy`, `bump_alloc`, `%if_eq`) are unprefixed. A user who @@ -88,11 +91,10 @@ libp1pp requires no global init step at program entry. Subsystems are either self-initializing or require an explicit per-subsystem init call, documented with that subsystem. 
-In v1 the only subsystem that requires explicit init is the bump -allocator: `bump_alloc` called before `bump_init` returns `0` (the -"arena exhausted" sentinel) because no arena is installed yet. Every -other libp1pp function is callable from the first instruction of -`p1_main`. +The only subsystem that requires explicit init is the bump allocator: +`bump_alloc` called before `bump_init` returns `0` (the "arena +exhausted" sentinel) because no arena is installed yet. Every other +libp1pp function is callable from the first instruction of `p1_main`. `p1_main` itself inherits the portable entry contract from P1: `a0` = `argc`, `a1` = `argv`. libp1pp does not wrap or interpose on @@ -101,9 +103,20 @@ other libp1pp function is callable from the first instruction of ## Control-flow macros All control-flow macros take braced blocks as arguments. The braces are -M1PP argument delimiters; they are stripped on substitution. Inside a -block, `:@name` and `&@name` local labels resolve within the macro -expansion's own namespace, so nested control flow is safe. +M1PP argument delimiters; they are stripped on substitution. + +There are two flavors: + +- **Unscoped** forms (`%if_<cc>`, `%while_<cc>`, `%for_lt`, `%loop`) use + M1PP per-expansion `@`-mangled labels for their internal targets. They + emit no hex2++ `.scope` and do not interact with `%break` / + `%continue`. Use these when the body does not need mid-body exit. + +- **Scoped** forms (`%loop_scoped`, `%while_scoped_<cc>`, + `%for_lt_scoped`) open a nested hex2++ `.scope` and define dotted + labels `.top` and `.end` inside it. The generic `%break` and + `%continue` macros resolve through hex2++'s innermost-out scope + lookup, so they always target the nearest enclosing scoped loop. ### Condition suffixes @@ -139,7 +152,8 @@ falls through to the code after the macro. Neither form establishes a new frame or changes `sp`. 
A block that issues a `CALL` must sit inside a function that has already established a frame -with `ENTER`. +with `ENTER`. Neither form opens a `.scope`, so `%break` / `%continue` +inside the body resolve through to the enclosing scoped loop (if any). ### `%while_<cc>` / `%do_while_<cc>` @@ -168,6 +182,9 @@ All `%while_<cc>` macros share a single lowering pattern so they work uniformly across conditions, including `lt`, `ltu`, and `ltz` which have no inverted P1 branches. +These are unscoped: they do not support `%break` / `%continue`. Use the +`%while_scoped_<cc>` family below if the body needs mid-body exit. + ### `%for_lt` %for_lt(i_reg, n_reg, { body }) @@ -192,76 +209,55 @@ increments. %loop({ body }) -An unconditional loop with no built-in exit. The body runs forever unless -it transfers control out by another mechanism. libp1pp does not provide -`%break` or `%continue`; a loop that needs mid-body exit should use -explicit labels: - - :scan_loop - ... - BEQZ a0, &scan_end - ... - B &scan_loop - :scan_end - -### Tagged loops: `%loop_tag`, `%while_tag_<cc>`, `%for_lt_tag` - -> Tagged loops predate M1PP's `%scope` feature. They still work, but new -> code should prefer the scoped equivalents (`%loop_scoped`, -> `%while_scoped_<cc>`, `%for_lt_scoped`) paired with the generic -> `%break` / `%continue` — no tag argument required. - -M1PP's `@` local-label mechanism is scoped to the defining macro's body: an -`&@name` token passed to a macro through an argument is not stamped and -does not share a namespace with the receiving macro. Consequently, a -generic `%break` / `%continue` that uses `@` cannot be written. +An unconditional unscoped loop with no built-in exit. The body runs +forever unless it transfers control out by another mechanism. Use +`%loop_scoped` if the body needs `%break`. -libp1pp provides a tagged variant family for loops that need mid-body exit -or explicit continue. 
The tag becomes a label-name prefix via `##` paste, -so references cross every macro boundary cleanly. +### Scoped loops: `%loop_scoped`, `%while_scoped_<cc>`, `%for_lt_scoped` - %loop_tag(tag, { body }) + %loop_scoped({ body }) - %while_tag_eq(tag, ra, rb, { body }) - %while_tag_ne(tag, ra, rb, { body }) - %while_tag_lt(tag, ra, rb, { body }) - %while_tag_ltu(tag, ra, rb, { body }) - %while_tag_eqz(tag, ra, { body }) - %while_tag_nez(tag, ra, { body }) - %while_tag_ltz(tag, ra, { body }) + %while_scoped_eq(ra, rb, { body }) + %while_scoped_ne(ra, rb, { body }) + %while_scoped_lt(ra, rb, { body }) + %while_scoped_ltu(ra, rb, { body }) + %while_scoped_eqz(ra, { body }) + %while_scoped_nez(ra, { body }) + %while_scoped_ltz(ra, { body }) - %for_lt_tag(tag, i_reg, n_reg, { body }) + %for_lt_scoped(i_reg, n_reg, { body }) -Each tagged loop emits two ordinary labels: `:tag_top` at the point where -a `%continue(tag)` should land, and `:tag_end` immediately after the -loop. For top-tested `%while_tag_<cc>`, `tag_top` names the condition -test; for `%for_lt_tag`, `tag_top` names the increment-and-test block; -for `%loop_tag`, `tag_top` names the head of the body. +Each scoped loop opens a hex2++ `.scope` around its expansion and defines +two dotted labels inside it: `:.top` at the point where `%continue` should +land, and `:.end` immediately after the loop. For top-tested +`%while_scoped_<cc>`, `.top` names the condition test; for +`%for_lt_scoped`, `.top` names the increment-and-test block; for +`%loop_scoped`, `.top` names the head of the body. - %break(tag) + %break -Emits `B &tag_end`. Transfers control out of the enclosing tagged loop. +Emits `B &.end`. Transfers control to the `.end` of the innermost +enclosing scoped loop, resolved by hex2++'s scope-walk. - %continue(tag) + %continue -Emits `B &tag_top`. Transfers control to the enclosing tagged loop's -re-test / increment point. +Emits `B &.top`. 
Transfers control to the innermost enclosing scoped +loop's re-test / increment point. -Both `%break` and `%continue` work from arbitrary depth inside a tagged -loop, including inside `%if_<cc>`, `%ifelse_<cc>`, or another nested -loop's body. They resolve purely by label name, not by macro-expansion -namespace. +`%break` and `%continue` work from arbitrary depth inside a scoped loop, +including inside `%if_<cc>`, `%ifelse_<cc>`, or another nested loop's +body — because none of those forms open their own `.scope`, the lookup +walks past them to the enclosing scoped loop. -Tags are not scoped: `tag_top` and `tag_end` are ordinary hex2 labels -visible across the whole program. Tags must therefore be unique within a -function, and conventionally across functions as well. The recommended -style is `<function>_<role>` (e.g., `parse_outer`, `scan_inner`). hex2 -reports a duplicate-label error if two loops share a tag; libp1pp does not -detect this at macro-expansion time. +A nested scoped loop *does* open its own `.scope` and shadows the outer +`.top` / `.end`. Inside the inner loop, `%break` / `%continue` target the +inner loop. To break out of an outer loop from inside an inner one, fall +through with a manual branch or a status flag — libp1pp does not provide +named-label break. -Untagged forms (`%while_<cc>`, `%for_lt`, `%loop`) are preferred when the -body does not need `%break` or `%continue`. They nest without the user -picking names, and their local labels cannot collide. +Unscoped forms (`%while_<cc>`, `%for_lt`, `%loop`) are preferred when the +body does not need `%break` or `%continue`. They emit no `.scope` and use +per-expansion local labels that cannot collide. ## Frame locals @@ -296,18 +292,16 @@ Defines a non-leaf function named `name` with `size` bytes of frame-local storage. 
Expands to: - a global label `:name` at the function entry, -- a `%scope name` push, so labels inside `body` are short - (`::start`, `::done`) and mangle to `name__start`, `name__done`, +- a `.scope` push, so dotted labels inside `body` (`:.start`, `:.done`) + are local to the function and never collide with sibling functions, - an `%enter(size)` prologue, - the body, - an `%eret()` epilogue, -- a matching `%endscope`. +- a matching `.endscope`. -`%fn` is a scope-introducing-with-block macro: it pushes the scope -`name` around `body`. Any `%break` / `%continue` directly in `body` -would target `name__end` / `name__top` — which `%fn` itself does not -define, so those should only appear inside a nested scope-introducing -loop. +`%fn` does not itself define `.top` or `.end`, so a bare `%break` / +`%continue` directly in `body` would resolve outside the function (or +fail to resolve) — they should appear only inside a nested scoped loop. Example: @@ -317,18 +311,34 @@ Example: ST a0, [sp + %parse_f.state] ST a1, [sp + %parse_f.cursor] ... - BEQZ t0, &::done + BEQZ t0, &.done ... - ::done + :.done LD a0, [sp + %parse_f.state] }) `size` may be a literal byte count, a `%struct` `SIZE` reference, or any M1PP-time integer expression that the backend `%enter` macro accepts. + %fn2(name, { local1 local2 ... }, { body }) + +Like `%fn`, but the second argument is a braced list of local names +instead of a byte frame size. Synthesizes a `name_FRAME` `%struct` (one +8-byte slot per local), opens both a hex2++ `.scope` and an M1PP +`%frame` named after the function, and sizes the stack frame from +`%name_FRAME.SIZE`. + +Inside the body these helpers resolve against the enclosing `%frame`: + + %local(slot) byte offset of local `slot` + %stl(reg, slot) store reg into local `slot` + %ldl(reg, slot) load local `slot` into reg + +A zero-local function uses `{}` for the locals list. 
+ Leaf functions that need no frame do not use `%fn`: they write the entry label, body, and `%ret()` directly, and may optionally wrap the -body in `%scope name` / `%endscope` if they want scoped labels. +body in `.scope` / `.endscope` if they want scope-local dotted labels. ## Memory and strings @@ -410,7 +420,7 @@ bits are ignored) and return `1` or `0`. is_alpha(c) -> 0 or 1 # a-z, A-Z is_alnum(c) -> 0 or 1 # is_alpha OR is_digit -Predicates are functions in v1 and may become macros later. +Predicates are functions. ## IO @@ -509,7 +519,7 @@ undefined behavior. Rewinds the cursor to the arena's `base`. -v1 provides one arena. Multi-arena variants are deferred. +libp1pp provides exactly one arena. ## Panic and assertions @@ -520,9 +530,7 @@ v1 provides one arena. Multi-arena variants are deferred. Writes `msg_cstr` followed by `"\n"` to fd `2`, then calls `sys_exit(1)`. Does not return. -`panic` is used from libp1pp internally only for unrecoverable programmer -errors (none in v1). User code is encouraged to use it for its own -invariant violations. +User code is encouraged to use `panic` for its own invariant violations. ### `%assert_<cc>` macros @@ -543,17 +551,3 @@ beyond the original branch. Because the failure path issues a `CALL`, `%assert_*` may be used only in functions that have established a frame with `ENTER`. -## Not in v1 - -The following were considered and deferred: - -- Field-access helpers such as `%ld_field` — `LD rd, [base + %S.f]` is - short enough. -- `printf`-style formatted output — replaced by dedicated `print_*` and - `fmt_*` primitives. -- Multiple bump arenas — one global arena covers bootstrap needs. -- `strcpy` / `strcat` — length-aware callers should use `memcpy` with an - explicit byte count. -- P1-32 support. -- `envp` / auxv / command-line-aware helpers beyond what `p1_main` - already receives. 
diff --git a/tests/P1/cmpset.P1pp b/tests/P1/cmpset.P1pp @@ -20,122 +20,122 @@ # ---- eq ------------------------------------------------------------- %li(s0, 5) %li(s1, 5) %cmpset_eq(t0, s0, s1) - %li(t1, 1) %bne(t0, t1, &::fail) + %li(t1, 1) %bne(t0, t1, &.fail) %li(s1, 6) %cmpset_eq(t0, s0, s1) - %li(t1, 0) %bne(t0, t1, &::fail) + %li(t1, 0) %bne(t0, t1, &.fail) %la(a0, &c_eq) %li(a1, 3) %call(&print) # ---- ne ------------------------------------------------------------- %li(s0, 5) %li(s1, 6) %cmpset_ne(t0, s0, s1) - %li(t1, 1) %bne(t0, t1, &::fail) + %li(t1, 1) %bne(t0, t1, &.fail) %li(s1, 5) %cmpset_ne(t0, s0, s1) - %li(t1, 0) %bne(t0, t1, &::fail) + %li(t1, 0) %bne(t0, t1, &.fail) %la(a0, &c_ne) %li(a1, 3) %call(&print) # ---- lt ------------------------------------------------------------- %li(s0, -3) %li(s1, 2) %cmpset_lt(t0, s0, s1) - %li(t1, 1) %bne(t0, t1, &::fail) + %li(t1, 1) %bne(t0, t1, &.fail) %cmpset_lt(t0, s1, s0) - %li(t1, 0) %bne(t0, t1, &::fail) + %li(t1, 0) %bne(t0, t1, &.fail) %la(a0, &c_lt) %li(a1, 3) %call(&print) # ---- ltu (unsigned: -1 is huge) ------------------------------------ %li(s0, 5) %li(s1, -1) %cmpset_ltu(t0, s0, s1) - %li(t1, 1) %bne(t0, t1, &::fail) + %li(t1, 1) %bne(t0, t1, &.fail) %cmpset_ltu(t0, s1, s0) - %li(t1, 0) %bne(t0, t1, &::fail) + %li(t1, 0) %bne(t0, t1, &.fail) %la(a0, &c_ltu) %li(a1, 4) %call(&print) # ---- le (signed) ---------------------------------------------------- # true cases: a < b, and a == b %li(s0, -3) %li(s1, 2) %cmpset_le(t0, s0, s1) - %li(t1, 1) %bne(t0, t1, &::fail) + %li(t1, 1) %bne(t0, t1, &.fail) %li(s0, 5) %li(s1, 5) %cmpset_le(t0, s0, s1) - %li(t1, 1) %bne(t0, t1, &::fail) + %li(t1, 1) %bne(t0, t1, &.fail) # false: a > b %li(s0, 6) %li(s1, 5) %cmpset_le(t0, s0, s1) - %li(t1, 0) %bne(t0, t1, &::fail) + %li(t1, 0) %bne(t0, t1, &.fail) %la(a0, &c_le) %li(a1, 3) %call(&print) # ---- leu (unsigned) ------------------------------------------------- # -1 unsigned is huge; 5 <= -1 (true), -1 <= 5 
(false), 5 <= 5 (true) %li(s0, 5) %li(s1, -1) %cmpset_leu(t0, s0, s1) - %li(t1, 1) %bne(t0, t1, &::fail) + %li(t1, 1) %bne(t0, t1, &.fail) %cmpset_leu(t0, s1, s0) - %li(t1, 0) %bne(t0, t1, &::fail) + %li(t1, 0) %bne(t0, t1, &.fail) %li(s0, 5) %li(s1, 5) %cmpset_leu(t0, s0, s1) - %li(t1, 1) %bne(t0, t1, &::fail) + %li(t1, 1) %bne(t0, t1, &.fail) %la(a0, &c_leu) %li(a1, 4) %call(&print) # ---- ge (signed) ---------------------------------------------------- %li(s0, 2) %li(s1, -3) %cmpset_ge(t0, s0, s1) - %li(t1, 1) %bne(t0, t1, &::fail) + %li(t1, 1) %bne(t0, t1, &.fail) %li(s0, 5) %li(s1, 5) %cmpset_ge(t0, s0, s1) - %li(t1, 1) %bne(t0, t1, &::fail) + %li(t1, 1) %bne(t0, t1, &.fail) %li(s0, -3) %li(s1, 2) %cmpset_ge(t0, s0, s1) - %li(t1, 0) %bne(t0, t1, &::fail) + %li(t1, 0) %bne(t0, t1, &.fail) %la(a0, &c_ge) %li(a1, 3) %call(&print) # ---- geu (unsigned) ------------------------------------------------- %li(s0, -1) %li(s1, 5) %cmpset_geu(t0, s0, s1) - %li(t1, 1) %bne(t0, t1, &::fail) + %li(t1, 1) %bne(t0, t1, &.fail) %cmpset_geu(t0, s1, s0) - %li(t1, 0) %bne(t0, t1, &::fail) + %li(t1, 0) %bne(t0, t1, &.fail) %li(s0, 5) %li(s1, 5) %cmpset_geu(t0, s0, s1) - %li(t1, 1) %bne(t0, t1, &::fail) + %li(t1, 1) %bne(t0, t1, &.fail) %la(a0, &c_geu) %li(a1, 4) %call(&print) # ---- eqz ------------------------------------------------------------ %li(s0, 0) %cmpset_eqz(t0, s0) - %li(t1, 1) %bne(t0, t1, &::fail) + %li(t1, 1) %bne(t0, t1, &.fail) %li(s0, 7) %cmpset_eqz(t0, s0) - %li(t1, 0) %bne(t0, t1, &::fail) + %li(t1, 0) %bne(t0, t1, &.fail) %la(a0, &c_ez) %li(a1, 3) %call(&print) # ---- nez ------------------------------------------------------------ %li(s0, 7) %cmpset_nez(t0, s0) - %li(t1, 1) %bne(t0, t1, &::fail) + %li(t1, 1) %bne(t0, t1, &.fail) %li(s0, 0) %cmpset_nez(t0, s0) - %li(t1, 0) %bne(t0, t1, &::fail) + %li(t1, 0) %bne(t0, t1, &.fail) %la(a0, &c_nz) %li(a1, 3) %call(&print) # ---- ltz ------------------------------------------------------------ %li(s0, -1) 
%cmpset_ltz(t0, s0) - %li(t1, 1) %bne(t0, t1, &::fail) + %li(t1, 1) %bne(t0, t1, &.fail) %li(s0, 0) %cmpset_ltz(t0, s0) - %li(t1, 0) %bne(t0, t1, &::fail) - %la(a0, &c_lz) %li(a1, 3) %call(&print) + %li(t1, 0) %bne(t0, t1, &.fail) + %la(a0, &c_lz) %li(a1, 2) %call(&print) %la(a0, &c_nl) %li(a1, 1) %call(&print) %li(a0, 0) - %b(&::done) + %b(&.done) - ::fail + :.fail %la(a0, &c_x) %li(a1, 1) %call(&print) %la(a0, &c_nl) %li(a1, 1) %call(&print) %li(a0, 1) - ::done + :.done }) :c_eq "EQ " diff --git a/tests/P1/ext-macros.P1pp b/tests/P1/ext-macros.P1pp @@ -13,95 +13,95 @@ %li(t0, 0x7F) %sext8(t0, t0) %li(t1, 127) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_a) %li(a1, 1) %call(&print) # ---- B: sext8 negative (0x80 → -128) ------------------------------- %li(t0, 0x80) %sext8(t0, t0) %li(t1, -128) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_b) %li(a1, 1) %call(&print) # ---- C: sext16 positive (0x7FFF → 32767) --------------------------- %li(t0, 0x7FFF) %sext16(t0, t0) %li(t1, 32767) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_c) %li(a1, 1) %call(&print) # ---- D: sext16 negative (0x8000 → -32768) -------------------------- %li(t0, 0x8000) %sext16(t0, t0) %li(t1, -32768) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_d) %li(a1, 1) %call(&print) # ---- E: sext32 positive (0x7FFFFFFF → 2147483647) ------------------ %li(t0, 0x7FFFFFFF) %sext32(t0, t0) %li(t1, 2147483647) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_e) %li(a1, 1) %call(&print) # ---- F: sext32 negative (0x80000000 → -2147483648) ----------------- %li(t0, 0x80000000) %sext32(t0, t0) %li(t1, -2147483648) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_f) %li(a1, 1) %call(&print) # ---- G: zext8 (-1 → 0xFF) ------------------------------------------ %li(t0, -1) %zext8(t0, t0) %li(t1, 0xFF) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_g) %li(a1, 1) %call(&print) # ---- H: zext16 (-1 → 0xFFFF) 
--------------------------------------- %li(t0, -1) %zext16(t0, t0) %li(t1, 0xFFFF) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_h) %li(a1, 1) %call(&print) # ---- I: zext32 (-1 → 0xFFFFFFFF) ----------------------------------- %li(t0, -1) %zext32(t0, t0, t1) %li(t1, 0xFFFFFFFF) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_i) %li(a1, 1) %call(&print) # ---- J: rd != ra split (sext8) ------------------------------------- %li(s0, 0x80) %sext8(t0, s0) %li(t1, -128) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_j) %li(a1, 1) %call(&print) # ---- K: rd != ra split (zext16) ------------------------------------ %li(s0, -1) %zext16(t0, s0) %li(t1, 0xFFFF) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_k) %li(a1, 1) %call(&print) # ---- L: rd != ra split (zext32) ------------------------------------ %li(s0, -1) %zext32(t0, s0, t1) %li(t1, 0xFFFFFFFF) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_l) %li(a1, 1) %call(&print) %la(a0, &c_nl) %li(a1, 1) %call(&print) %li(a0, 0) - %b(&::done) + %b(&.done) - ::fail + :.fail %la(a0, &c_x) %li(a1, 1) %call(&print) %la(a0, &c_nl) %li(a1, 1) %call(&print) %li(a0, 1) - ::done + :.done }) :c_a "A" diff --git a/tests/P1/lea-slot.P1pp b/tests/P1/lea-slot.P1pp @@ -16,7 +16,7 @@ %st(t0, sp, 0) %lea_slot(s0, 0) %ld(t1, s0, 0) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_a) %li(a1, 1) %call(&print) # ---- B: write via lea_slot @offset 8, read via sp+8 ----------------- @@ -24,18 +24,18 @@ %li(t0, 0xDEADBEEF) %st(t0, s0, 0) %ld(t1, sp, 8) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_b) %li(a1, 1) %call(&print) %la(a0, &c_nl) %li(a1, 1) %call(&print) %li(a0, 0) - %b(&::done) + %b(&.done) - ::fail + :.fail %la(a0, &c_x) %li(a1, 1) %call(&print) %la(a0, &c_nl) %li(a1, 1) %call(&print) %li(a0, 1) - ::done + :.done }) :c_a "A" diff --git a/tests/P1/loop-tag-scoping.P1pp b/tests/P1/loop-tag-scoping.P1pp @@ -1,59 +1,27 @@ -# tests/p1/loop-tag-scoping.P1pp — 
regression test for libp1pp's -# tag-loop scoping (commit that switched %loop_tag and friends from -# `: ## tag ## _top` to `:: ## tag ## _top`). +# tests/p1/loop-tag-scoping.P1pp — sibling-function scoped-loop hygiene. # -# The bug: %loop_tag, %while_tag_*, %for_lt_tag, %break, %continue -# all paste-built `:tag_top` / `:tag_end` as single-colon globals. -# When two functions in the same TU both used `%loop_tag(L0, ...)`, -# they emitted duplicate `:L0_top` / `:L0_end` labels — M0 keeps the -# last definition, so all `&L0_top` references resolved to the -# winner regardless of which function's loop they were emitted from. -# -# In real life this hit when libc.P1pp + tcc.flat.P1pp were catm'd -# (see TCC-TODO §loop-tag fix). The shape reproduces in a single TU -# by having two `%fn` blocks both use tag `L0`, and arranging for -# the FIRST function's `%continue` / `%break` to be the ones whose -# correctness matters. +# Two %fn blocks each contain a %loop_scoped whose body uses %break and +# %continue. Each %fn opens its own hex2++ .scope, and each +# %loop_scoped opens a further nested .scope defining :.top / :.end, so +# the two loops' labels can never collide. This test exercises both +# loops to verify that hygiene end-to-end. # # Reproducer setup: # - Global byte slot starting at '0'. -# - `bumper` is the FIRST function: it loops 3 times, incrementing -# the slot (`'0'` → `'3'`) on each iteration. -# - `p1_main` is the SECOND function. It calls bumper once, then -# enters its own `%loop_tag(L0, { %break(L0) })` (no-op). -# After the loop it writes the slot to stdout and exits 0. -# -# Without the scope fix: -# - bumper's `:L0_top` / `:L0_end` are emitted but lose to -# p1_main's later definitions in M0's symbol table. -# - bumper's `%continue(L0)` and `%break(L0)` resolve to -# p1_main's L0_top/L0_end, which are at p1_main's text address. 
-# - On bumper's first iteration: increment runs once, then -# `%continue(L0)` jumps into p1_main's body (in bumper's frame). -# p1_main's loop body just `%break(L0)` -> p1_main's L0_end. -# End of p1_main body -> %eret pops bumper's saved frame -> -# ret to "after %call(&bumper)" in p1_main with sp restored. -# - bumper effectively ran one iteration; slot = '0'+1 = '1'. -# - p1_main writes '1' to stdout and exits. -# -# With the fix: -# - %loop_tag emits `::tag_top` / `::tag_end` which M1pp scope- -# mangles against the enclosing %fn's %scope. bumper's labels -# become `:bumper__L0_top` / `_end`, p1_main's become -# `:p1_main__L0_top` / `_end`. No collision; each function's -# control flow is local. -# - bumper iterates the full 3 times; slot = '0'+3 = '3'. -# - p1_main writes '3' to stdout and exits. +# - `bumper` is the first function: it loops 3 times, incrementing +# the slot ('0' -> '3') on each iteration, exiting via %break. +# - `p1_main` is the second function. It calls bumper, then enters +# its own %loop_scoped that immediately %breaks. After the loop it +# writes the slot to stdout and exits 0. # # Expected stdout: "3" (single byte). 
:counter_buf $(0) %fn(bumper, 8, { - # Local 0 (sp+16 with the p1_mem +16 compensation): int iter %li(a0, 0) %st(a0, sp, 0) - %loop_tag(L0, { + %loop_scoped({ %la(t0, &counter_buf) %lb(t1, t0, 0) %addi(t1, t1, 1) @@ -63,8 +31,8 @@ %st(t0, sp, 0) # iter++ %ld(a0, sp, 0) %li(a1, 3) - %if_lt(a0, a1, { %continue(L0) }) - %break(L0) + %if_lt(a0, a1, { %continue }) + %break }) }) @@ -73,7 +41,7 @@ %la(t0, &counter_buf) %sb(a0, t0, 0) # counter_buf[0] = '0' %call(&bumper) - %loop_tag(L0, { %break(L0) }) # second loop_tag with same tag — collision target + %loop_scoped({ %break }) # sibling loop — own .scope, no collision %li(a0, 1) # fd stdout %la(a1, &counter_buf) %li(a2, 1) diff --git a/tests/P1/memcpy-call.P1pp b/tests/P1/memcpy-call.P1pp @@ -16,27 +16,27 @@ # Verify dst[0..13] == src[0..13] %li(t2, 0) - %loop_tag(L0, { + %loop_scoped({ %li(t1, 13) - %if_eq(t2, t1, { %break(L0) }) + %if_eq(t2, t1, { %break }) %la(s0, &dst) %add(s0, s0, t2) %lb(t0, s0, 0) %la(s1, &src) %add(s1, s1, t2) %lb(t1, s1, 0) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %addi(t2, t2, 1) }) %la(a0, &c_ok) %li(a1, 3) %call(&print) %li(a0, 0) - %b(&::done) + %b(&.done) - ::fail + :.fail %la(a0, &c_fail) %li(a1, 5) %call(&print) %li(a0, 1) - ::done + :.done }) :src diff --git a/tests/P1/ptr-arith.P1pp b/tests/P1/ptr-arith.P1pp @@ -8,48 +8,48 @@ # ---- A: ptr_add sz=1 ------------------------------------------------ %li(s0, 1000) %li(s1, 7) %ptr_add(t0, s0, s1, 1, t1) - %li(t2, 1007) %bne(t0, t2, &::fail) + %li(t2, 1007) %bne(t0, t2, &.fail) %la(a0, &c_a) %li(a1, 1) %call(&print) # ---- B: ptr_add sz=4 ------------------------------------------------ %li(s0, 1000) %li(s1, 5) %ptr_add(t0, s0, s1, 4, t1) - %li(t2, 1020) %bne(t0, t2, &::fail) + %li(t2, 1020) %bne(t0, t2, &.fail) %la(a0, &c_b) %li(a1, 1) %call(&print) # ---- C: ptr_add sz=8 ------------------------------------------------ %li(s0, 1000) %li(s1, 3) %ptr_add(t0, s0, s1, 8, t1) - %li(t2, 1024) %bne(t0, t2, &::fail) + %li(t2, 1024) %bne(t0, 
t2, &.fail) %la(a0, &c_c) %li(a1, 1) %call(&print) # ---- D: ptr_sub sz=4 ------------------------------------------------ %li(s0, 1000) %li(s1, 5) %ptr_sub(t0, s0, s1, 4, t1) - %li(t2, 980) %bne(t0, t2, &::fail) + %li(t2, 980) %bne(t0, t2, &.fail) %la(a0, &c_d) %li(a1, 1) %call(&print) # ---- E: ptr_diff sz=4 ----------------------------------------------- %li(s0, 1020) %li(s1, 1000) %ptr_diff(t0, s0, s1, 4, t1) - %li(t2, 5) %bne(t0, t2, &::fail) + %li(t2, 5) %bne(t0, t2, &.fail) %la(a0, &c_e) %li(a1, 1) %call(&print) # ---- F: ptr_diff sz=8 ----------------------------------------------- %li(s0, 1024) %li(s1, 1000) %ptr_diff(t0, s0, s1, 8, t1) - %li(t2, 3) %bne(t0, t2, &::fail) + %li(t2, 3) %bne(t0, t2, &.fail) %la(a0, &c_f) %li(a1, 1) %call(&print) %la(a0, &c_nl) %li(a1, 1) %call(&print) %li(a0, 0) - %b(&::done) + %b(&.done) - ::fail + :.fail %la(a0, &c_x) %li(a1, 1) %call(&print) %la(a0, &c_nl) %li(a1, 1) %call(&print) %li(a0, 1) - ::done + :.done }) :c_a "A" diff --git a/tests/P1/sub-word-mem.P1pp b/tests/P1/sub-word-mem.P1pp @@ -17,20 +17,20 @@ %st_h(t0, s0, 0, t1) %lb(t2, s0, 0) %li(t1, 0xFE) - %bne(t2, t1, &::fail) + %bne(t2, t1, &.fail) %lb(t2, s0, 1) %li(t1, 0xCA) - %bne(t2, t1, &::fail) + %bne(t2, t1, &.fail) %la(a0, &c_a) %li(a1, 1) %call(&print) # ---- B: %st_w byte order -------------------------------------------- %la(s0, &buf) %li(t0, 0xDEADBEEF) %st_w(t0, s0, 0, t1) - %lb(t2, s0, 0) %li(t1, 0xEF) %bne(t2, t1, &::fail) - %lb(t2, s0, 1) %li(t1, 0xBE) %bne(t2, t1, &::fail) - %lb(t2, s0, 2) %li(t1, 0xAD) %bne(t2, t1, &::fail) - %lb(t2, s0, 3) %li(t1, 0xDE) %bne(t2, t1, &::fail) + %lb(t2, s0, 0) %li(t1, 0xEF) %bne(t2, t1, &.fail) + %lb(t2, s0, 1) %li(t1, 0xBE) %bne(t2, t1, &.fail) + %lb(t2, s0, 2) %li(t1, 0xAD) %bne(t2, t1, &.fail) + %lb(t2, s0, 3) %li(t1, 0xDE) %bne(t2, t1, &.fail) %la(a0, &c_b) %li(a1, 1) %call(&print) # ---- C: %ld_h round-trip (zero-extend) ------------------------------ @@ -39,7 +39,7 @@ %st_h(t0, s0, 8, t1) %ld_h(t0, s0, 8, t1) 
%li(t1, 0xCAFE) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_c) %li(a1, 1) %call(&print) # ---- D: %ld_w round-trip (zero-extend) ------------------------------ @@ -48,7 +48,7 @@ %st_w(t0, s0, 8, t1) %ld_w(t0, s0, 8, t1) %li(t1, 0xDEADBEEF) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_d) %li(a1, 1) %call(&print) # ---- E: %ld_sh sign-extend ------------------------------------------ @@ -57,7 +57,7 @@ %st_h(t0, s0, 0, t1) %ld_sh(t0, s0, 0, t1) %li(t1, -32768) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_e) %li(a1, 1) %call(&print) # ---- F: %ld_sw sign-extend ------------------------------------------ @@ -66,18 +66,18 @@ %st_w(t0, s0, 0, t1) %ld_sw(t0, s0, 0, t1) %li(t1, -2147483648) - %bne(t0, t1, &::fail) + %bne(t0, t1, &.fail) %la(a0, &c_f) %li(a1, 1) %call(&print) %la(a0, &c_nl) %li(a1, 1) %call(&print) %li(a0, 0) - %b(&::done) + %b(&.done) - ::fail + :.fail %la(a0, &c_x) %li(a1, 1) %call(&print) %la(a0, &c_nl) %li(a1, 1) %call(&print) %li(a0, 1) - ::done + :.done }) :c_a "A" diff --git a/tests/P1/switch-case.P1pp b/tests/P1/switch-case.P1pp @@ -8,42 +8,42 @@ # Output: "ABC\n". 
%fn(select_n, 0, { - %switch_case(a0, t1, 1, &::case_1) - %switch_case(a0, t1, 2, &::case_2) - %switch_case(a0, t1, 3, &::case_3) + %switch_case(a0, t1, 1, &.case_1) + %switch_case(a0, t1, 2, &.case_2) + %switch_case(a0, t1, 3, &.case_3) %li(a0, 999) - %b(&::done) - ::case_1 %li(a0, 100) %b(&::done) - ::case_2 %li(a0, 200) %b(&::done) - ::case_3 %li(a0, 300) - ::done + %b(&.done) + :.case_1 %li(a0, 100) %b(&.done) + :.case_2 %li(a0, 200) %b(&.done) + :.case_3 %li(a0, 300) + :.done }) %fn(p1_main, 0, { %li(a0, 1) %call(&select_n) - %li(t0, 100) %bne(a0, t0, &::fail) + %li(t0, 100) %bne(a0, t0, &.fail) %la(a0, &c_a) %li(a1, 1) %call(&print) %li(a0, 2) %call(&select_n) - %li(t0, 200) %bne(a0, t0, &::fail) + %li(t0, 200) %bne(a0, t0, &.fail) %la(a0, &c_b) %li(a1, 1) %call(&print) %li(a0, 3) %call(&select_n) - %li(t0, 300) %bne(a0, t0, &::fail) + %li(t0, 300) %bne(a0, t0, &.fail) %la(a0, &c_c) %li(a1, 1) %call(&print) %li(a0, 99) %call(&select_n) - %li(t0, 999) %bne(a0, t0, &::fail) + %li(t0, 999) %bne(a0, t0, &.fail) %la(a0, &c_nl) %li(a1, 1) %call(&print) %li(a0, 0) - %b(&::done) + %b(&.done) - ::fail + :.fail %la(a0, &c_x) %li(a1, 1) %call(&print) %la(a0, &c_nl) %li(a1, 1) %call(&print) %li(a0, 1) - ::done + :.done }) :c_a "A" diff --git a/tests/P1/sys_calls.P1pp b/tests/P1/sys_calls.P1pp @@ -16,79 +16,79 @@ # --- sys_brk ----------------------------------------------------------- %li(a0, 0) %call(&sys_brk) - %bltz(a0, &::fail) - %beqz(a0, &::fail) + %bltz(a0, &.fail) + %beqz(a0, &.fail) %la(a0, &b_msg) %li(a1, 1) %call(&write_stdout) - %bltz(a0, &::fail) + %bltz(a0, &.fail) # --- create + write "X" ------------------------------------------------ %la(a0, &fname) %li(a1, 577) # O_WRONLY|O_CREAT|O_TRUNC %li(a2, 0644) %call(&sys_open) - %bltz(a0, &::fail) + %bltz(a0, &.fail) %mov(s0, a0) # save fd %mov(a0, s0) %la(a1, &payload) %li(a2, 1) %call(&sys_write) - %bltz(a0, &::fail) + %bltz(a0, &.fail) %mov(a0, s0) %call(&sys_close) - %bltz(a0, &::fail) + %bltz(a0, &.fail) # 
--- reopen, lseek, read, verify, close -------------------------------- %la(a0, &fname) %li(a1, 0) # O_RDONLY %li(a2, 0) %call(&sys_open) - %bltz(a0, &::fail) + %bltz(a0, &.fail) %mov(s0, a0) %mov(a0, s0) %li(a1, 0) %li(a2, 0) # SEEK_SET %call(&sys_lseek) - %bltz(a0, &::fail) + %bltz(a0, &.fail) %mov(a0, s0) %la(a1, &readbuf) %li(a2, 1) %call(&sys_read) - %bltz(a0, &::fail) + %bltz(a0, &.fail) %mov(a0, s0) %call(&sys_close) - %bltz(a0, &::fail) + %bltz(a0, &.fail) %la(s1, &readbuf) %lb(s1, s1, 0) %li(t0, 88) # 'X' - %bne(s1, t0, &::fail) + %bne(s1, t0, &.fail) %la(a0, &l_msg) %li(a1, 1) %call(&write_stdout) - %bltz(a0, &::fail) + %bltz(a0, &.fail) # --- sys_unlink -------------------------------------------------------- %la(a0, &fname) %call(&sys_unlink) - %bltz(a0, &::fail) + %bltz(a0, &.fail) %la(a0, &u_msg) %li(a1, 2) %call(&write_stdout) - %bltz(a0, &::fail) + %bltz(a0, &.fail) %li(a0, 0) %eret - ::fail + :.fail %li(a0, 1) %eret diff --git a/tests/P1/unops.P1pp b/tests/P1/unops.P1pp @@ -11,53 +11,53 @@ %li(s0, 5) %neg(t0, s0, t1) %li(t2, -5) - %bne(t0, t2, &::fail) + %bne(t0, t2, &.fail) %la(a0, &c_a) %li(a1, 1) %call(&print) # ---- B: neg negative (-7 -> 7) ------------------------------------- %li(s0, -7) %neg(t0, s0, t1) %li(t2, 7) - %bne(t0, t2, &::fail) + %bne(t0, t2, &.fail) %la(a0, &c_b) %li(a1, 1) %call(&print) # ---- C: bnot 0 -> -1 ----------------------------------------------- %li(s0, 0) %bnot(t0, s0, t1) %li(t2, -1) - %bne(t0, t2, &::fail) + %bne(t0, t2, &.fail) %la(a0, &c_c) %li(a1, 1) %call(&print) # ---- D: bnot 0xA5 -> ~0xA5 (= -166 actually) ------------------------ %li(s0, 0xA5) %bnot(t0, s0, t1) %li(t2, -166) - %bne(t0, t2, &::fail) + %bne(t0, t2, &.fail) %la(a0, &c_d) %li(a1, 1) %call(&print) # ---- E: bool zero -> 0 --------------------------------------------- %li(s0, 0) %bool(t0, s0) %li(t2, 0) - %bne(t0, t2, &::fail) + %bne(t0, t2, &.fail) %la(a0, &c_e) %li(a1, 1) %call(&print) # ---- F: bool nonzero -> 1
------------------------------------------ %li(s0, 42) %bool(t0, s0) %li(t2, 1) - %bne(t0, t2, &::fail) + %bne(t0, t2, &.fail) %la(a0, &c_f) %li(a1, 1) %call(&print) %la(a0, &c_nl) %li(a1, 1) %call(&print) %li(a0, 0) - %b(&::done) + %b(&.done) - ::fail + :.fail %la(a0, &c_x) %li(a1, 1) %call(&print) %la(a0, &c_nl) %li(a1, 1) %call(&print) %li(a0, 1) - ::done + :.done }) :c_a "A"