commit 9a3e9f8a0fa616639d12f375d6b7a46cfada0546 parent a8c2dc08392a373708f1b305b93c938cfad696fc Author: Ryan Sepassi <rsepassi@gmail.com> Date: Sat, 25 Apr 2026 17:25:53 -0700 scheme1: implement pmatch special form per LISP-PMATCH.md Reader gains `,X` -> (unquote X) datum sugar (mirrors `'X`); eval gains a sym_pmatch dispatch entry; eval_pmatch + pmatch_match cover literal, binder, wildcard, structural-pair, improper-tail, else, and guard clauses, with defined-error die paths for no-match and malformed ,-patterns. Bindings reuse the existing flat-alist env; bodies tail-call eval_body so the matched clause keeps tail position. Tests 86-92 cover the full surface, including the compiler-style form dispatch from LISP-PMATCH.md. Diffstat:
17 files changed, 524 insertions(+), 0 deletions(-)
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp @@ -605,6 +605,8 @@ %beqz(a1, &::hash) %addi(a1, a0, -39) ; '\'' %beqz(a1, &::quote) + %addi(a1, a0, -44) ; ',' + %beqz(a1, &::comma) %addi(a1, a0, -34) ; '"' %beqz(a1, &::string) @@ -715,6 +717,26 @@ %mov(a0, t0) %tail(&cons) + ::comma + # Mirror of ::quote: consume the leading ','; recurse into parse_one + # for the inner datum; build (unquote <datum>). The comma sugar exists + # purely so pmatch patterns can be written as `,ident`; there is no + # quasiquote evaluator. Outside a pmatch pattern this list reaches + # eval as an application of the (unbound) `unquote` and dies through + # the standard unbound-variable path. + %la(t2, &readbuf_pos) + %ld(t0, t2, 0) + %addi(t0, t0, 1) + %st(t0, t2, 0) + %call(&parse_one) + %li(a1, %imm_val(%IMM.NIL)) + %call(&cons) + %la(t0, &sym_unquote) + %ld(t0, t0, 0) + %mov(a1, a0) + %mov(a0, t0) + %tail(&cons) + ::char_lit # Cursor is already past '#\\'; parse_char scans the body and returns # a tagged fixnum (the u8 char value). @@ -1475,6 +1497,7 @@ %dispatch_form(&sym_or, &::do_or) %dispatch_form(&sym_setbang, &::do_setbang) %dispatch_form(&sym_define_record_type, &::do_define_record_type) + %dispatch_form(&sym_pmatch, &::do_pmatch) # head = eval(car(expr), env) %ld(a0, sp, 0) @@ -1525,6 +1548,8 @@ %tail_to_handler(&eval_setbang) ::do_define_record_type %tail_to_handler(&eval_define_record_type) + ::do_pmatch + %tail_to_handler(&eval_pmatch) }) # eval_args(args=a0, env=a1) -> evaluated args list (cons-built). @@ -1660,6 +1685,10 @@ %intern_form(&name_or, 2, &sym_or) %intern_form(&name_setbang, 4, &sym_setbang) %intern_form(&name_define_record_type, 18, &sym_define_record_type) + %intern_form(&name_pmatch, 6, &sym_pmatch) + %intern_form(&name_unquote, 7, &sym_unquote) + %intern_form(&name_guard, 5, &sym_guard) + %intern_form(&name_underscore, 1, &sym_underscore) }) # eval_if(rest=a0, env=a1) -> value (a0). `rest` is (test then else). @@ -2195,6 +2224,249 @@ %mov(a0, t0) }) +# eval_pmatch(rest=a0, env=a1) -> value (a0). +# rest is (subject-expr . clauses). The subject is evaluated once; each +# clause is then tried in order against the same subject value, restarting +# from the outer env per clause. Clause shape: +# (<pat> <body>...) +# (<pat> (guard <g>...) <body>...) +# (else <body>...) +# An `else` clause always matches with no bindings; a guarded clause is +# selected only if every guard expression evaluates non-#f. The matched +# clause's body is tail-evaluated via eval_body so the last form keeps +# tail position. No-match (and no else) dies via runtime_error. +# +# Frame: 48 bytes +# +0 subject +# +8 env_outer (per-clause restart point) +# +16 clauses (current cursor; advances on miss / failed guard) +# +24 env_ext (env extended with the matched clause's bindings) +# +32 guard cursor (advances during the guard AND-fold) +# +40 body (saved across guard evals, tail-evaluated on success) +%fn(eval_pmatch, 48, { + %st(a1, sp, 8) + + # subject = eval(car(rest), env_outer); clauses = cdr(rest). + %mov(t0, a0) + %cdr(t1, t0) + %st(t1, sp, 16) + %car(a0, t0) + %ld(a1, sp, 8) + %call(&eval) + %st(a0, sp, 0) + + ::loop + %ld(t0, sp, 16) + %if_nil(t1, t0, &::no_match) + + %car(t1, t0) ; clause + %car(t2, t1) ; pat + + %la(a3, &sym_else) + %ld(a3, a3, 0) + %beq(t2, a3, &::do_else) + + # pmatch_match(pat, subject, env_outer) -> (a0=env_ext, a1=ok) + %mov(a0, t2) + %ld(a1, sp, 0) + %ld(a2, sp, 8) + %call(&pmatch_match) + %beqz(a1, &::next) + + %st(a0, sp, 24) ; env_ext + + # tail = cdr(clause) + %ld(t0, sp, 16) + %car(t0, t0) + %cdr(t0, t0) ; tail = (body...) or ((guard ...) body...) + + # Guard form? tail is a pair, car(tail) is a pair, head of car(tail) + # eq? sym_guard. + %tagof(t1, t0) + %li(t2, %TAG.PAIR) + %bne(t1, t2, &::body_simple) + %car(t1, t0) ; first form of tail + %tagof(t2, t1) + %li(a0, %TAG.PAIR) + %bne(t2, a0, &::body_simple) + %car(a0, t1) ; head of first form + %la(a1, &sym_guard) + %ld(a1, a1, 0) + %bne(a0, a1, &::body_simple) + + # Guard clause. guards = cdr(car(tail)); body = cdr(tail). + %cdr(a0, t1) + %st(a0, sp, 32) + %cdr(t0, t0) + %st(t0, sp, 40) + + ::g_loop + %ld(t0, sp, 32) + %if_nil(t1, t0, &::body_run) + + %car(a0, t0) ; guard expr + %ld(a1, sp, 24) ; env_ext + %call(&eval) + %li(t0, %imm_val(%IMM.FALSE)) + %beq(a0, t0, &::next) + + %ld(t0, sp, 32) + %cdr(t0, t0) + %st(t0, sp, 32) + %b(&::g_loop) + + ::body_run + %ld(a0, sp, 40) + %ld(a1, sp, 24) + %tail(&eval_body) + + ::body_simple + # tail itself is the body (no guard wrapper). Tail-call eval_body + # with the extended env; tail position of the matched clause's body + # is preserved. + %mov(a0, t0) + %ld(a1, sp, 24) + %tail(&eval_body) + + ::do_else + %ld(t0, sp, 16) + %car(t0, t0) + %cdr(a0, t0) ; body + %ld(a1, sp, 8) ; env_outer (no bindings introduced) + %tail(&eval_body) + + ::next + %ld(t0, sp, 16) + %cdr(t0, t0) + %st(t0, sp, 16) + %b(&::loop) + + ::no_match + %die(msg_pmatch_no_match) +}) + +# pmatch_match(pat=a0, subj=a1, env=a2) -> (env=a0, ok=a1) +# +# Walks pat and subj structurally. On success returns the (possibly +# extended) env in a0 and 1 in a1; on failure returns 0 in a1 (a0 is +# undefined and callers must not use it). Pattern shapes: +# +# - pair (car eq? sym_unquote): binder `,ident` or wildcard `,_`. The +# pattern must be exactly (unquote <sym>) — any other shape dies +# with msg_bad_unquote_pattern (the only carve-out from the spec's +# primitive-failure UB policy, since pattern shape is a syntax +# error in the user's source). +# - pair (otherwise): subj must be a pair; recurse on car, then cdr. +# - atomic (fixnum, sym, immediate, identical heap pointer): raw +# word equality. +# - HEAP-tagged HDR.BV: structural byte-for-byte equality via +# bv_equal_check; only when both pat and subj are HDR.BV. +# +# Frame: 24 bytes +# +0 pat +# +8 subj +# +16 env +%fn(pmatch_match, 24, { + %st(a0, sp, 0) + %st(a1, sp, 8) + %st(a2, sp, 16) + + %tagof(t0, a0) + %li(t1, %TAG.PAIR) + %beq(t0, t1, &::pair_pat) + + # Atomic pattern. Identity covers fixnum / symbol / immediate / same + # heap pointer. + %beq(a0, a1, &::ok) + + # HDR.BV equality. + %li(t1, %TAG.HEAP) + %bne(t0, t1, &::no) + %hdr_type(t1, a0) + %li(t2, %HDR.BV) + %bne(t1, t2, &::no) + %tagof(t1, a1) + %li(t2, %TAG.HEAP) + %bne(t1, t2, &::no) + %hdr_type(t1, a1) + %li(t2, %HDR.BV) + %bne(t1, t2, &::no) + %call(&bv_equal_check) + %li(t0, %imm_val(%IMM.TRUE)) + %beq(a0, t0, &::ok) + %b(&::no) + + ::pair_pat + %car(t0, a0) ; phead + %la(t1, &sym_unquote) + %ld(t1, t1, 0) + %beq(t0, t1, &::binder) + + # Structural pair. subj must be a pair too. + %tagof(t0, a1) + %li(t1, %TAG.PAIR) + %bne(t0, t1, &::no) + + # Recurse on the cars; on success, recurse on the cdrs as a tail call. + %ld(t0, sp, 0) + %car(a0, t0) + %ld(t0, sp, 8) + %car(a1, t0) + %ld(a2, sp, 16) + %call(&pmatch_match) + %beqz(a1, &::no) + + %mov(a2, a0) ; env_after_car + %ld(t0, sp, 0) + %cdr(a0, t0) + %ld(t0, sp, 8) + %cdr(a1, t0) + %tail(&pmatch_match) + + ::binder + # Validate (unquote <sym>): cdr(pat) is a pair, cdr(cdr(pat)) is NIL, + # car(cdr(pat)) is a symbol. + %ld(t0, sp, 0) + %cdr(t1, t0) ; cdr(pat) + %tagof(t0, t1) + %li(t2, %TAG.PAIR) + %bne(t0, t2, &::bad) + %cdr(t0, t1) ; cdr(cdr(pat)) + %li(t2, %imm_val(%IMM.NIL)) + %bne(t0, t2, &::bad) + %car(t0, t1) ; pident (kept in t0) + %tagof(t2, t0) + %li(a3, %TAG.SYM) + %bne(t2, a3, &::bad) + + # Wildcard? Compare against sym_underscore; if so, no binding. + %la(t1, &sym_underscore) + %ld(t1, t1, 0) + %beq(t0, t1, &::ok) + + # Bind: env' = cons(cons(pident, subj), env). pident lives in t0; + # cons clobbers t0..t2, so move it into a0 right away. + %mov(a0, t0) + %ld(a1, sp, 8) + %call(&cons) + %ld(a1, sp, 16) + %call(&cons) + %li(a1, 1) + %eret + + ::ok + %ld(a0, sp, 16) + %li(a1, 1) + %eret + + ::no + %li(a1, 0) + %eret + + ::bad + %die(msg_bad_unquote_pattern) +}) + # eval_let_named(rest=a0, env=a1) -> value (a0). # rest = (name bindings . body). Builds a closure whose captured env # contains a self-binding that resolves `name` to the closure itself @@ -4682,6 +4954,10 @@ :name_or "or" :name_setbang "set!" :name_define_record_type "define-record-type" +:name_pmatch "pmatch" +:name_unquote "unquote" +:name_guard "guard" +:name_underscore "_" # Primitive surface names. :name_sys_exit "sys-exit" @@ -4843,6 +5119,8 @@ :msg_bad_number "scheme1: bad number literal" '0a' '00' :msg_bad_ident "scheme1: bad identifier" '0a' '00' :msg_internal_define "scheme1: internal define is not supported (use letrec)" '0a' '00' +:msg_pmatch_no_match "scheme1: pmatch: no clause matched" '0a' '00' +:msg_bad_unquote_pattern "scheme1: pmatch: malformed ,-pattern" '0a' '00' :name_ch_tab "tab" :name_ch_null "null" @@ -4897,6 +5175,10 @@ :sym_or $(0) :sym_setbang $(0) :sym_define_record_type $(0) +:sym_pmatch $(0) +:sym_unquote $(0) +:sym_guard $(0) +:sym_underscore $(0) # Process startup state, captured by p1_main and read by sys-argv. :saved_argc $(0) diff --git a/tests/scheme1/86-pmatch-basic.expected-exit b/tests/scheme1/86-pmatch-basic.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/scheme1/86-pmatch-basic.scm b/tests/scheme1/86-pmatch-basic.scm @@ -0,0 +1,29 @@ +; Basic pmatch — literal-symbol head dispatch + simple binder + else. +; The compiler will use this exact shape for form dispatch, so getting +; it green is the smallest useful step. + +(define e '(if a b c)) + +(if (= 7 + (pmatch e + ((quote ,x) (sys-exit 91)) + ((if ,t ,a ,b) 7) + (else (sys-exit 92)))) + 0 (sys-exit 1)) + +;; Trailing else clause matches when nothing else does. +(if (= 11 + (pmatch 42 + ((1) (sys-exit 93)) + (else 11))) + 0 (sys-exit 2)) + +;; Literal-integer pattern matches integers by =. +(if (= 99 + (pmatch 5 + (4 (sys-exit 94)) + (5 99) + (else (sys-exit 95)))) + 0 (sys-exit 3)) + +(sys-exit 0) diff --git a/tests/scheme1/87-pmatch-binders.expected-exit b/tests/scheme1/87-pmatch-binders.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/scheme1/87-pmatch-binders.scm b/tests/scheme1/87-pmatch-binders.scm @@ -0,0 +1,55 @@ +; pmatch — binders, wildcards, nested patterns, improper-tail binders. + +;; Single binder (subject is an atom). +(if (= 5 + (pmatch 5 + (,x x))) + 0 (sys-exit 1)) + +;; Wildcard ,_ matches but introduces no binding. +(if (= 7 + (pmatch 99 + (,_ 7))) + 0 (sys-exit 2)) + +;; Multiple binders in a flat list. +(if (= 5 + (pmatch (cons 2 3) + ((,a . ,b) (+ a b)))) + 0 (sys-exit 3)) + +;; Nested pattern. +(if (= 84 + (pmatch '(if cond (op 40 44) else) + ((if ,t (op ,a ,b) ,e) (+ a b)))) + 0 (sys-exit 4)) + +;; Improper-tail binder picks up the cdr. +(if (= 6 + (pmatch '(1 2 3) + ((,h . ,t) (+ h (+ (car t) (car (cdr t))))))) + 0 (sys-exit 5)) + +;; ptail = () enforces a proper list (equivalent to (,x ,y)). +(if (= 11 + (pmatch '(a b) + ((,x ,y . ()) 11) + (else (sys-exit 6)))) + 0 (sys-exit 7)) + +;; Same shape, improper subject, falls through to the explicit-tail clause. +(if (= 6 + (pmatch (cons 1 (cons 2 3)) + ((,x ,y) (sys-exit 8)) ; demands proper list -> miss + ((,x ,y . ,t) (+ x (+ y t))) + (else (sys-exit 9)))) + 0 (sys-exit 10)) + +;; Recursive sum exercises the same shape the compiler will use. +(define (sum xs) + (pmatch xs + (() 0) + ((,h . ,t) (+ h (sum t))))) +(if (= 15 (sum '(1 2 3 4 5))) 0 (sys-exit 9)) + +(sys-exit 0) diff --git a/tests/scheme1/88-pmatch-literals.expected-exit b/tests/scheme1/88-pmatch-literals.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/scheme1/88-pmatch-literals.scm b/tests/scheme1/88-pmatch-literals.scm @@ -0,0 +1,57 @@ +; pmatch — literal-shaped patterns: string, char, bool, sym, (). + +;; String pattern: byte-for-byte equality. +(if (= 1 + (pmatch "hi" + ("ho" (sys-exit 90)) + ("hi" 1) + (else (sys-exit 91)))) + 0 (sys-exit 1)) + +;; Character literals are integers; #\A == 65. +(if (= 65 + (pmatch #\A + (#\B (sys-exit 92)) + (#\A 65) + (else (sys-exit 93)))) + 0 (sys-exit 2)) + +;; #t / #f match by eq?. +(if (= 11 + (pmatch #t + (#f (sys-exit 94)) + (#t 11))) + 0 (sys-exit 3)) + +;; Bare symbol pattern matches that exact symbol; ,x in a sibling +;; clause is the binder spelling. +(if (eq? 'matched + (pmatch 'foo + (bar 'no-bar) + (foo 'matched) + (,x 'fallback))) + 0 (sys-exit 4)) + +;; Symbol-vs-binder: ,x binds anything; the literal symbol clause +;; for `foo` only fires on exactly `foo`. +(if (eq? 'bound-baz + (pmatch 'baz + (foo 'literal-foo) + (,x (if (eq? x 'baz) 'bound-baz 'unexpected)))) + 0 (sys-exit 5)) + +;; () pattern matches the empty list and nothing else. +(if (= 22 + (pmatch '() + ((,h . ,t) (sys-exit 95)) + (() 22))) + 0 (sys-exit 6)) + +;; () pattern doesn't match a non-empty list. +(if (= 33 + (pmatch '(a) + (() (sys-exit 96)) + ((,_) 33))) + 0 (sys-exit 7)) + +(sys-exit 0) diff --git a/tests/scheme1/89-pmatch-guards.expected-exit b/tests/scheme1/89-pmatch-guards.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/scheme1/89-pmatch-guards.scm b/tests/scheme1/89-pmatch-guards.scm @@ -0,0 +1,47 @@ +; pmatch — (guard ...) clauses. + +;; Single guard: passes -> body fires. +(if (= 1 + (pmatch 4 + (,x (guard (= x 4)) 1) + (else (sys-exit 90)))) + 0 (sys-exit 1)) + +;; Single guard: fails -> next clause. +(if (= 2 + (pmatch 4 + (,x (guard (= x 5)) (sys-exit 91)) + (,y 2) + (else (sys-exit 92)))) + 0 (sys-exit 2)) + +;; Multi-guard AND-fold. +(if (= 3 + (pmatch 7 + (,n (guard (> n 0) (< n 10)) 3) + (else (sys-exit 93)))) + 0 (sys-exit 3)) + +;; Multi-guard short-circuits on first #f. +(if (= 4 + (pmatch 7 + (,n (guard (> n 0) (< n 5)) (sys-exit 94)) + (,n 4))) + 0 (sys-exit 4)) + +;; Compiler-style dispatch: guarded fallback on atoms. +(define (classify e) + (pmatch e + ((quote ,_) 'quote) + ((if ,_ ,_ ,_) 'if) + (,x (guard (symbol? x)) 'var) + (,x (guard (integer? x)) 'int) + (else 'unknown))) + +(if (eq? (classify '(quote 42)) 'quote) 0 (sys-exit 5)) +(if (eq? (classify '(if a b c)) 'if) 0 (sys-exit 6)) +(if (eq? (classify 'foo) 'var) 0 (sys-exit 7)) +(if (eq? (classify 99) 'int) 0 (sys-exit 8)) +(if (eq? (classify "string") 'unknown) 0 (sys-exit 9)) + +(sys-exit 0) diff --git a/tests/scheme1/90-pmatch-no-match.expected b/tests/scheme1/90-pmatch-no-match.expected @@ -0,0 +1 @@ +scheme1: pmatch: no clause matched diff --git a/tests/scheme1/90-pmatch-no-match.expected-exit b/tests/scheme1/90-pmatch-no-match.expected-exit @@ -0,0 +1 @@ +1 diff --git a/tests/scheme1/90-pmatch-no-match.scm b/tests/scheme1/90-pmatch-no-match.scm @@ -0,0 +1,4 @@ +; No clause matches and no else -> die with msg_pmatch_no_match. +(pmatch 'nope + ((,h . ,t) 1) + (42 2)) diff --git a/tests/scheme1/91-pmatch-bad-unquote.expected b/tests/scheme1/91-pmatch-bad-unquote.expected @@ -0,0 +1 @@ +scheme1: pmatch: malformed ,-pattern diff --git a/tests/scheme1/91-pmatch-bad-unquote.expected-exit b/tests/scheme1/91-pmatch-bad-unquote.expected-exit @@ -0,0 +1 @@ +1 diff --git a/tests/scheme1/91-pmatch-bad-unquote.scm b/tests/scheme1/91-pmatch-bad-unquote.scm @@ -0,0 +1,6 @@ +; Malformed ,-pattern: ,42 puts a non-symbol after `unquote`. Per +; LISP-PMATCH.md this is a defined runtime error (the carve-out from +; the primitive-failure UB policy). +(pmatch 99 + (,42 'never) + (else 'fallback)) diff --git a/tests/scheme1/92-pmatch-compiler-dispatch.expected-exit b/tests/scheme1/92-pmatch-compiler-dispatch.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/scheme1/92-pmatch-compiler-dispatch.scm b/tests/scheme1/92-pmatch-compiler-dispatch.scm @@ -0,0 +1,35 @@ +; The exact compiler form-dispatch shape from LISP-PMATCH.md. This is the +; reason pmatch is built in: every form the self-hosted compiler walks is +; structured this way. + +(define (kind e) + (pmatch e + ((quote ,_) 'lit) + ((if ,_ ,_ ,_) 'if) + ((lambda ,_ . ,_) 'lambda) + ((set! ,_ ,_) 'set!) + (,x (guard (symbol? x)) 'var) + (,x (guard (integer? x)) 'int) + ((,_ . ,_) 'call) + (else 'unknown))) + +(if (eq? (kind '(quote x)) 'lit) 0 (sys-exit 1)) +(if (eq? (kind '(if a b c)) 'if) 0 (sys-exit 2)) +(if (eq? (kind '(lambda (x) x)) 'lambda) 0 (sys-exit 3)) +(if (eq? (kind '(lambda (x y) x y)) 'lambda) 0 (sys-exit 4)) +(if (eq? (kind '(set! a 1)) 'set!) 0 (sys-exit 5)) +(if (eq? (kind 'foo) 'var) 0 (sys-exit 6)) +(if (eq? (kind 42) 'int) 0 (sys-exit 7)) +(if (eq? (kind '(f 1 2)) 'call) 0 (sys-exit 8)) +(if (eq? (kind "hi") 'unknown) 0 (sys-exit 9)) + +;; Destructure a let-clause as the spec example shows. +(define (split-let-clause c) + (pmatch c + ((,name ,init) (cons name init)) + (else (sys-exit 95)))) +(define p (split-let-clause '(x 42))) +(if (eq? (car p) 'x) 0 (sys-exit 10)) +(if (= (cdr p) 42) 0 (sys-exit 11)) + +(sys-exit 0)