scheme1: pmatch record-destructuring patterns - boot2

commit 41b5fb541064317f3fe63dfbc1d8aafd4e477275
parent bdb3a2653ef840a8f64964230faab989d3a6dc43
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 27 Apr 2026 11:41:28 -0700

scheme1: pmatch record-destructuring patterns

Adds ($ <pred> (<field> <pat>) ...) clauses to pmatch. The pattern
names a record predicate (e.g. tok?), and each (field pat) is matched
against the value at that field. Listed fields only; missing fields
are unconstrained; clause order is irrelevant. Subject of a different
record type or a non-record value falls through.

TD layout extended with a fields slot holding the declaration-order
list of field-name symbols, used at match time for name->index lookup.
define-record-type's user-visible bindings are unchanged: the type
name itself stays unbound (39-record-typename-unbound still passes).

Tests: 97-pmatch-record covers subset matching, clause ordering,
type-check-only patterns, type/value mismatch fall-through, nested
records, guards, binders, and literal-symbol values.

Diffstat:
M scheme1/scheme1.P1pp  | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
A tests/scheme1/97-pmatch-record.expected-exit  | 1 +
A tests/scheme1/97-pmatch-record.scm  | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

3 files changed, 289 insertions(+), 6 deletions(-)
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp
@@ -25,7 +25,7 @@
 %struct SYMENT  { name_ptr name_len global_val pad } # .SIZE = 32
 %struct PRIM    { hdr entry_w data }                  # .SIZE = 24
 %struct CLOSURE { hdr params body env }              # .SIZE = 32
-%struct TD      { hdr name nfields }                  # .SIZE = 24
+%struct TD      { hdr name nfields fields }           # .SIZE = 32
 %struct BV      { hdr data }                          # .SIZE = 16
 %struct REC     { hdr td }                            # .SIZE = 16 (header)
 # Records are variable width: header + td slot + N field slots.
@@ -1369,6 +1369,7 @@
     %intern_form(&name_unquote,     7, &sym_unquote)
     %intern_form(&name_guard,       5, &sym_guard)
     %intern_form(&name_underscore,  1, &sym_underscore)
+    %intern_form(&name_dollar,      1, &sym_dollar)
 })
 
 # eval_if(rest=a0, env=a1) -> value (a0). `rest` is (test then else).
@@ -2161,6 +2162,14 @@
 #     with msg_bad_unquote_pattern (the only carve-out from the spec's
 #     primitive-failure UB policy, since pattern shape is a syntax
 #     error in the user's source).
+#   - pair (car eq? sym_dollar): record pattern `($ pred (f1 p1) ...)`.
+#     Looks up `pred` in the current env; expects a record predicate
+#     PRIM (the one bound by define-record-type). The TD pulled from
+#     PRIM.data drives the type check on subj and the field-name -> idx
+#     lookup. Each clause matches recursively. Listed fields only;
+#     missing fields are unconstrained. Malformed pattern shape, an
+#     unknown field name, a non-record subject, or a TD mismatch fall
+#     through as ::no.
 #   - pair (otherwise): subj must be a pair; recurse on car, then cdr.
 #   - atomic (fixnum, sym, immediate, identical heap pointer): raw
 #     word equality.
@@ -2171,7 +2180,9 @@
 #   pat
 #   subj
 #   env
-%fn2(pmatch_match, {pat subj env}, {
+#   td   (record-pattern: TD pulled from the predicate PRIM)
+#   flw  (record-pattern: cursor over remaining (fname pat) clauses)
+%fn2(pmatch_match, {pat subj env td flw}, {
     %stl(a0, pat)
     %stl(a1, subj)
     %stl(a2, env)
@@ -2205,6 +2216,8 @@
     %car(t0, a0)                   ; phead
     %ld_global(t1, &sym_unquote)
     %beq(t0, t1, &::binder)
+    %ld_global(t1, &sym_dollar)
+    %beq(t0, t1, &::record_pat)
 
     # Structural pair. subj must be a pair too.
     %tagof(t0, a1)
@@ -2257,6 +2270,112 @@
     %li(a1, 1)
     %eret
 
+    ::record_pat
+    # pat = ($ pred-sym (f1 p1) (f2 p2) ...). Resolve pred-sym -> PRIM via
+    # eval, pull TD from PRIM.data, type-check subj, then iterate the
+    # (fname pat_i) clauses. Clobbers `pat` local once we begin the loop:
+    # we stash each pat_i there before recursing so the recursion has the
+    # right argument and the local stays usable as scratch.
+    %ldl(t0, pat)
+    %cdr(t1, t0)                  ; (pred-sym . clauses)
+    %tagof(t0, t1)
+    %li(t2, %TAG.PAIR)
+    %bne(t0, t2, &::no)
+    %car(t0, t1)                  ; t0 = pred-sym
+    %tagof(t2, t0)
+    %li(a3, %TAG.SYM)
+    %bne(t2, a3, &::no)
+    %cdr(t2, t1)                  ; t2 = clauses
+    %stl(t2, flw)
+
+    # eval(pred-sym, env) -> a0 = pred PRIM (or dies "unbound").
+    %mov(a0, t0)
+    %ldl(a1, env)
+    %call(&eval)
+
+    # Verify HEAP / HDR.PRIM, entry == &prim_predicate_entry; extract TD
+    # from PRIM.data; sanity-check TD is HEAP / HDR.TD.
+    %tagof(t0, a0)
+    %li(t1, %TAG.HEAP)
+    %bne(t0, t1, &::no)
+    %hdr_type(t0, a0)
+    %li(t1, %HDR.PRIM)
+    %bne(t0, t1, &::no)
+    %heap_ld(t1, a0, %PRIM.entry_w)
+    %la(t2, &prim_predicate_entry)
+    %bne(t1, t2, &::no)
+    %heap_ld(t1, a0, %PRIM.data)  ; t1 = TD
+    %tagof(t0, t1)
+    %li(t2, %TAG.HEAP)
+    %bne(t0, t2, &::no)
+    %hdr_type(t0, t1)
+    %li(t2, %HDR.TD)
+    %bne(t0, t2, &::no)
+    %stl(t1, td)
+
+    # Verify subj is HDR.REC with REC.td == TD.
+    %ldl(a0, subj)
+    %tagof(t0, a0)
+    %li(t1, %TAG.HEAP)
+    %bne(t0, t1, &::no)
+    %hdr_type(t0, a0)
+    %li(t1, %HDR.REC)
+    %bne(t0, t1, &::no)
+    %heap_ld(t0, a0, %REC.td)
+    %ldl(t1, td)
+    %bne(t0, t1, &::no)
+
+    ::record_field_loop
+    # flw points at remaining (fname pat_i) clauses; NIL ends the loop.
+    %ldl(t0, flw)
+    %if_nil(t1, t0, &::ok)
+    %car(t1, t0)                  ; t1 = clause
+    %tagof(t0, t1)
+    %li(t2, %TAG.PAIR)
+    %bne(t0, t2, &::no)
+    %car(t2, t1)                  ; t2 = fname
+    %cdr(t1, t1)                  ; t1 = (pat_i)
+    %tagof(t0, t1)
+    %li(a3, %TAG.PAIR)
+    %bne(t0, a3, &::no)
+    %car(a3, t1)                  ; a3 = pat_i
+    %stl(a3, pat)                 ; reuse `pat` local for pat_i across recursion
+
+    # Linear scan of TD.fields for fname; idx accumulated in t1. t2 holds
+    # fname (still live); a3 is scratch (since we no longer need pat_i in
+    # a register — it's in the local).
+    %ldl(t0, td)
+    %heap_ld(t0, t0, %TD.fields)
+    %li(t1, 0)
+    ::record_field_idx_loop
+    %if_nil(a3, t0, &::no)
+    %car(a3, t0)
+    %beq(a3, t2, &::record_field_found)
+    %cdr(t0, t0)
+    %addi(t1, t1, 1)
+    %b(&::record_field_idx_loop)
+
+    ::record_field_found
+    # val = ld(subj_tagged + (idx<<3) + 13). Same offset arithmetic as
+    # prim_accessor_entry. Compute the address into a1 directly so the
+    # recursive call's val arg is in place.
+    %ldl(a1, subj)
+    %shli(t1, t1, 3)
+    %add(a1, a1, t1)
+    %ld(a1, a1, 13)
+
+    # Recurse: pmatch_match(pat_i, val, env). pat_i is in `pat` local.
+    %ldl(a0, pat)
+    %ldl(a2, env)
+    %call(&pmatch_match)
+    %beqz(a1, &::no)
+    %stl(a0, env)
+
+    %ldl(t0, flw)
+    %cdr(t0, t0)
+    %stl(t0, flw)
+    %b(&::record_field_loop)
+
     ::ok
     %ldl(a0, env)
     %li(a1, 1)
@@ -3985,6 +4104,9 @@
 # Each clause is (field-name accessor) or (field-name accessor mutator).
 # Allocates one TD + one parameterized PRIM per name introduced (ctor,
 # predicate, accessor, mutator) and binds each to the symbol's global.
+# The TD also stores a list of field-name symbols in declaration order;
+# pmatch's ($ pred (field pat) ...) record pattern uses this to map
+# field names to indices at match time.
 #
 # Locals:
 #   rest
@@ -3993,8 +4115,10 @@
 #   walk  (clauses, advancing)
 #   idx  (raw counter)
 #   nfields
-#   pad
-%fn2(eval_define_record_type, {rest env td walk idx nfields pad}, {
+#   fl_head  (head of field-name list under construction)
+#   fl_tail  (tail cell of field-name list under construction)
+#   fl_cur   (cursor walking clauses for field-name pre-pass)
+%fn2(eval_define_record_type, {rest env td walk idx nfields fl_head fl_tail fl_cur}, {
     %stl(a0, rest)
     %stl(a1, env)
 
@@ -4007,8 +4131,9 @@
     %call(&list_length)
     %stl(a0, nfields)
 
-    # td = alloc_hdr(24, HDR.TD); td.name = type-name; td.nfields = nfields.
-    %li(a0, 24)
+    # td = alloc_hdr(TD.SIZE, HDR.TD); td.name = type-name;
+    # td.nfields = nfields; td.fields = NIL (filled below).
+    %li(a0, %TD.SIZE)
     %li(a1, %HDR.TD)
     %call(&alloc_hdr)
     %stl(a0, td)
@@ -4017,6 +4142,49 @@
     %heap_st(t0, a0, %TD.name)
     %ldl(t1, nfields)
     %heap_st(t1, a0, %TD.nfields)
+    %li(t1, %imm_val(%IMM.NIL))
+    %heap_st(t1, a0, %TD.fields)
+
+    # Pre-pass: build (field-name-1 ... field-name-N) in declaration order
+    # via head/tail accumulator, then store at td.fields. Each clause's
+    # car is the field-name symbol. Uses fl_cur as a separate cursor so
+    # walk is left intact for the accessor-binding loop below.
+    %li(t0, %imm_val(%IMM.NIL))
+    %stl(t0, fl_head)
+    %stl(t0, fl_tail)
+    %ldl(t0, walk)
+    %stl(t0, fl_cur)
+
+    ::fl_loop
+    %ldl(t0, fl_cur)
+    %if_nil(t1, t0, &::fl_done)
+    # cell = cons(car(car(fl_cur)), NIL)
+    %car(t1, t0)
+    %car(a0, t1)
+    %li(a1, %imm_val(%IMM.NIL))
+    %call(&cons)
+    # Splice into list: if head is NIL, head = tail = cell.
+    # Else set-cdr!(tail, cell); tail = cell.
+    %ldl(t1, fl_head)
+    %li(t2, %imm_val(%IMM.NIL))
+    %bne(t1, t2, &::fl_append)
+    %stl(a0, fl_head)
+    %stl(a0, fl_tail)
+    %b(&::fl_next)
+    ::fl_append
+    %ldl(t1, fl_tail)
+    %set_cdr(a0, t1)
+    %stl(a0, fl_tail)
+    ::fl_next
+    %ldl(t0, fl_cur)
+    %cdr(t0, t0)
+    %stl(t0, fl_cur)
+    %b(&::fl_loop)
+
+    ::fl_done
+    %ldl(t0, td)
+    %ldl(t1, fl_head)
+    %heap_st(t1, t0, %TD.fields)
 
     # ctor-prim = make_param_prim(prim_ctor_entry, td); bind ctor-name.
     %la(a0, &prim_ctor_entry)
@@ -5127,6 +5295,7 @@
 :name_unquote     "unquote"
 :name_guard       "guard" 00 00
 :name_underscore  "_" 00 00 00 00 00 00
+:name_dollar      "$" 00 00 00 00 00 00
 
 # Primitive surface names.
 :name_sys_exit    "sys-exit" 00 00 00 00 00 00 00
@@ -5391,6 +5560,7 @@
 :sym_unquote      $(0)
 :sym_guard        $(0)
 :sym_underscore   $(0)
+:sym_dollar       $(0)
 
 # Process startup state, captured by p1_main and read by sys-argv.
 :saved_argc       $(0)
diff --git a/tests/scheme1/97-pmatch-record.expected-exit b/tests/scheme1/97-pmatch-record.expected-exit
@@ -0,0 +1 @@
+0
diff --git a/tests/scheme1/97-pmatch-record.scm b/tests/scheme1/97-pmatch-record.scm
@@ -0,0 +1,112 @@
+; pmatch record-destructuring patterns: ($ <pred> (<field> <pat>) ...).
+; The pattern names the record's predicate (e.g. tok?), and each field
+; clause is matched against the value at that field. Listed fields only;
+; missing fields are unconstrained. Order of (field pat) clauses doesn't
+; matter. Subject of a different record type, or a non-record value, falls
+; through to the next clause.
+;
+; Inside a record clause's <pat>, the standard pmatch literal/binder rules
+; apply: bare symbols (IDENT, lparen) are literal-symbol patterns, ,x is a
+; binder, and ($ ...) nests for record fields.
+
+(define-record-type tok
+  (%tok kind value loc)
+  tok?
+  (kind  tok-kind)
+  (value tok-value)
+  (loc   tok-loc))
+
+(define-record-type loc
+  (%loc file line)
+  loc?
+  (file loc-file)
+  (line loc-line))
+
+;; Subset matching: bind two of three fields; ignore loc.
+(define t1 (%tok 'IDENT "x" (%loc "a.c" 12)))
+(if (eq? 'ok
+         (pmatch t1
+           (($ tok? (kind IDENT) (value ,v)) (if (equal? v "x") 'ok 'wrong-v))
+           (else (sys-exit 91))))
+    0 (sys-exit 1))
+
+;; Field clauses can appear in any order.
+(if (eq? 'reordered
+         (pmatch t1
+           (($ tok? (value ,v) (kind IDENT)) 'reordered)
+           (else (sys-exit 92))))
+    0 (sys-exit 2))
+
+;; A pattern with no field clauses is a pure type-check.
+(if (eq? 'just-tok
+         (pmatch t1
+           (($ tok?) 'just-tok)
+           (else (sys-exit 93))))
+    0 (sys-exit 3))
+
+;; Type mismatch: subject isn't a tok, falls through.
+(define l1 (%loc "b.c" 7))
+(if (eq? 'fell-through
+         (pmatch l1
+           (($ tok? (kind ,_)) (sys-exit 94))
+           (else 'fell-through)))
+    0 (sys-exit 4))
+
+;; Non-record subject (a fixnum) also falls through cleanly.
+(if (eq? 'not-a-record
+         (pmatch 42
+           (($ tok? (kind ,_)) (sys-exit 95))
+           (else 'not-a-record)))
+    0 (sys-exit 5))
+
+;; Field-pat fails -> clause fails -> next clause runs.
+(define t2 (%tok 'PUNCT 'lparen (%loc "c.c" 1)))
+(if (eq? 'punct
+         (pmatch t2
+           (($ tok? (kind IDENT)) (sys-exit 96))
+           (($ tok? (kind PUNCT)) 'punct)
+           (else (sys-exit 97))))
+    0 (sys-exit 6))
+
+;; Nested record pattern: destructure the loc inside the tok.
+(if (eq? 'nested-ok
+         (pmatch t1
+           (($ tok? (loc ($ loc? (file ,f) (line ,n))))
+            (if (and (equal? f "a.c") (= n 12)) 'nested-ok 'nested-bad))
+           (else (sys-exit 98))))
+    0 (sys-exit 7))
+
+;; Combined with guard: the record must type-check first, then guards run.
+(if (eq? 'short-name
+         (pmatch t1
+           (($ tok? (value ,v))
+            (guard (< (string-length v) 3))
+            'short-name)
+           (else (sys-exit 99))))
+    0 (sys-exit 8))
+
+;; Guard rejection falls through.
+(define t3 (%tok 'IDENT "longer-name" (%loc "d.c" 2)))
+(if (eq? 'long-fallthrough
+         (pmatch t3
+           (($ tok? (value ,v))
+            (guard (< (string-length v) 3))
+            (sys-exit 100))
+           (else 'long-fallthrough)))
+    0 (sys-exit 9))
+
+;; Nested binder inside a record clause: ,x captures the field value.
+(if (eq? 'IDENT
+         (pmatch t1
+           (($ tok? (kind ,k)) k)
+           (else (sys-exit 101))))
+    0 (sys-exit 10))
+
+;; Match against a literal punct value (bare symbol pattern, not quoted).
+(if (eq? 'open-paren
+         (pmatch t2
+           (($ tok? (kind PUNCT) (value lparen)) 'open-paren)
+           (else (sys-exit 102))))
+    0 (sys-exit 11))
+
+(sys-exit 0)

	boot2 Playing with the boostrap
	git clone https://git.ryansepassi.com/git/boot2.git
	Log \| Files \| Refs \| README

M	scheme1/scheme1.P1pp	\|	182	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
A	tests/scheme1/97-pmatch-record.expected-exit	\|	1	+
A	tests/scheme1/97-pmatch-record.scm	\|	112	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++