commit 41b5fb541064317f3fe63dfbc1d8aafd4e477275
parent bdb3a2653ef840a8f64964230faab989d3a6dc43
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 27 Apr 2026 11:41:28 -0700
scheme1: pmatch record-destructuring patterns
Adds ($ <pred> (<field> <pat>) ...) clauses to pmatch. The pattern
names a record predicate (e.g. tok?), and each (field pat) is matched
against the value at that field. Listed fields only; missing fields
are unconstrained; clause order is irrelevant. Subject of a different
record type or a non-record value falls through.
TD layout extended with a fields slot holding the declaration-order
list of field-name symbols, used at match time for name->index lookup.
define-record-type's user-visible bindings are unchanged: the type
name itself stays unbound (39-record-typename-unbound still passes).
Tests: 97-pmatch-record covers subset matching, clause ordering,
type-check-only patterns, type/value mismatch fall-through, nested
records, guards, binders, and literal-symbol values.
Diffstat:
3 files changed, 289 insertions(+), 6 deletions(-)
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp
@@ -25,7 +25,7 @@
%struct SYMENT { name_ptr name_len global_val pad } # .SIZE = 32
%struct PRIM { hdr entry_w data } # .SIZE = 24
%struct CLOSURE { hdr params body env } # .SIZE = 32
-%struct TD { hdr name nfields } # .SIZE = 24
+%struct TD { hdr name nfields fields } # .SIZE = 32
%struct BV { hdr data } # .SIZE = 16
%struct REC { hdr td } # .SIZE = 16 (header)
# Records are variable width: header + td slot + N field slots.
@@ -1369,6 +1369,7 @@
%intern_form(&name_unquote, 7, &sym_unquote)
%intern_form(&name_guard, 5, &sym_guard)
%intern_form(&name_underscore, 1, &sym_underscore)
+ %intern_form(&name_dollar, 1, &sym_dollar)
})
# eval_if(rest=a0, env=a1) -> value (a0). `rest` is (test then else).
@@ -2161,6 +2162,14 @@
# with msg_bad_unquote_pattern (the only carve-out from the spec's
# primitive-failure UB policy, since pattern shape is a syntax
# error in the user's source).
+# - pair (car eq? sym_dollar): record pattern `($ pred (f1 p1) ...)`.
+# Looks up `pred` in the current env; expects a record predicate
+# PRIM (the one bound by define-record-type). The TD pulled from
+# PRIM.data drives the type check on subj and the field-name -> idx
+# lookup. Each clause matches recursively. Listed fields only;
+# missing fields are unconstrained. Malformed pattern shape, an
+# unknown field name, a non-record subject, or a TD mismatch fall
+# through as ::no.
# - pair (otherwise): subj must be a pair; recurse on car, then cdr.
# - atomic (fixnum, sym, immediate, identical heap pointer): raw
# word equality.
@@ -2171,7 +2180,9 @@
# pat
# subj
# env
-%fn2(pmatch_match, {pat subj env}, {
+# td (record-pattern: TD pulled from the predicate PRIM)
+# flw (record-pattern: cursor over remaining (fname pat) clauses)
+%fn2(pmatch_match, {pat subj env td flw}, {
%stl(a0, pat)
%stl(a1, subj)
%stl(a2, env)
@@ -2205,6 +2216,8 @@
%car(t0, a0) ; phead
%ld_global(t1, &sym_unquote)
%beq(t0, t1, &::binder)
+ %ld_global(t1, &sym_dollar)
+ %beq(t0, t1, &::record_pat)
# Structural pair. subj must be a pair too.
%tagof(t0, a1)
@@ -2257,6 +2270,112 @@
%li(a1, 1)
%eret
+ ::record_pat
+ # pat = ($ pred-sym (f1 p1) (f2 p2) ...). Resolve pred-sym -> PRIM via
+ # eval, pull TD from PRIM.data, type-check subj, then iterate the
+ # (fname pat_i) clauses. Clobbers `pat` local once we begin the loop:
+ # we stash each pat_i there before recursing so the recursion has the
+ # right argument and the local stays usable as scratch.
+ %ldl(t0, pat)
+ %cdr(t1, t0) ; (pred-sym . clauses)
+ %tagof(t0, t1)
+ %li(t2, %TAG.PAIR)
+ %bne(t0, t2, &::no)
+ %car(t0, t1) ; t0 = pred-sym
+ %tagof(t2, t0)
+ %li(a3, %TAG.SYM)
+ %bne(t2, a3, &::no)
+ %cdr(t2, t1) ; t2 = clauses
+ %stl(t2, flw)
+
+ # eval(pred-sym, env) -> a0 = pred PRIM (or dies "unbound").
+ %mov(a0, t0)
+ %ldl(a1, env)
+ %call(&eval)
+
+ # Verify HEAP / HDR.PRIM, entry == &prim_predicate_entry; extract TD
+ # from PRIM.data; sanity-check TD is HEAP / HDR.TD.
+ %tagof(t0, a0)
+ %li(t1, %TAG.HEAP)
+ %bne(t0, t1, &::no)
+ %hdr_type(t0, a0)
+ %li(t1, %HDR.PRIM)
+ %bne(t0, t1, &::no)
+ %heap_ld(t1, a0, %PRIM.entry_w)
+ %la(t2, &prim_predicate_entry)
+ %bne(t1, t2, &::no)
+ %heap_ld(t1, a0, %PRIM.data) ; t1 = TD
+ %tagof(t0, t1)
+ %li(t2, %TAG.HEAP)
+ %bne(t0, t2, &::no)
+ %hdr_type(t0, t1)
+ %li(t2, %HDR.TD)
+ %bne(t0, t2, &::no)
+ %stl(t1, td)
+
+ # Verify subj is HDR.REC with REC.td == TD.
+ %ldl(a0, subj)
+ %tagof(t0, a0)
+ %li(t1, %TAG.HEAP)
+ %bne(t0, t1, &::no)
+ %hdr_type(t0, a0)
+ %li(t1, %HDR.REC)
+ %bne(t0, t1, &::no)
+ %heap_ld(t0, a0, %REC.td)
+ %ldl(t1, td)
+ %bne(t0, t1, &::no)
+
+ ::record_field_loop
+ # flw points at remaining (fname pat_i) clauses; NIL ends the loop.
+ %ldl(t0, flw)
+ %if_nil(t1, t0, &::ok)
+ %car(t1, t0) ; t1 = clause
+ %tagof(t0, t1)
+ %li(t2, %TAG.PAIR)
+ %bne(t0, t2, &::no)
+ %car(t2, t1) ; t2 = fname
+ %cdr(t1, t1) ; t1 = (pat_i)
+ %tagof(t0, t1)
+ %li(a3, %TAG.PAIR)
+ %bne(t0, a3, &::no)
+ %car(a3, t1) ; a3 = pat_i
+ %stl(a3, pat) ; reuse `pat` local for pat_i across recursion
+
+ # Linear scan of TD.fields for fname; idx accumulated in t1. t2 holds
+ # fname (still live); a3 is scratch (since we no longer need pat_i in
+ # a register — it's in the local).
+ %ldl(t0, td)
+ %heap_ld(t0, t0, %TD.fields)
+ %li(t1, 0)
+ ::record_field_idx_loop
+ %if_nil(a3, t0, &::no)
+ %car(a3, t0)
+ %beq(a3, t2, &::record_field_found)
+ %cdr(t0, t0)
+ %addi(t1, t1, 1)
+ %b(&::record_field_idx_loop)
+
+ ::record_field_found
+ # val = ld(subj_tagged + (idx<<3) + 13). Same offset arithmetic as
+ # prim_accessor_entry. Compute the address into a1 directly so the
+ # recursive call's val arg is in place.
+ %ldl(a1, subj)
+ %shli(t1, t1, 3)
+ %add(a1, a1, t1)
+ %ld(a1, a1, 13)
+
+ # Recurse: pmatch_match(pat_i, val, env). pat_i is in `pat` local.
+ %ldl(a0, pat)
+ %ldl(a2, env)
+ %call(&pmatch_match)
+ %beqz(a1, &::no)
+ %stl(a0, env)
+
+ %ldl(t0, flw)
+ %cdr(t0, t0)
+ %stl(t0, flw)
+ %b(&::record_field_loop)
+
::ok
%ldl(a0, env)
%li(a1, 1)
@@ -3985,6 +4104,9 @@
# Each clause is (field-name accessor) or (field-name accessor mutator).
# Allocates one TD + one parameterized PRIM per name introduced (ctor,
# predicate, accessor, mutator) and binds each to the symbol's global.
+# The TD also stores a list of field-name symbols in declaration order;
+# pmatch's ($ pred (field pat) ...) record pattern uses this to map
+# field names to indices at match time.
#
# Locals:
# rest
@@ -3993,8 +4115,10 @@
# walk (clauses, advancing)
# idx (raw counter)
# nfields
-# pad
-%fn2(eval_define_record_type, {rest env td walk idx nfields pad}, {
+# fl_head (head of field-name list under construction)
+# fl_tail (tail cell of field-name list under construction)
+# fl_cur (cursor walking clauses for field-name pre-pass)
+%fn2(eval_define_record_type, {rest env td walk idx nfields fl_head fl_tail fl_cur}, {
%stl(a0, rest)
%stl(a1, env)
@@ -4007,8 +4131,9 @@
%call(&list_length)
%stl(a0, nfields)
- # td = alloc_hdr(24, HDR.TD); td.name = type-name; td.nfields = nfields.
- %li(a0, 24)
+ # td = alloc_hdr(TD.SIZE, HDR.TD); td.name = type-name;
+ # td.nfields = nfields; td.fields = NIL (filled below).
+ %li(a0, %TD.SIZE)
%li(a1, %HDR.TD)
%call(&alloc_hdr)
%stl(a0, td)
@@ -4017,6 +4142,49 @@
%heap_st(t0, a0, %TD.name)
%ldl(t1, nfields)
%heap_st(t1, a0, %TD.nfields)
+ %li(t1, %imm_val(%IMM.NIL))
+ %heap_st(t1, a0, %TD.fields)
+
+ # Pre-pass: build (field-name-1 ... field-name-N) in declaration order
+ # via head/tail accumulator, then store at td.fields. Each clause's
+ # car is the field-name symbol. Uses fl_cur as a separate cursor so
+ # walk is left intact for the accessor-binding loop below.
+ %li(t0, %imm_val(%IMM.NIL))
+ %stl(t0, fl_head)
+ %stl(t0, fl_tail)
+ %ldl(t0, walk)
+ %stl(t0, fl_cur)
+
+ ::fl_loop
+ %ldl(t0, fl_cur)
+ %if_nil(t1, t0, &::fl_done)
+ # cell = cons(car(car(fl_cur)), NIL)
+ %car(t1, t0)
+ %car(a0, t1)
+ %li(a1, %imm_val(%IMM.NIL))
+ %call(&cons)
+ # Splice into list: if head is NIL, head = tail = cell.
+ # Else set-cdr!(tail, cell); tail = cell.
+ %ldl(t1, fl_head)
+ %li(t2, %imm_val(%IMM.NIL))
+ %bne(t1, t2, &::fl_append)
+ %stl(a0, fl_head)
+ %stl(a0, fl_tail)
+ %b(&::fl_next)
+ ::fl_append
+ %ldl(t1, fl_tail)
+ %set_cdr(a0, t1)
+ %stl(a0, fl_tail)
+ ::fl_next
+ %ldl(t0, fl_cur)
+ %cdr(t0, t0)
+ %stl(t0, fl_cur)
+ %b(&::fl_loop)
+
+ ::fl_done
+ %ldl(t0, td)
+ %ldl(t1, fl_head)
+ %heap_st(t1, t0, %TD.fields)
# ctor-prim = make_param_prim(prim_ctor_entry, td); bind ctor-name.
%la(a0, &prim_ctor_entry)
@@ -5127,6 +5295,7 @@
:name_unquote "unquote"
:name_guard "guard" 00 00
:name_underscore "_" 00 00 00 00 00 00
+:name_dollar "$" 00 00 00 00 00 00
# Primitive surface names.
:name_sys_exit "sys-exit" 00 00 00 00 00 00 00
@@ -5391,6 +5560,7 @@
:sym_unquote $(0)
:sym_guard $(0)
:sym_underscore $(0)
+:sym_dollar $(0)
# Process startup state, captured by p1_main and read by sys-argv.
:saved_argc $(0)
diff --git a/tests/scheme1/97-pmatch-record.expected-exit b/tests/scheme1/97-pmatch-record.expected-exit
@@ -0,0 +1 @@
+0
diff --git a/tests/scheme1/97-pmatch-record.scm b/tests/scheme1/97-pmatch-record.scm
@@ -0,0 +1,112 @@
+; pmatch record-destructuring patterns: ($ <pred> (<field> <pat>) ...).
+; The pattern names the record's predicate (e.g. tok?), and each field
+; clause is matched against the value at that field. Listed fields only;
+; missing fields are unconstrained. Order of (field pat) clauses doesn't
+; matter. Subject of a different record type, or a non-record value, falls
+; through to the next clause.
+;
+; Inside a record clause's <pat>, the standard pmatch literal/binder rules
+; apply: bare symbols (IDENT, lparen) are literal-symbol patterns, ,x is a
+; binder, and ($ ...) nests for record fields.
+
+(define-record-type tok
+ (%tok kind value loc)
+ tok?
+ (kind tok-kind)
+ (value tok-value)
+ (loc tok-loc))
+
+(define-record-type loc
+ (%loc file line)
+ loc?
+ (file loc-file)
+ (line loc-line))
+
+;; Subset matching: bind two of three fields; ignore loc.
+(define t1 (%tok 'IDENT "x" (%loc "a.c" 12)))
+(if (eq? 'ok
+ (pmatch t1
+ (($ tok? (kind IDENT) (value ,v)) (if (equal? v "x") 'ok 'wrong-v))
+ (else (sys-exit 91))))
+ 0 (sys-exit 1))
+
+;; Field clauses can appear in any order.
+(if (eq? 'reordered
+ (pmatch t1
+ (($ tok? (value ,v) (kind IDENT)) 'reordered)
+ (else (sys-exit 92))))
+ 0 (sys-exit 2))
+
+;; A pattern with no field clauses is a pure type-check.
+(if (eq? 'just-tok
+ (pmatch t1
+ (($ tok?) 'just-tok)
+ (else (sys-exit 93))))
+ 0 (sys-exit 3))
+
+;; Type mismatch: subject isn't a tok, falls through.
+(define l1 (%loc "b.c" 7))
+(if (eq? 'fell-through
+ (pmatch l1
+ (($ tok? (kind ,_)) (sys-exit 94))
+ (else 'fell-through)))
+ 0 (sys-exit 4))
+
+;; Non-record subject (a fixnum) also falls through cleanly.
+(if (eq? 'not-a-record
+ (pmatch 42
+ (($ tok? (kind ,_)) (sys-exit 95))
+ (else 'not-a-record)))
+ 0 (sys-exit 5))
+
+;; Field-pat fails -> clause fails -> next clause runs.
+(define t2 (%tok 'PUNCT 'lparen (%loc "c.c" 1)))
+(if (eq? 'punct
+ (pmatch t2
+ (($ tok? (kind IDENT)) (sys-exit 96))
+ (($ tok? (kind PUNCT)) 'punct)
+ (else (sys-exit 97))))
+ 0 (sys-exit 6))
+
+;; Nested record pattern: destructure the loc inside the tok.
+(if (eq? 'nested-ok
+ (pmatch t1
+ (($ tok? (loc ($ loc? (file ,f) (line ,n))))
+ (if (and (equal? f "a.c") (= n 12)) 'nested-ok 'nested-bad))
+ (else (sys-exit 98))))
+ 0 (sys-exit 7))
+
+;; Combined with guard: the record must type-check first, then guards run.
+(if (eq? 'short-name
+ (pmatch t1
+ (($ tok? (value ,v))
+ (guard (< (string-length v) 3))
+ 'short-name)
+ (else (sys-exit 99))))
+ 0 (sys-exit 8))
+
+;; Guard rejection falls through.
+(define t3 (%tok 'IDENT "longer-name" (%loc "d.c" 2)))
+(if (eq? 'long-fallthrough
+ (pmatch t3
+ (($ tok? (value ,v))
+ (guard (< (string-length v) 3))
+ (sys-exit 100))
+ (else 'long-fallthrough)))
+ 0 (sys-exit 9))
+
+;; Nested binder inside a record clause: ,x captures the field value.
+(if (eq? 'IDENT
+ (pmatch t1
+ (($ tok? (kind ,k)) k)
+ (else (sys-exit 101))))
+ 0 (sys-exit 10))
+
+;; Match against a literal punct value (bare symbol pattern, not quoted).
+(if (eq? 'open-paren
+ (pmatch t2
+ (($ tok? (kind PUNCT) (value lparen)) 'open-paren)
+ (else (sys-exit 102))))
+ 0 (sys-exit 11))
+
+(sys-exit 0)