boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 41b5fb541064317f3fe63dfbc1d8aafd4e477275
parent bdb3a2653ef840a8f64964230faab989d3a6dc43
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 27 Apr 2026 11:41:28 -0700

scheme1: pmatch record-destructuring patterns

Adds ($ <pred> (<field> <pat>) ...) clauses to pmatch. The pattern
names a record predicate (e.g. tok?), and each (field pat) is matched
against the value at that field. Listed fields only; missing fields
are unconstrained; clause order is irrelevant. Subject of a different
record type or a non-record value falls through.

TD layout extended with a fields slot holding the declaration-order
list of field-name symbols, used at match time for name->index lookup.
define-record-type's user-visible bindings are unchanged: the type
name itself stays unbound (39-record-typename-unbound still passes).

Tests: 97-pmatch-record covers subset matching, clause ordering,
type-check-only patterns, type/value mismatch fall-through, nested
records, guards, binders, and literal-symbol values.

Diffstat:
Mscheme1/scheme1.P1pp | 182++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Atests/scheme1/97-pmatch-record.expected-exit | 1+
Atests/scheme1/97-pmatch-record.scm | 112+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 289 insertions(+), 6 deletions(-)

diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp @@ -25,7 +25,7 @@ %struct SYMENT { name_ptr name_len global_val pad } # .SIZE = 32 %struct PRIM { hdr entry_w data } # .SIZE = 24 %struct CLOSURE { hdr params body env } # .SIZE = 32 -%struct TD { hdr name nfields } # .SIZE = 24 +%struct TD { hdr name nfields fields } # .SIZE = 32 %struct BV { hdr data } # .SIZE = 16 %struct REC { hdr td } # .SIZE = 16 (header) # Records are variable width: header + td slot + N field slots. @@ -1369,6 +1369,7 @@ %intern_form(&name_unquote, 7, &sym_unquote) %intern_form(&name_guard, 5, &sym_guard) %intern_form(&name_underscore, 1, &sym_underscore) + %intern_form(&name_dollar, 1, &sym_dollar) }) # eval_if(rest=a0, env=a1) -> value (a0). `rest` is (test then else). @@ -2161,6 +2162,14 @@ # with msg_bad_unquote_pattern (the only carve-out from the spec's # primitive-failure UB policy, since pattern shape is a syntax # error in the user's source). +# - pair (car eq? sym_dollar): record pattern `($ pred (f1 p1) ...)`. +# Looks up `pred` in the current env; expects a record predicate +# PRIM (the one bound by define-record-type). The TD pulled from +# PRIM.data drives the type check on subj and the field-name -> idx +# lookup. Each clause matches recursively. Listed fields only; +# missing fields are unconstrained. Malformed pattern shape, an +# unknown field name, a non-record subject, or a TD mismatch fall +# through as ::no. # - pair (otherwise): subj must be a pair; recurse on car, then cdr. # - atomic (fixnum, sym, immediate, identical heap pointer): raw # word equality. @@ -2171,7 +2180,9 @@ # pat # subj # env -%fn2(pmatch_match, {pat subj env}, { +# td (record-pattern: TD pulled from the predicate PRIM) +# flw (record-pattern: cursor over remaining (fname pat) clauses) +%fn2(pmatch_match, {pat subj env td flw}, { %stl(a0, pat) %stl(a1, subj) %stl(a2, env) @@ -2205,6 +2216,8 @@ %car(t0, a0) ; phead %ld_global(t1, &sym_unquote) %beq(t0, t1, &::binder) + %ld_global(t1, &sym_dollar) + %beq(t0, t1, &::record_pat) # Structural pair. subj must be a pair too. %tagof(t0, a1) @@ -2257,6 +2270,112 @@ %li(a1, 1) %eret + ::record_pat + # pat = ($ pred-sym (f1 p1) (f2 p2) ...). Resolve pred-sym -> PRIM via + # eval, pull TD from PRIM.data, type-check subj, then iterate the + # (fname pat_i) clauses. Clobbers `pat` local once we begin the loop: + # we stash each pat_i there before recursing so the recursion has the + # right argument and the local stays usable as scratch. + %ldl(t0, pat) + %cdr(t1, t0) ; (pred-sym . clauses) + %tagof(t0, t1) + %li(t2, %TAG.PAIR) + %bne(t0, t2, &::no) + %car(t0, t1) ; t0 = pred-sym + %tagof(t2, t0) + %li(a3, %TAG.SYM) + %bne(t2, a3, &::no) + %cdr(t2, t1) ; t2 = clauses + %stl(t2, flw) + + # eval(pred-sym, env) -> a0 = pred PRIM (or dies "unbound"). + %mov(a0, t0) + %ldl(a1, env) + %call(&eval) + + # Verify HEAP / HDR.PRIM, entry == &prim_predicate_entry; extract TD + # from PRIM.data; sanity-check TD is HEAP / HDR.TD. + %tagof(t0, a0) + %li(t1, %TAG.HEAP) + %bne(t0, t1, &::no) + %hdr_type(t0, a0) + %li(t1, %HDR.PRIM) + %bne(t0, t1, &::no) + %heap_ld(t1, a0, %PRIM.entry_w) + %la(t2, &prim_predicate_entry) + %bne(t1, t2, &::no) + %heap_ld(t1, a0, %PRIM.data) ; t1 = TD + %tagof(t0, t1) + %li(t2, %TAG.HEAP) + %bne(t0, t2, &::no) + %hdr_type(t0, t1) + %li(t2, %HDR.TD) + %bne(t0, t2, &::no) + %stl(t1, td) + + # Verify subj is HDR.REC with REC.td == TD. + %ldl(a0, subj) + %tagof(t0, a0) + %li(t1, %TAG.HEAP) + %bne(t0, t1, &::no) + %hdr_type(t0, a0) + %li(t1, %HDR.REC) + %bne(t0, t1, &::no) + %heap_ld(t0, a0, %REC.td) + %ldl(t1, td) + %bne(t0, t1, &::no) + + ::record_field_loop + # flw points at remaining (fname pat_i) clauses; NIL ends the loop. + %ldl(t0, flw) + %if_nil(t1, t0, &::ok) + %car(t1, t0) ; t1 = clause + %tagof(t0, t1) + %li(t2, %TAG.PAIR) + %bne(t0, t2, &::no) + %car(t2, t1) ; t2 = fname + %cdr(t1, t1) ; t1 = (pat_i) + %tagof(t0, t1) + %li(a3, %TAG.PAIR) + %bne(t0, a3, &::no) + %car(a3, t1) ; a3 = pat_i + %stl(a3, pat) ; reuse `pat` local for pat_i across recursion + + # Linear scan of TD.fields for fname; idx accumulated in t1. t2 holds + # fname (still live); a3 is scratch (since we no longer need pat_i in + # a register — it's in the local). + %ldl(t0, td) + %heap_ld(t0, t0, %TD.fields) + %li(t1, 0) + ::record_field_idx_loop + %if_nil(a3, t0, &::no) + %car(a3, t0) + %beq(a3, t2, &::record_field_found) + %cdr(t0, t0) + %addi(t1, t1, 1) + %b(&::record_field_idx_loop) + + ::record_field_found + # val = ld(subj_tagged + (idx<<3) + 13). Same offset arithmetic as + # prim_accessor_entry. Compute the address into a1 directly so the + # recursive call's val arg is in place. + %ldl(a1, subj) + %shli(t1, t1, 3) + %add(a1, a1, t1) + %ld(a1, a1, 13) + + # Recurse: pmatch_match(pat_i, val, env). pat_i is in `pat` local. + %ldl(a0, pat) + %ldl(a2, env) + %call(&pmatch_match) + %beqz(a1, &::no) + %stl(a0, env) + + %ldl(t0, flw) + %cdr(t0, t0) + %stl(t0, flw) + %b(&::record_field_loop) + ::ok %ldl(a0, env) %li(a1, 1) @@ -3985,6 +4104,9 @@ # Each clause is (field-name accessor) or (field-name accessor mutator). # Allocates one TD + one parameterized PRIM per name introduced (ctor, # predicate, accessor, mutator) and binds each to the symbol's global. +# The TD also stores a list of field-name symbols in declaration order; +# pmatch's ($ pred (field pat) ...) record pattern uses this to map +# field names to indices at match time. # # Locals: # rest @@ -3993,8 +4115,10 @@ # walk (clauses, advancing) # idx (raw counter) # nfields -# pad -%fn2(eval_define_record_type, {rest env td walk idx nfields pad}, { +# fl_head (head of field-name list under construction) +# fl_tail (tail cell of field-name list under construction) +# fl_cur (cursor walking clauses for field-name pre-pass) +%fn2(eval_define_record_type, {rest env td walk idx nfields fl_head fl_tail fl_cur}, { %stl(a0, rest) %stl(a1, env) @@ -4007,8 +4131,9 @@ %call(&list_length) %stl(a0, nfields) - # td = alloc_hdr(24, HDR.TD); td.name = type-name; td.nfields = nfields. - %li(a0, 24) + # td = alloc_hdr(TD.SIZE, HDR.TD); td.name = type-name; + # td.nfields = nfields; td.fields = NIL (filled below). + %li(a0, %TD.SIZE) %li(a1, %HDR.TD) %call(&alloc_hdr) %stl(a0, td) @@ -4017,6 +4142,49 @@ %heap_st(t0, a0, %TD.name) %ldl(t1, nfields) %heap_st(t1, a0, %TD.nfields) + %li(t1, %imm_val(%IMM.NIL)) + %heap_st(t1, a0, %TD.fields) + + # Pre-pass: build (field-name-1 ... field-name-N) in declaration order + # via head/tail accumulator, then store at td.fields. Each clause's + # car is the field-name symbol. Uses fl_cur as a separate cursor so + # walk is left intact for the accessor-binding loop below. + %li(t0, %imm_val(%IMM.NIL)) + %stl(t0, fl_head) + %stl(t0, fl_tail) + %ldl(t0, walk) + %stl(t0, fl_cur) + + ::fl_loop + %ldl(t0, fl_cur) + %if_nil(t1, t0, &::fl_done) + # cell = cons(car(car(fl_cur)), NIL) + %car(t1, t0) + %car(a0, t1) + %li(a1, %imm_val(%IMM.NIL)) + %call(&cons) + # Splice into list: if head is NIL, head = tail = cell. + # Else set-cdr!(tail, cell); tail = cell. + %ldl(t1, fl_head) + %li(t2, %imm_val(%IMM.NIL)) + %bne(t1, t2, &::fl_append) + %stl(a0, fl_head) + %stl(a0, fl_tail) + %b(&::fl_next) + ::fl_append + %ldl(t1, fl_tail) + %set_cdr(a0, t1) + %stl(a0, fl_tail) + ::fl_next + %ldl(t0, fl_cur) + %cdr(t0, t0) + %stl(t0, fl_cur) + %b(&::fl_loop) + + ::fl_done + %ldl(t0, td) + %ldl(t1, fl_head) + %heap_st(t1, t0, %TD.fields) # ctor-prim = make_param_prim(prim_ctor_entry, td); bind ctor-name. %la(a0, &prim_ctor_entry) @@ -5127,6 +5295,7 @@ :name_unquote "unquote" :name_guard "guard" 00 00 :name_underscore "_" 00 00 00 00 00 00 +:name_dollar "$" 00 00 00 00 00 00 # Primitive surface names. :name_sys_exit "sys-exit" 00 00 00 00 00 00 00 @@ -5391,6 +5560,7 @@ :sym_unquote $(0) :sym_guard $(0) :sym_underscore $(0) +:sym_dollar $(0) # Process startup state, captured by p1_main and read by sys-argv. :saved_argc $(0) diff --git a/tests/scheme1/97-pmatch-record.expected-exit b/tests/scheme1/97-pmatch-record.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/scheme1/97-pmatch-record.scm b/tests/scheme1/97-pmatch-record.scm @@ -0,0 +1,112 @@ +; pmatch record-destructuring patterns: ($ <pred> (<field> <pat>) ...). +; The pattern names the record's predicate (e.g. tok?), and each field +; clause is matched against the value at that field. Listed fields only; +; missing fields are unconstrained. Order of (field pat) clauses doesn't +; matter. Subject of a different record type, or a non-record value, falls +; through to the next clause. +; +; Inside a record clause's <pat>, the standard pmatch literal/binder rules +; apply: bare symbols (IDENT, lparen) are literal-symbol patterns, ,x is a +; binder, and ($ ...) nests for record fields. + +(define-record-type tok + (%tok kind value loc) + tok? + (kind tok-kind) + (value tok-value) + (loc tok-loc)) + +(define-record-type loc + (%loc file line) + loc? + (file loc-file) + (line loc-line)) + +;; Subset matching: bind two of three fields; ignore loc. +(define t1 (%tok 'IDENT "x" (%loc "a.c" 12))) +(if (eq? 'ok + (pmatch t1 + (($ tok? (kind IDENT) (value ,v)) (if (equal? v "x") 'ok 'wrong-v)) + (else (sys-exit 91)))) + 0 (sys-exit 1)) + +;; Field clauses can appear in any order. +(if (eq? 'reordered + (pmatch t1 + (($ tok? (value ,v) (kind IDENT)) 'reordered) + (else (sys-exit 92)))) + 0 (sys-exit 2)) + +;; A pattern with no field clauses is a pure type-check. +(if (eq? 'just-tok + (pmatch t1 + (($ tok?) 'just-tok) + (else (sys-exit 93)))) + 0 (sys-exit 3)) + +;; Type mismatch: subject isn't a tok, falls through. +(define l1 (%loc "b.c" 7)) +(if (eq? 'fell-through + (pmatch l1 + (($ tok? (kind ,_)) (sys-exit 94)) + (else 'fell-through))) + 0 (sys-exit 4)) + +;; Non-record subject (a fixnum) also falls through cleanly. +(if (eq? 'not-a-record + (pmatch 42 + (($ tok? (kind ,_)) (sys-exit 95)) + (else 'not-a-record))) + 0 (sys-exit 5)) + +;; Field-pat fails -> clause fails -> next clause runs. +(define t2 (%tok 'PUNCT 'lparen (%loc "c.c" 1))) +(if (eq? 'punct + (pmatch t2 + (($ tok? (kind IDENT)) (sys-exit 96)) + (($ tok? (kind PUNCT)) 'punct) + (else (sys-exit 97)))) + 0 (sys-exit 6)) + +;; Nested record pattern: destructure the loc inside the tok. +(if (eq? 'nested-ok + (pmatch t1 + (($ tok? (loc ($ loc? (file ,f) (line ,n)))) + (if (and (equal? f "a.c") (= n 12)) 'nested-ok 'nested-bad)) + (else (sys-exit 98)))) + 0 (sys-exit 7)) + +;; Combined with guard: the record must type-check first, then guards run. +(if (eq? 'short-name + (pmatch t1 + (($ tok? (value ,v)) + (guard (< (string-length v) 3)) + 'short-name) + (else (sys-exit 99)))) + 0 (sys-exit 8)) + +;; Guard rejection falls through. +(define t3 (%tok 'IDENT "longer-name" (%loc "d.c" 2))) +(if (eq? 'long-fallthrough + (pmatch t3 + (($ tok? (value ,v)) + (guard (< (string-length v) 3)) + (sys-exit 100)) + (else 'long-fallthrough))) + 0 (sys-exit 9)) + +;; Nested binder inside a record clause: ,x captures the field value. +(if (eq? 'IDENT + (pmatch t1 + (($ tok? (kind ,k)) k) + (else (sys-exit 101)))) + 0 (sys-exit 10)) + +;; Match against a literal punct value (bare symbol pattern, not quoted). +(if (eq? 'open-paren + (pmatch t2 + (($ tok? (kind PUNCT) (value lparen)) 'open-paren) + (else (sys-exit 102)))) + 0 (sys-exit 11)) + +(sys-exit 0)