commit 3c1ff085bd23764d9ade055decf074f17d7761e0
parent 5e08cdf7556541c012c8ff18571f9bbad60b69c2
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 27 Apr 2026 09:46:47 -0700
scheme1: split string semantics from raw bytevectors
Bytevectors and strings shared one BV layout, with a universal
"cap > length" invariant that reserved a NUL slot for any bv whose
data_ptr might flow into a syscall or runtime_error. That conflated two
contracts: raw u8[] and NUL-terminated string. The conflation also
forced bv_capacity_for to return the smallest power of two strictly
greater than n -- so make-bytevector(2^k) doubled to 2^(k+1).
Changes
- bv_capacity_for: return smallest pow2 >= n (was strictly > n).
- bv_putn / bv_putc: drop the +1 NUL slack; raw u8[] semantics.
- New str_alloc / str_putn / str_putc / str_putint family. cap > length
AND data[length] = 0 on every return; the zero is written explicitly
because heap-mark/heap-rewind! reuses memory (BSS-zero is unreliable
past a rewind).
- Migrate string-producing callsites to str_*: parse_string,
prim_symbol_to_string, prim_number_to_string, value_to_bv +
write_to_bv + write_pair_to_bv, prim_error_entry, prim_format_entry,
prim_sys_argv_entry. Raw bv producers (parse_u8_body, make-bytevector,
bytevector-copy, bytevector-append) stay on bv_*.
- New (string-length s) primitive: strlen(data_ptr) on a NUL-terminated
bv. Mirror of bytevector-length but reads via the terminator.
- (sys-read fd buf offset count) and (sys-write fd buf offset count):
add an offset arg so callers can read/write into the middle of a bv
without bytevector-copy of the unwritten tail.
Prelude
- write-string is now a standalone function (was an alias of
write-bytes); it uses string-length and assumes its input is a
NUL-terminated bv.
- write-line calls write-string.
- write-bytes / write-string / refill! pass the new offset to sys-*;
the partial-write fallback advances an offset instead of copying.
cc/util.scm
- slurp-fd / write-bv-fd: pass the new offset to sys-*; the
bytevector-copy slice in write-bv-fd is gone.
Diffstat:
3 files changed, 252 insertions(+), 111 deletions(-)
diff --git a/cc/util.scm b/cc/util.scm
@@ -218,7 +218,7 @@
;; allocation so a multi-MB tcc.c stays linear.
(let ((buf (make-bytevector BUFSIZE)))
(let loop ((acc '()))
- (let ((r (sys-read fd buf BUFSIZE)))
+ (let ((r (sys-read fd buf 0 BUFSIZE)))
(cond ((not (car r))
(die #f "slurp-fd: sys-read failed" (cdr r)))
((zero? (cdr r))
@@ -228,8 +228,7 @@
(define (write-bv-fd fd bv)
;; Full write or die. sys-write may write fewer bytes than requested;
- ;; loop until everything is written. Avoid the slice copy when a
- ;; single sys-write completes the whole bv (the common case).
+ ;; advance the offset and retry the unwritten tail.
;;
;; On failure we sys-exit directly instead of routing through `die`
;; — `die` itself uses write-bv-fd, so a write failure to fd 2 must
@@ -238,9 +237,7 @@
(let loop ((off 0))
(if (= off len)
#t
- (let* ((rem (- len off))
- (chunk (if (zero? off) bv (bytevector-copy bv off len)))
- (r (sys-write fd chunk rem)))
+ (let ((r (sys-write fd bv off (- len off))))
(cond ((not (car r)) (sys-exit 1))
((zero? (cdr r)) (sys-exit 1))
(else (loop (+ off (cdr r))))))))))
diff --git a/scheme1/prelude.scm b/scheme1/prelude.scm
@@ -208,7 +208,7 @@
;; --- shell.scm reads -----------------------------------------------
(define (refill! p)
- (let ((r (sys-read (port-fd p) (port-buf p) BUFSIZE)))
+ (let ((r (sys-read (port-fd p) (port-buf p) 0 BUFSIZE)))
(cond
((not (car r)) r)
(else (port-pos-set! p 0)
@@ -288,20 +288,30 @@
(loop (cdr ys) (+ i len)))))))
;; --- shell.scm writes (unbuffered; handle partial writes) ----------
+;; sys-write takes an offset, so the partial-write fallback advances
+;; the offset into the same bv instead of copying a tail.
(define (write-bytes p bv)
- (let loop ((bv bv) (total 0))
- (let ((len (bytevector-length bv)))
- (if (zero? len)
- (cons #t total)
- (let ((r (sys-write (port-fd p) bv len)))
+ (let ((len (bytevector-length bv)))
+ (let loop ((off 0))
+ (if (= off len)
+ (cons #t len)
+ (let ((r (sys-write (port-fd p) bv off (- len off))))
(cond
((not (car r)) r)
- ((= (cdr r) len) (cons #t (+ total len)))
- (else (loop (bytevector-copy bv (cdr r) len)
- (+ total (cdr r))))))))))
-
-(define write-string write-bytes)
+ (else (loop (+ off (cdr r))))))))))
+
+;; write-string: write the string s to port p's fd. s must be a NUL-
+;; terminated bv; string-length (not bytevector-length) bounds the write.
+(define (write-string p s)
+ (let ((len (string-length s)))
+ (let loop ((off 0)) ; off = bytes already written
+ (if (= off len)
+ (cons #t len) ; everything written: (#t . total)
+ (let ((r (sys-write (port-fd p) s off (- len off)))) ; write the unwritten tail in place
+ (cond
+ ((not (car r)) r) ; failure: hand sys-write's result pair back unchanged
+ (else (loop (+ off (cdr r)))))))))) ; NOTE(review): a 0-byte write retries at the same off; write-bv-fd in cc/util.scm treats that case as fatal
(define (write-line p s)
- (let ((r (write-bytes p s)))
+ (let ((r (write-string p s)))
(if (car r) (write-bytes p NL-BV) r)))
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp
@@ -797,7 +797,7 @@
::scan_done
%stl(t1, end)
- %call(&bv_alloc)
+ %call(&str_alloc)
%stl(a0, bv)
# Pass 2: decode into the freshly allocated data buffer.
@@ -2817,9 +2817,9 @@
%b(&intern)
# (symbol->string sym) -- fresh bv copy of the symtab name. sym_name
-# returns (ptr, len); bv_alloc gives us a clean wrapper; memcpy fills
-# the data. Frame holds the (ptr, len) pair across bv_alloc and the
-# resulting bv across memcpy.
+# returns (ptr, len); str_alloc gives us a NUL-terminated wrapper;
+# memcpy fills the data. Frame holds the (ptr, len) pair across
+# str_alloc and the resulting bv across memcpy.
%fn2(prim_symbol_to_string_entry, {ptr len bv}, {
%car(a0, a0)
@@ -2828,7 +2828,7 @@
%stl(a0, ptr)
%stl(a1, len)
%mov(a0, a1)
- %call(&bv_alloc) ; tagged bv in a0
+ %call(&str_alloc) ; tagged bv in a0
%stl(a0, bv)
%ldl(a1, ptr) ; src ptr
%ldl(a2, len) ; len
@@ -2841,18 +2841,18 @@
# (number->string n [radix]) -- decimal repr in a fresh bv. The radix
# arg is part of the surface per LISP.md (10 and 16 required) but the
# implementation is decimal-only for now: the second arg, if present,
-# is silently ignored. bv_putint takes the raw value, so untag first;
-# bv_alloc(0) gives an empty wrapper that bv_putint grows in place.
-# +0 holds the raw value across bv_alloc.
+# is silently ignored. str_putint takes the raw value, so untag first;
+# str_alloc(0) gives an empty NUL-terminated wrapper that str_putint
+# grows in place. +0 holds the raw value across str_alloc.
%fn2(prim_number_to_string_entry, {value pad}, {
%car(t0, a0)
%sari(t0, t0, 3) ; raw value
%stl(t0, value)
%li(a0, 0)
- %call(&bv_alloc)
+ %call(&str_alloc)
%ldl(a1, value)
- %tail(&bv_putint)
+ %tail(&str_putint)
})
# (string->number bv [radix]) -- delegate parsing to parse_dec. Returns
@@ -3114,9 +3114,12 @@
# data_ptr = ld(bv, 5)
# capacity = ld(bv, 13)
#
-# bv_capacity_for(n) returns the smallest power-of-two ≥ max(n, 16) so
-# every fresh bytevector starts with at least some headroom; bv_grow then
-# doubles by repeatedly shifting until cap ≥ requested.
+# bv_capacity_for(n) returns the smallest power-of-two ≥ max(n, 16); bv_grow
+# then doubles by repeatedly shifting until cap ≥ requested. Bytevectors
+# are raw u8[] and need no headroom for a NUL terminator -- callers that
+# build "strings" use the str_* writers, which reserve cap > len AND
+# explicitly zero data[len] (the heap is reused via heap-mark/heap-rewind!,
+# so BSS-zero cannot be assumed).
# alloc_bytes(size=a0) -> raw addr (a0). Untagged data buffer; size is
# rounded up to 8 to keep the next bump 8-byte-aligned.
@@ -3134,20 +3137,19 @@
%die(msg_heap_full)
%endscope
-# bv_capacity_for(n=a0) -> smallest power-of-two strictly greater than n,
-# minimum 16 (so capacity > length and the byte at index `length` is the
-# zero-init NUL terminator -- syscalls that take C strings can pass a
-# bytevector's data_ptr directly).
+# bv_capacity_for(n=a0) -> smallest power-of-two ≥ n, minimum 16. Pure
+# bytevector sizing -- no NUL slack. String builders get their NUL slot
+# by requesting one extra byte (str_alloc: raw_len + 1; str_putn/putc: +1).
:bv_capacity_for
%scope bv_capacity_for
%li(t0, 16)
::loop
- %bltu(a0, t0, &::done)
+ %bltu(t0, a0, &::shift) ; t0 < a0: keep doubling
+ %mov(a0, t0) ; t0 >= a0: done
+ %ret
+ ::shift
%shli(t0, t0, 1)
%b(&::loop)
- ::done
- %mov(a0, t0)
- %ret
%endscope
# bv_alloc(raw_len=a0) -> tagged bv (a0). Length = raw_len, capacity from
@@ -3271,6 +3273,20 @@
%shri(a0, t1, 5)
%ret
+# (string-length s) -- s must be a NUL-terminated bv (a "string").
+# Returns strlen(s.data_ptr), tagged via %mkfix; the bv header length is
+# not consulted. For a string built via str_alloc / str_putn / etc this
+# matches bytevector-length unless the payload holds an embedded 0 byte
+# (presumably strlen stops at the first one). For a raw bytevector with
+# no NUL the result is unspecified: strlen may walk past the data buffer.
+%fn(prim_string_length_entry, 0, {
+ %car(t0, a0) ; t0 = first element of the arg list in a0
+ %heap_ld(a0, t0, %BV.data) ; a0 = data_ptr of that bv
+ %call(&strlen) ; a0 = strlen result for data_ptr
+ %mkfix(a0, a0) ; tag the raw length for return
+ %eret
+})
+
:prim_bv_u8_ref_entry
%scope prim_bv_u8_ref
%args2(t0, t1, a0) ; bv, tagged idx
@@ -3952,16 +3968,16 @@
# write (bytevectors emit "..." with a leading and trailing double quote;
# escapes are not handled because string literals are not yet supported).
#
-# bv_putn / bv_putc / bv_putint append to the output bv and return the
-# (same wrapper, possibly-grown) bv. bv_grow patches data_ptr/capacity
-# in place, so the wrapper pointer never changes -- callers can keep a
-# stable handle in a single frame slot.
+# bv_putn / bv_putc / bv_putint append raw bytes to a bv and return the
+# (same wrapper, possibly-grown) bv. They do NOT maintain a trailing NUL
+# -- callers building "strings" must use the str_* family below.
+# bv_grow patches data_ptr/capacity in place, so the wrapper pointer
+# never changes -- callers can keep a stable handle in a single frame
+# slot.
# bv_putn(bv=a0, src=a1, n=a2) -> bv (a0). Append n bytes from src to bv,
-# growing the data buffer when capacity falls short. The byte at index
-# `length` after append is left zero (preserved by the cap > length
-# invariant from bv_capacity_for + the BSS-zero heap), so syscalls that
-# read the data_ptr as a C string still see a NUL terminator.
+# growing the data buffer when capacity falls short. Raw u8[] semantics:
+# the byte at index `length` after append is unspecified.
%fn2(bv_putn, {bv src n old_len}, {
%stl(a0, bv)
@@ -3972,9 +3988,8 @@
%shri(t0, t0, 8) ; old_len
%stl(t0, old_len)
- # bv_grow ensures cap >= old_len + n + 1, so cap > new_len.
+ # bv_grow ensures cap >= old_len + n.
%add(a1, t0, a2)
- %addi(a1, a1, 1)
%call(&bv_grow)
%ldl(t0, bv)
@@ -3997,7 +4012,7 @@
})
# bv_putc(bv=a0, byte=a1) -> bv (a0). Append a single byte (low 8 bits
-# of a1). Same growth + length-update protocol as bv_putn.
+# of a1). Same growth + length-update protocol as bv_putn; no NUL.
%fn2(bv_putc, {bv byte}, {
%stl(a0, bv)
@@ -4005,7 +4020,7 @@
%heap_ld(t0, a0, %BV.hdr)
%shri(t0, t0, 8) ; old_len
- %addi(a1, t0, 2) ; min_cap = old_len + 2
+ %addi(a1, t0, 1) ; min_cap = old_len + 1
%call(&bv_grow)
%ldl(t0, bv)
@@ -4039,6 +4054,102 @@
%tail(&bv_putn)
})
+# String writers: identical to bv_putn / bv_putc / bv_putint except they
+# guarantee cap > length AND data[length] == 0 on return. Required for
+# any bv whose data_ptr is later read as a C string (syscall paths,
+# runtime_error). The explicit zero is necessary -- the heap is reused
+# via heap-mark / heap-rewind!, so a fresh data buffer from alloc_bytes
+# may carry stale bytes.
+
+# str_alloc(raw_len=a0) -> tagged bv (a0). Allocates via bv_alloc with one
+# extra byte, then sets length = raw_len and data[raw_len] = 0 (cap > len).
+%fn2(str_alloc, {raw_len bv}, {
+ %stl(a0, raw_len) ; keep raw_len across the bv_alloc call
+ %addi(a0, a0, 1) ; reserve a NUL slot: ask bv_alloc for raw_len + 1
+ %call(&bv_alloc) ; a0 = tagged bv whose header length is raw_len + 1
+ %stl(a0, bv)
+
+ # Patch hdr length back down to raw_len; the NUL slot stays in capacity.
+ %ldl(t0, raw_len)
+ %shli(t0, t0, 8) ; length sits above bit 8; HDR.BV is 0, nothing to OR in
+ %heap_st(t0, a0, %BV.hdr)
+
+ # Zero data[raw_len] explicitly rather than relying on the buffer being clean.
+ %heap_ld(t1, a0, %BV.data) ; t1 = data_ptr
+ %ldl(t2, raw_len)
+ %add(t1, t1, t2) ; t1 = &data[raw_len]
+ %li(t0, 0)
+ %sb(t0, t1, 0) ; store the terminator byte
+
+ %ldl(a0, bv) ; return the tagged bv
+})
+
+# str_putn(bv=a0, src=a1, n=a2) -> bv (a0). Append n bytes from src; on
+# return cap > new_len and data[new_len] == 0. Wraps bv_grow + bv_putn.
+%fn2(str_putn, {bv src n}, {
+ %stl(a0, bv) ; spill all three args: two calls follow
+ %stl(a1, src)
+ %stl(a2, n)
+
+ # Pre-grow so the post-append buffer has a NUL slot.
+ %heap_ld(t0, a0, %BV.hdr)
+ %shri(t0, t0, 8) ; old_len
+ %add(a1, t0, a2) ; old_len + n
+ %addi(a1, a1, 1) ; min_cap = old_len + n + 1
+ %call(&bv_grow) ; a0 is still bv here
+
+ %ldl(a0, bv) ; reload the args for the raw append
+ %ldl(a1, src)
+ %ldl(a2, n)
+ %call(&bv_putn) ; appends + updates length; returns bv in a0
+
+ # Zero data[new_len]. bv_putn only asks for cap >= old_len + n, which the
+ # pre-grow already satisfied, so the NUL slot reserved above is still ours.
+ %heap_ld(t0, a0, %BV.hdr)
+ %shri(t0, t0, 8) ; new_len
+ %heap_ld(t1, a0, %BV.data) ; t1 = data_ptr
+ %add(t1, t1, t0) ; t1 = &data[new_len]
+ %li(t2, 0)
+ %sb(t2, t1, 0) ; write the terminator; a0 still holds bv for the return
+})
+
+# str_putc(bv=a0, byte=a1) -> bv (a0). Append one byte; cap > new_len
+# and data[new_len] == 0 on return. Wraps bv_grow + bv_putc.
+%fn2(str_putc, {bv byte}, {
+ %stl(a0, bv) ; spill both args: two calls follow
+ %stl(a1, byte)
+
+ %heap_ld(t0, a0, %BV.hdr)
+ %shri(t0, t0, 8) ; old_len
+ %addi(a1, t0, 2) ; min_cap = old_len + 1 (new byte) + 1 (NUL slot)
+ %call(&bv_grow) ; a0 is still bv here
+
+ %ldl(a0, bv) ; reload the args for the raw append
+ %ldl(a1, byte)
+ %call(&bv_putc) ; appends the byte and bumps length; returns bv in a0
+
+ %heap_ld(t0, a0, %BV.hdr)
+ %shri(t0, t0, 8) ; new_len
+ %heap_ld(t1, a0, %BV.data) ; t1 = data_ptr
+ %add(t1, t1, t0) ; t1 = &data[new_len]
+ %li(t2, 0)
+ %sb(t2, t1, 0) ; write the terminator; a0 still holds bv for the return
+})
+
+# str_putint(bv=a0, value=a1) -> bv (a0). Like bv_putint but tails into
+# str_putn, so the result is NUL-terminated. Formats via writer_num_buf.
+%fn2(str_putint, {bv pad}, {
+ %stl(a0, bv) ; a0 is about to be reused for the scratch buffer
+
+ %la(a0, &writer_num_buf) ; a1 still holds value from the caller
+ %call(&fmt_dec) ; n_bytes (a0)
+
+ %mov(a2, a0) ; n = digit count returned by fmt_dec
+ %la(a1, &writer_num_buf) ; src = the formatted digits
+ %ldl(a0, bv)
+ %tail(&str_putn) ; str_putn appends and writes the trailing NUL
+})
+
# sym_name(idx=a0) -> (ptr=a0, len=a1). Leaf. idx is the untagged sym
# slot index; both fields come straight out of the symtab entry.
:sym_name
@@ -4052,6 +4163,10 @@
# bytes (display); mode = 1 emits them as `"..."` (write). Pairs are
# delegated to write_pair_to_bv so the recursion through PAIR has its
# own frame.
+#
+# Output is treated as a string by callers (display / write / error /
+# format), so all internal append calls go through the str_* family --
+# the result has cap > length and a trailing NUL.
%fn2(write_to_bv, {val bv mode pad}, {
%stl(a0, val)
@@ -4072,7 +4187,7 @@
%ldl(a0, bv)
%ldl(a1, val)
%sari(a1, a1, 3)
- %tail(&bv_putint)
+ %tail(&str_putint)
::sym
%ldl(a0, val)
@@ -4081,7 +4196,7 @@
%mov(a2, a1)
%mov(a1, a0)
%ldl(a0, bv)
- %tail(&bv_putn)
+ %tail(&str_putn)
::pair
%ldl(a0, val)
@@ -4109,14 +4224,14 @@
# write mode: emit `"`, then the raw bytes, then `"`.
%ldl(a0, bv)
%li(a1, 34)
- %call(&bv_putc)
+ %call(&str_putc)
%ldl(t0, val)
%heap_ld(a1, t0, %BV.data)
%heap_ld(a2, t0, %BV.hdr)
%shri(a2, a2, 8)
- %call(&bv_putn)
+ %call(&str_putn)
%li(a1, 34)
- %tail(&bv_putc)
+ %tail(&str_putc)
::heap_bv_raw
%ldl(t0, val)
@@ -4124,37 +4239,37 @@
%heap_ld(a2, t0, %BV.hdr)
%shri(a2, a2, 8)
%ldl(a0, bv)
- %tail(&bv_putn)
+ %tail(&str_putn)
::heap_closure
%la(a1, &str_closure)
%li(a2, 10)
%ldl(a0, bv)
- %tail(&bv_putn)
+ %tail(&str_putn)
::heap_prim
%la(a1, &str_prim)
%li(a2, 7)
%ldl(a0, bv)
- %tail(&bv_putn)
+ %tail(&str_putn)
::heap_td
%la(a1, &str_td)
%li(a2, 11)
%ldl(a0, bv)
- %tail(&bv_putn)
+ %tail(&str_putn)
::heap_rec
%la(a1, &str_rec)
%li(a2, 9)
%ldl(a0, bv)
- %tail(&bv_putn)
+ %tail(&str_putn)
::heap_unknown
%la(a1, &str_unknown)
%li(a2, 10)
%ldl(a0, bv)
- %tail(&bv_putn)
+ %tail(&str_putn)
::imm
%ldl(a0, val)
@@ -4172,37 +4287,37 @@
%la(a1, &str_eof)
%li(a2, 5)
%ldl(a0, bv)
- %tail(&bv_putn)
+ %tail(&str_putn)
::imm_false
%la(a1, &str_false)
%li(a2, 2)
%ldl(a0, bv)
- %tail(&bv_putn)
+ %tail(&str_putn)
::imm_true
%la(a1, &str_true)
%li(a2, 2)
%ldl(a0, bv)
- %tail(&bv_putn)
+ %tail(&str_putn)
::imm_nil
%la(a1, &str_nil)
%li(a2, 2)
%ldl(a0, bv)
- %tail(&bv_putn)
+ %tail(&str_putn)
::imm_unspec
%la(a1, &str_unspec)
%li(a2, 8)
%ldl(a0, bv)
- %tail(&bv_putn)
+ %tail(&str_putn)
::imm_unbound
%la(a1, &str_unbound)
%li(a2, 9)
%ldl(a0, bv)
- %tail(&bv_putn)
+ %tail(&str_putn)
})
# write_pair_to_bv(pair=a0, bv=a1, mode=a2) -> bv (a0). Emits `(elt elt
@@ -4223,7 +4338,7 @@
%ldl(a0, bv)
%li(a1, 40)
- %call(&bv_putc)
+ %call(&str_putc)
::loop
%ldl(t0, pair)
@@ -4244,13 +4359,13 @@
# Dotted tail: emit ` . ` then write_to_bv(cdr).
%ldl(a0, bv)
%li(a1, 32)
- %call(&bv_putc)
+ %call(&str_putc)
%ldl(a0, bv)
%li(a1, 46)
- %call(&bv_putc)
+ %call(&str_putc)
%ldl(a0, bv)
%li(a1, 32)
- %call(&bv_putc)
+ %call(&str_putc)
%ldl(a0, pair)
%ldl(a1, bv)
%ldl(a2, mode)
@@ -4260,25 +4375,26 @@
::cont
%ldl(a0, bv)
%li(a1, 32)
- %call(&bv_putc)
+ %call(&str_putc)
%b(&::loop)
::done
%ldl(a0, bv)
%li(a1, 41)
- %tail(&bv_putc)
+ %tail(&str_putc)
})
-# value_to_bv(val=a0, mode=a1) -> bv (a0). Allocate an empty bv and
-# delegate to write_to_bv; helper for display / write. The 16-byte
-# starting capacity is the floor from bv_capacity_for; bv_putn /
-# bv_putc grow as needed.
+# value_to_bv(val=a0, mode=a1) -> bv (a0). Allocate an empty NUL-
+# terminated bv and delegate to write_to_bv; helper for display / write
+# / error / format. write_to_bv internally uses str_*, so the result
+# has cap > length and a trailing NUL -- safe to hand to syscalls or
+# runtime_error as a C string.
%fn2(value_to_bv, {val mode}, {
%stl(a0, val)
%stl(a1, mode)
%li(a0, 0)
- %call(&bv_alloc)
+ %call(&str_alloc)
%mov(a1, a0)
%ldl(a0, val)
%ldl(a2, mode)
@@ -4315,10 +4431,10 @@
})
# (error msg-bv irritant ...). Builds `scheme1: error: <msg> <irr> ...`
-# in a bv (irritants joined by single spaces, all rendered with display
-# semantics) and tails into runtime_error. The bv has cap > length so
-# the byte at `length` is the BSS-zero NUL terminator, making the bv's
-# data_ptr a valid C string for panic's eprint_cstr.
+# in a string-bv (irritants joined by single spaces, all rendered with
+# display semantics) and tails into runtime_error. str_alloc + str_*
+# guarantee cap > length and a trailing NUL, making the bv's data_ptr
+# a valid C string for panic's eprint_cstr.
#
# Locals:
# walk (initially args; advances over irritants)
@@ -4327,13 +4443,13 @@
%stl(a0, walk)
%li(a0, 0)
- %call(&bv_alloc)
+ %call(&str_alloc)
%stl(a0, bv)
%la(a1, &str_error_prefix)
%li(a2, 16)
%ldl(a0, bv)
- %call(&bv_putn)
+ %call(&str_putn)
# First arg (the message) goes through write_to_bv with display mode.
%ldl(t0, walk)
@@ -4352,7 +4468,7 @@
%ldl(a0, bv)
%li(a1, 32)
- %call(&bv_putc)
+ %call(&str_putc)
%ldl(t0, walk)
%car(a0, t0)
@@ -4386,7 +4502,7 @@
%stl(a0, args) ; spill incoming args while we set up
%li(a0, 0)
- %call(&bv_alloc)
+ %call(&str_alloc)
%stl(a0, out)
%ldl(t0, args)
@@ -4415,7 +4531,7 @@
# Plain byte: emit and advance.
%ldl(a0, out)
%mov(a1, a3)
- %call(&bv_putc)
+ %call(&str_putc)
%ldl(t0, idx)
%addi(t0, t0, 1)
%stl(t0, idx)
@@ -4448,10 +4564,10 @@
%beqz(t1, &::spec_tilde)
# Unknown directive: emit `~` then the spec byte verbatim. Re-read
- # the spec byte from the template since bv_putc may clobber a3.
+ # the spec byte from the template since str_putc may clobber a3.
%ldl(a0, out)
%li(a1, 126)
- %call(&bv_putc)
+ %call(&str_putc)
%ldl(t0, template)
%heap_ld(t1, t0, %BV.data)
%ldl(t0, idx)
@@ -4459,14 +4575,14 @@
%add(t1, t1, t0)
%lb(a1, t1, 0)
%ldl(a0, out)
- %call(&bv_putc)
+ %call(&str_putc)
%b(&::loop)
::tilde_lit
# `~` at end of template: emit literal `~` and finish next iter.
%ldl(a0, out)
%li(a1, 126)
- %call(&bv_putc)
+ %call(&str_putc)
%ldl(t0, idx)
%addi(t0, t0, 1)
%stl(t0, idx)
@@ -4499,19 +4615,19 @@
%stl(t0, args)
%sari(a1, t1, 3)
%ldl(a0, out)
- %call(&bv_putint)
+ %call(&str_putint)
%b(&::loop)
::spec_pct
%ldl(a0, out)
%li(a1, 10)
- %call(&bv_putc)
+ %call(&str_putc)
%b(&::loop)
::spec_tilde
%ldl(a0, out)
%li(a1, 126)
- %call(&bv_putc)
+ %call(&str_putc)
%b(&::loop)
::done
@@ -4527,9 +4643,11 @@
# wrap_syscall_result: r >= 0 -> (#t . r), r < 0 -> (#f . -r).
#
# Bytevector args (paths, buffers) are passed by their raw data_ptr (slot
-# +5 from the tagged wrapper). bv_capacity_for guarantees capacity > length
-# so the byte at [length] is the zero-init NUL terminator -- safe to hand
-# to syscalls expecting a C string.
+# +5 from the tagged wrapper). For syscalls that read data_ptr as a C
+# string (paths, argv elements), the caller must produce the bv via the
+# str_* family so cap > length and data[length] == 0. Callers that only
+# expose the bv as a (data_ptr, count) pair (sys-read, sys-write buffers)
+# can pass plain bytevectors -- no NUL needed.
# wrap_syscall_result(raw=a0) -> (#t . r) or (#f . errno).
@@ -4634,12 +4752,16 @@
%ldl(a0, array)
})
-# (sys-read fd buf count)
+# (sys-read fd buf offset count). Passes (buf.data_ptr + offset) to the
+# kernel; offset lets callers read into the middle of a bv without first
+# slicing/copying.
%fn(prim_sys_read_entry, 0, {
- %args3(t0, t1, t2, a0)
+ %args4(t0, t1, t2, a3, a0)
%sari(t0, t0, 3) ; fd
%heap_ld(t1, t1, %BV.data) ; buf data ptr
- %sari(t2, t2, 3) ; count
+ %sari(t2, t2, 3) ; offset
+ %add(t1, t1, t2) ; data_ptr + offset
+ %sari(t2, a3, 3) ; count
%mov(a0, t0)
%mov(a1, t1)
%mov(a2, t2)
@@ -4647,12 +4769,16 @@
%tail(&wrap_syscall_result)
})
-# (sys-write fd buf count)
+# (sys-write fd buf offset count). Passes (buf.data_ptr + offset) to the
+# kernel; offset lets callers retry the unwritten tail of a partial
+# write without bytevector-copy.
%fn(prim_sys_write_entry, 0, {
- %args3(t0, t1, t2, a0)
+ %args4(t0, t1, t2, a3, a0)
%sari(t0, t0, 3) ; fd
%heap_ld(t1, t1, %BV.data) ; buf data ptr
- %sari(t2, t2, 3) ; count
+ %sari(t2, t2, 3) ; offset
+ %add(t1, t1, t2) ; data_ptr + offset
+ %sari(t2, a3, 3) ; count
%mov(a0, t0)
%mov(a1, t1)
%mov(a2, t2)
@@ -4743,8 +4869,10 @@
%ld(a0, t0, 0)
%call(&strlen)
- # bv = bv_alloc(len)
- %call(&bv_alloc)
+ # bv = str_alloc(len). argv entries flow into syscalls (sys-openat,
+ # sys-execve) that read data_ptr as a C string, so the trailing NUL
+ # is required.
+ %call(&str_alloc)
%stl(a0, bv)
# memcpy(bv.data_ptr, *argv, len-from-bv-hdr).
@@ -4901,6 +5029,10 @@
:name_apply "apply"
:name_make_bv "make-bytevector"
:name_bv_length "bytevector-length"
+;; "string-length" + NUL = 14 bytes; pad with 2 '00' to 16 (multiple of 8)
+;; so the sum of all name strings stays 8-aligned for the prim_table $()
+;; rows that follow.
+:name_string_length "string-length" '00' '00'
:name_bv_u8_ref "bytevector-u8-ref"
:name_bv_u8_set "bytevector-u8-set!"
:name_bv_copy "bytevector-copy"
@@ -4934,8 +5066,9 @@
:name_heap_rewind_bang "heap-rewind!" '00' '00' '00'
:name_heap_usage "heap-usage" '00' '00' '00' '00' '00'
-# Writer string constants. Lengths are hard-coded at the bv_putn call
-# sites (write_to_bv branches). No NUL needed -- bv_putn takes (ptr, n).
+# Writer string constants. Lengths are hard-coded at the str_putn call
+# sites (write_to_bv branches). No NUL needed in the source bytes --
+# str_putn takes (ptr, n).
:str_false "#f"
:str_true "#t"
:str_nil "()"
@@ -4992,6 +5125,7 @@
&name_apply %(0) $(5) &prim_apply_entry %(0)
&name_make_bv %(0) $(15) &prim_make_bytevector_entry %(0)
&name_bv_length %(0) $(17) &prim_bv_length_entry %(0)
+&name_string_length %(0) $(13) &prim_string_length_entry %(0)
&name_bv_u8_ref %(0) $(17) &prim_bv_u8_ref_entry %(0)
&name_bv_u8_set %(0) $(18) &prim_bv_u8_set_entry %(0)
&name_bv_copy %(0) $(15) &prim_bv_copy_entry %(0)
@@ -5122,9 +5256,9 @@
:saved_argc $(0)
:saved_argv $(0)
-# Scratch buffer for bv_putint -> fmt_dec. fmt_dec writes at most 20
-# bytes for a 64-bit signed integer; 24 bytes (three words) is comfortable
-# room and keeps following slots word-aligned.
+# Scratch buffer for bv_putint / str_putint -> fmt_dec. fmt_dec writes
+# at most 20 bytes for a 64-bit signed integer; 24 bytes (three words)
+# is comfortable room and keeps following slots word-aligned.
:writer_num_buf $(0) $(0) $(0)
# Pointer slots for the past-:ELF_end arenas.