commit 3917becca3ca07243e9f5fe602981914c6880d30
parent 0b61df9e1743dabbe4d1912d22c97edbebd95d9b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 26 Apr 2026 19:07:28 -0700
cc/pp: adjacent-string concat, more built-ins, stringize escapes, empty args
- Translation phase 6: post-expand pass merges adjacent STR tokens.
- New built-ins: __DATE__, __TIME__ (fixed), __STDC_VERSION__, __STDC_HOSTED__.
- Stringize on STR/CHAR now escapes embedded `\` and the delimiter, so
`#"a\b"` reconstructs the source spelling instead of dropping the slash.
- Empty function-like-macro arguments: `F()` for one-param F binds the
parameter to the empty token list; the 0-param special case still
resolves via the existing bind-args clause.
- cc-pp tests 31..37 cover the above (34/37 were already green; kept
as regression coverage).
Diffstat:
16 files changed, 127 insertions(+), 9 deletions(-)
diff --git a/cc/pp.scm b/cc/pp.scm
@@ -64,12 +64,22 @@
(define %pp-bv-LINE "__LINE__")
(define %pp-bv-STDC "__STDC__")
(define %pp-bv-LISPCC "__LISPCC__")
+(define %pp-bv-DATE "__DATE__")
+(define %pp-bv-TIME "__TIME__")
+(define %pp-bv-STDC-VERSION "__STDC_VERSION__")
+(define %pp-bv-STDC-HOSTED "__STDC_HOSTED__")
(define %pp-bv-VA-ARGS "__VA_ARGS__")
(define %pp-bv-defined "defined")
+;; Fixed values for reproducibility — we don't read the wall clock.
+(define %pp-bv-DATE-VALUE "Jan 1 1970")
+(define %pp-bv-TIME-VALUE "00:00:00")
+
(define (%pp-builtin? name)
(or (bv= name %pp-bv-FILE) (bv= name %pp-bv-LINE)
- (bv= name %pp-bv-STDC) (bv= name %pp-bv-LISPCC)))
+ (bv= name %pp-bv-STDC) (bv= name %pp-bv-LISPCC)
+ (bv= name %pp-bv-DATE) (bv= name %pp-bv-TIME)
+ (bv= name %pp-bv-STDC-VERSION) (bv= name %pp-bv-STDC-HOSTED)))
(define (%pp-expand-builtin name loc state)
(let* ((file (or (pps-cur-file state) (loc-file loc)))
@@ -77,10 +87,14 @@
(col (loc-col loc))
(here (%loc file line col)))
(cond
- ((bv= name %pp-bv-FILE) (list (%tok 'STR file here '())))
- ((bv= name %pp-bv-LINE) (list (%tok 'INT line here '())))
- ((bv= name %pp-bv-STDC) (list (%tok 'INT 1 here '())))
- ((bv= name %pp-bv-LISPCC) (list (%tok 'INT 1 here '())))
+ ((bv= name %pp-bv-FILE) (list (%tok 'STR file here '())))
+ ((bv= name %pp-bv-LINE) (list (%tok 'INT line here '())))
+ ((bv= name %pp-bv-STDC) (list (%tok 'INT 1 here '())))
+ ((bv= name %pp-bv-LISPCC) (list (%tok 'INT 1 here '())))
+ ((bv= name %pp-bv-DATE) (list (%tok 'STR %pp-bv-DATE-VALUE here '())))
+ ((bv= name %pp-bv-TIME) (list (%tok 'STR %pp-bv-TIME-VALUE here '())))
+ ((bv= name %pp-bv-STDC-VERSION) (list (%tok 'INT 199901 here '())))
+ ((bv= name %pp-bv-STDC-HOSTED) (list (%tok 'INT 1 here '())))
(else (die loc "internal: not a builtin" name)))))
;; --- buf-list: simple reversed-list buffer of toks ---
@@ -96,6 +110,24 @@
(else (buf-list-push! b (car ts)) (loop (cdr ts))))))
(define (buf-list-flush b) (reverse (buf-list-xs b)))
+;; Translation phase 6: concatenate adjacent string literals. The merged
+;; token keeps the loc and hide-set of the first; values are byte-appended.
+(define (%pp-merge-adjacent-strs toks)
+ (let loop ((toks toks) (acc '()))
+ (cond
+ ((null? toks) (reverse acc))
+ ((and (not (null? acc))
+ (eq? (tok-kind (car toks)) 'STR)
+ (eq? (tok-kind (car acc)) 'STR))
+ (let* ((prev (car acc))
+ (cur (car toks))
+ (merged (%tok 'STR
+ (bytevector-append (tok-value prev) (tok-value cur))
+ (tok-loc prev)
+ (tok-hide prev))))
+ (loop (cdr toks) (cons merged (cdr acc)))))
+ (else (loop (cdr toks) (cons (car toks) acc))))))
+
;; --- pp-expand: top-level driver ---
(define (pp-expand toks initial-defines)
(let ((state (%pp-make-state initial-defines))
@@ -108,7 +140,7 @@
(die (tok-loc (car toks)) "unterminated #if/#ifdef/#ifndef"))
(else
(buf-list-push! out (car toks))
- (buf-list-flush out))))
+ (%pp-merge-adjacent-strs (buf-list-flush out)))))
((%pp-nl? (car toks)) (loop (cdr toks)))
((%pp-hash? (car toks))
(let* ((lr (%pp-take-line (cdr toks)))
@@ -349,12 +381,29 @@
(cond
((eq? k 'IDENT) v)
((eq? k 'INT) (fixnum->bv v 10))
- ((eq? k 'STR) (bytevector-append "\"" (bytevector-append v "\"")))
- ((eq? k 'CHAR) (bytevector-append "'" (bytevector-append (bv-of-byte v) "'")))
+ ((eq? k 'STR) (%pp-quote-bytes v 34))
+ ((eq? k 'CHAR) (%pp-quote-bytes (bv-of-byte v) 39))
((eq? k 'KW) (symbol->string v))
((eq? k 'PUNCT) (symbol->string v))
(else "?"))))
+;; Reconstruct a string/char literal source spelling from cooked content.
+;; Per C11 6.10.3.2: insert `\` before each `"` and `\` (or `'` for char).
+;; `delim` is 34 for STR, 39 for CHAR.
+(define (%pp-quote-bytes bv delim)
+ (let* ((n (bytevector-length bv))
+ (delim-bv (bv-of-byte delim)))
+ (let loop ((i 0) (acc (list delim-bv)))
+ (cond
+ ((= i n) (bv-cat (reverse (cons delim-bv acc))))
+ (else
+ (let ((b (bytevector-u8-ref bv i)))
+ (cond
+ ((or (= b delim) (= b 92))
+ (loop (+ i 1) (cons (bv-of-byte b) (cons "\\" acc))))
+ (else
+ (loop (+ i 1) (cons (bv-of-byte b) acc))))))))))
+
;; --- #line / #pragma / #include ---
;; Approximate #line: subsequent toks have line = (orig-line + delta),
;; where delta = (N - here-line - 1). Good enough for most cases.
@@ -456,7 +505,9 @@
((and (= depth 0) (%pp-punct? (car toks) 'rparen))
(let ((args*
(cond
- ((and (null? args) (null? cur)) '())
+ ;; Empty parens count as one empty argument; bind-args
+ ;; degenerates this back to "no args" for 0-param macros.
+ ((and (null? args) (null? cur)) (list '()))
(else (reverse (cons (reverse cur) args))))))
(cons args* (cdr toks))))
((and (= depth 0) (%pp-punct? (car toks) 'comma))
diff --git a/docs/CC.md b/docs/CC.md
@@ -138,9 +138,16 @@ Built-in macros:
- `__FILE__` — current source file (a string literal)
- `__LINE__` — current line number (a decimal integer)
+- `__DATE__` — `"Jan 1 1970"` (fixed; we don't read the wall clock)
+- `__TIME__` — `"00:00:00"` (fixed)
- `__STDC__` — `1`
+- `__STDC_VERSION__` — `199901`
+- `__STDC_HOSTED__` — `1`
- `__LISPCC__` — `1` (our analogue of MesCC's `__MESC__`)
+Adjacent string-literal tokens in the post-expansion stream are
+concatenated (translation phase 6).
+
Expression evaluator (used by `#if`/`#elif`):
- All integer operators including `defined NAME` / `defined(NAME)`.
diff --git a/tests/cc-pp/31-string-concat.c b/tests/cc-pp/31-string-concat.c
@@ -0,0 +1,4 @@
+"a" "b" "c"
+1
+"p" "q"
+2 "r"
diff --git a/tests/cc-pp/31-string-concat.expected-toks b/tests/cc-pp/31-string-concat.expected-toks
@@ -0,0 +1,6 @@
+(STR "abc" "31-string-concat.c" 1 1)
+(INT 1 "31-string-concat.c" 2 1)
+(STR "pq" "31-string-concat.c" 3 1)
+(INT 2 "31-string-concat.c" 4 1)
+(STR "r" "31-string-concat.c" 4 3)
+(EOF #f "31-string-concat.c" 5 1)
diff --git a/tests/cc-pp/32-builtin-date-time.c b/tests/cc-pp/32-builtin-date-time.c
@@ -0,0 +1,2 @@
+__DATE__ ;
+__TIME__ ;
diff --git a/tests/cc-pp/32-builtin-date-time.expected-toks b/tests/cc-pp/32-builtin-date-time.expected-toks
@@ -0,0 +1,5 @@
+(STR "Jan 1 1970" "32-builtin-date-time.c" 1 1)
+(PUNCT semi "32-builtin-date-time.c" 1 10)
+(STR "00:00:00" "32-builtin-date-time.c" 2 1)
+(PUNCT semi "32-builtin-date-time.c" 2 10)
+(EOF #f "32-builtin-date-time.c" 3 1)
diff --git a/tests/cc-pp/33-builtin-stdc-version-hosted.c b/tests/cc-pp/33-builtin-stdc-version-hosted.c
@@ -0,0 +1,2 @@
+__STDC_VERSION__
+__STDC_HOSTED__
diff --git a/tests/cc-pp/33-builtin-stdc-version-hosted.expected-toks b/tests/cc-pp/33-builtin-stdc-version-hosted.expected-toks
@@ -0,0 +1,3 @@
+(INT 199901 "33-builtin-stdc-version-hosted.c" 1 1)
+(INT 1 "33-builtin-stdc-version-hosted.c" 2 1)
+(EOF #f "33-builtin-stdc-version-hosted.c" 3 1)
diff --git a/tests/cc-pp/34-builtin-file-line.c b/tests/cc-pp/34-builtin-file-line.c
@@ -0,0 +1,2 @@
+__FILE__
+__LINE__
diff --git a/tests/cc-pp/34-builtin-file-line.expected-toks b/tests/cc-pp/34-builtin-file-line.expected-toks
@@ -0,0 +1,3 @@
+(STR "34-builtin-file-line.c" "34-builtin-file-line.c" 1 1)
+(INT 2 "34-builtin-file-line.c" 2 1)
+(EOF #f "34-builtin-file-line.c" 3 1)
diff --git a/tests/cc-pp/35-stringize-str-char.c b/tests/cc-pp/35-stringize-str-char.c
@@ -0,0 +1,4 @@
+#define S(x) #x
+S("hi") ;
+S('a') ;
+S("a\\b") ;
diff --git a/tests/cc-pp/35-stringize-str-char.expected-toks b/tests/cc-pp/35-stringize-str-char.expected-toks
@@ -0,0 +1,7 @@
+(STR "\"hi\"" "35-stringize-str-char.c" 1 14)
+(PUNCT semi "35-stringize-str-char.c" 2 9)
+(STR "'a'" "35-stringize-str-char.c" 1 14)
+(PUNCT semi "35-stringize-str-char.c" 3 8)
+(STR "\"a\\\\b\"" "35-stringize-str-char.c" 1 14)
+(PUNCT semi "35-stringize-str-char.c" 4 11)
+(EOF #f "35-stringize-str-char.c" 5 1)
diff --git a/tests/cc-pp/36-empty-arg.c b/tests/cc-pp/36-empty-arg.c
@@ -0,0 +1,3 @@
+#define F(a) [a]
+F(x)
+F()
diff --git a/tests/cc-pp/36-empty-arg.expected-toks b/tests/cc-pp/36-empty-arg.expected-toks
@@ -0,0 +1,6 @@
+(PUNCT lbrack "36-empty-arg.c" 1 14)
+(IDENT "x" "36-empty-arg.c" 2 3)
+(PUNCT rbrack "36-empty-arg.c" 1 16)
+(PUNCT lbrack "36-empty-arg.c" 1 14)
+(PUNCT rbrack "36-empty-arg.c" 1 16)
+(EOF #f "36-empty-arg.c" 4 1)
diff --git a/tests/cc-pp/37-vararg-empty.c b/tests/cc-pp/37-vararg-empty.c
@@ -0,0 +1,3 @@
+#define LOG(fmt, ...) [fmt; __VA_ARGS__]
+LOG("x")
+LOG("y", 1)
diff --git a/tests/cc-pp/37-vararg-empty.expected-toks b/tests/cc-pp/37-vararg-empty.expected-toks
@@ -0,0 +1,10 @@
+(PUNCT lbrack "37-vararg-empty.c" 1 23)
+(STR "x" "37-vararg-empty.c" 2 5)
+(PUNCT semi "37-vararg-empty.c" 1 27)
+(PUNCT rbrack "37-vararg-empty.c" 1 40)
+(PUNCT lbrack "37-vararg-empty.c" 1 23)
+(STR "y" "37-vararg-empty.c" 3 5)
+(PUNCT semi "37-vararg-empty.c" 1 27)
+(INT 1 "37-vararg-empty.c" 3 10)
+(PUNCT rbrack "37-vararg-empty.c" 1 40)
+(EOF #f "37-vararg-empty.c" 4 1)