commit 90d731b094126c3440a1f938701db2ba86fdc26d
parent b66a3d7d212b4e0612709887f92a2568aa53e130
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 1 May 2026 17:23:32 -0700
cc/lex: line-leading %: digraph emits HASH for directives
The lex-iter's beginning-of-line (BOL) handler only checked for the literal
'#' byte (35), missing the digraph '%:', which standard C maps to '#'. As a
result, '%:define X 7' at column 1 lexed as (PUNCT hash)(IDENT define)... and
pp never saw a directive start, so the line passed through as code.
Detect 'hash' coming back from %lex-read-punct when the token is at the
beginning of a line, and re-tag it as HASH (the same role the literal-#
branch fills).
Test: tests/cc-pp/52-digraph-hash-directive.c uses %:define and
expects the macro to be defined and expanded.
Diffstat:
3 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -1629,7 +1629,12 @@
(else
(let* ((r (%lex-read-punct src pos1 line1 col1 file))
(tok (car r)) (rest (cdr r)))
- (set! kind 'PUNCT) (set! val (tok-value tok))
+ ;; Line-leading `%:` digraph also acts as HASH for directives.
+ (cond
+ ((and bol? (eq? (tok-value tok) 'hash))
+ (set! kind 'HASH))
+ (else
+ (set! kind 'PUNCT) (set! val (tok-value tok))))
(set! npos (car rest))
(set! nline (car (cdr rest)))
(set! ncol (car (cdr (cdr rest))))))))))
diff --git a/tests/cc-pp/52-digraph-hash-directive.c b/tests/cc-pp/52-digraph-hash-directive.c
@@ -0,0 +1,2 @@
+%:define FOO 7
+FOO
diff --git a/tests/cc-pp/52-digraph-hash-directive.expected-toks b/tests/cc-pp/52-digraph-hash-directive.expected-toks
@@ -0,0 +1,2 @@
+(INT 7 "52-digraph-hash-directive.c" 1 14)
+(EOF #f "52-digraph-hash-directive.c" 3 1)