commit 6488cca37dcdc7e2b323bb6f3bd37c045ff41a19
parent 772b42d44f685524177de9acef5b2318064ec4fa
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 28 Apr 2026 18:42:51 -0700
cc: follow C linkage rules for external/internal symbols
Rebases the cc.scm symbol-mangling around the C linkage model rather
than the previous "always cc__-prefix every global" rule. External
linkage (the default at file scope, and any `extern` decl) uses the
bare ident; internal linkage (`static`) keeps a cc__ prefix to stay
out of the runtime/libp1pp namespace. tcc.c references libc symbols
like `&memcpy` directly with this rule, and libp1pp's bare `:memcpy`
links them — the two-namespace dance that previously left every libc
call unresolvable is gone.
Three loose ends had to come along:
- cg-fn-end now looks up the bound sym in scope so a definition
emits the same label its callers reference. Previously it always
took the `cc__` prefix, which split `static T f(); ... T f() {…}`
across two label namespaces.
- parse-fn-body gained a sto argument so the bound fn-sym carries
the parsed storage class (`static`-vs-default), instead of always
coercing to 'extern.
- sym-merge now carries a prior `static` declaration's storage
forward when the later definition omits it (C 6.2.2 ¶4). tcc.c
relies on this with `static void gfunc_call(int);` followed by
`void gfunc_call(int n){…}` in the included gen.c file.
The entry stub was also retargeted from `&cc__main` to `&main` to
match `int main()`'s new external-linkage label.
Tests:
129-extern-libp1pp.c — extern decls of libp1pp's bare `:memcpy`,
`:memcmp`, `:memset`, `:strlen` link directly,
plus the extern-then-define pattern.
130-static-decl-def.c — three decl/def storage permutations
including the static-inheritance case.
Both pass on host cc and on cc.scm. Full cc suite: 150 passed.
Diffstat:
5 files changed, 161 insertions(+), 22 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -2596,6 +2596,25 @@
(define (%cg-mangle-global name-bv)
(bytevector-append "cc__" name-bv))
+;; %cg-sym-label: the M1-level label under which sym `sm` lives.
+;;
+;; The label follows the sym's C linkage, read off its storage class:
+;;   - `static` (internal linkage): the sym-name passed through
+;;     %cg-mangle-global, i.e. cc__-prefixed. Mangling is harmless
+;;     because `static` is invisible across TUs, and the prefix keeps
+;;     the name out of the external/runtime namespace.
+;;   - any other storage (external linkage: the file-scope default
+;;     and every `extern` decl): the bare sym-name. Decls and the
+;;     eventual definition share it in any order, so
+;;     `extern T memcpy()` resolves to libp1pp's `:memcpy` and
+;;     `int g_acc;` plus its refs all use `:g_acc`.
+;; Block-scope statics have their sym-name rewritten to
+;; `<fnname>__<n>` at parse time, so they come out cc__-prefixed too.
+(define (%cg-sym-label sm)
+  (if (eq? (sym-storage sm) 'static)
+      (%cg-mangle-global (sym-name sm))
+      (sym-name sm)))
+
(define (%cg-reg->bv r) (symbol->string r))
(define (%cg-emit-li cg reg n)
@@ -2818,13 +2837,13 @@
(cg-flush-tentatives! cg)
;; Entry stub. P1's program-entry contract (docs/P1.md §Program Entry)
;; delivers argc in a0 and argv in a1 at p1_main. %call doesn't
- ;; clobber a0/a1, so falling straight through to cc__main forwards
+ ;; clobber a0/a1, so falling straight through to main forwards
;; them unchanged. The 16-byte frame is just enough for %enter's
- ;; saved-fp/lr to fit; cc__main builds its own frame on top.
+ ;; saved-fp/lr to fit; main builds its own frame on top.
(let ((tb (cg-text cg)))
- (buf-push! tb "# entry stub: forwards argc=a0, argv=a1 to cc__main\n")
+ (buf-push! tb "# entry stub: forwards argc=a0, argv=a1 to main\n")
(buf-push! tb "%fn(p1_main, 16, {\n")
- (buf-push! tb "%call(&cc__main)\n")
+ (buf-push! tb "%call(&main)\n")
(buf-push! tb "})\n"))
;; Every P1pp translation unit must end with :ELF_end so the ELF
;; header can compute file-size and ph_memsz boundaries.
@@ -2949,7 +2968,13 @@
(staging-bytes (* 8 (cg-max-outgoing cg)))
(raw-size (+ staging-bytes locals-hi))
(frame-size (align-up raw-size 16))
- (mangled (%cg-mangle-global name))
+ ;; Look up the bound sym for this fn so `static void foo(){...}`
+ ;; emits the same cc__-mangled label that callers reference.
+ ;; The sym was bound by parse-fn-body before the body parse,
+ ;; so it's in the top scope frame at this point.
+ (fn-sym (alist-ref name (car (world-scope (cg-world cg)))))
+ (mangled (cond (fn-sym (%cg-sym-label fn-sym))
+ (else name)))
(tb (cg-text cg)))
;; Now that the body is fully emitted, leave fn dispatch so any
;; trailing emits in this function (including the ret-block below)
@@ -3064,21 +3089,21 @@
(cp-ty (%ctype 'ptr 8 8 %t-i8)))
(cg-push cg (%opnd 'global cp-ty label #f))))
-(define (cg-push-sym cg sym)
- (pmatch sym
- (($ sym? (kind fn) (type ,ty) (name ,nm))
- (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #f)))
+(define (cg-push-sym cg sm)
+ (pmatch sm
+ (($ sym? (kind fn) (type ,ty))
+ (cg-push cg (%opnd 'global ty (%cg-sym-label sm) #f)))
(($ sym? (kind enum-const) (type ,ty) (slot ,v))
(cg-push cg (%opnd 'imm ty v #f)))
- (($ sym? (kind var) (storage extern) (type ,ty) (name ,nm))
- (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #t)))
- (($ sym? (kind var) (storage static) (type ,ty) (name ,nm))
- (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #t)))
+ (($ sym? (kind var) (storage extern) (type ,ty))
+ (cg-push cg (%opnd 'global ty (%cg-sym-label sm) #t)))
+ (($ sym? (kind var) (storage static) (type ,ty))
+ (cg-push cg (%opnd 'global ty (%cg-sym-label sm) #t)))
(($ sym? (kind var) (type ,ty) (slot ,off))
(cg-push cg (%opnd 'frame ty off #t)))
(($ sym? (kind param) (type ,ty) (slot ,off))
(cg-push cg (%opnd 'frame ty off #t)))
- (else (die #f "cg-push-sym: unsupported sym-kind" (sym-kind sym)))))
+ (else (die #f "cg-push-sym: unsupported sym-kind" (sym-kind sm)))))
;; A cg-push-deref result is a frame-lval whose slot HOLDS THE ADDRESS
;; (not the value). To distinguish from ordinary frame-lvals (whose
@@ -3941,8 +3966,7 @@
(else (die #f "cg-emit-global: bad init piece" piece))))
(define (cg-emit-global cg sym init)
- (let* ((nm (sym-name sym))
- (lbl (%cg-mangle-global nm))
+ (let* ((lbl (%cg-sym-label sym))
(sz (ctype-size (sym-type sym)))
(size (if (< sz 0) 8 sz)))
(cond
@@ -4145,7 +4169,18 @@
(else (die #f "enum-const redecl" (sym-name old)))))
((and (sym-defined? old) (sym-defined? new))
(die #f "redefinition" (sym-name old)))
- ((sym-defined? new) new)
+ ;; Linkage inherits from the first declaration (C 6.2.2 ¶4): if a
+ ;; later decl/def of the same identifier doesn't carry a storage
+ ;; class, it picks up the prior one. tcc.c relies on this with
+ ;; `static T f(); ... T f() {…}` — the prior `static` makes both
+ ;; the decl and the def internal-linkage. Without this carry-
+ ;; through cc.scm split them across two label namespaces.
+ ((sym-defined? new)
+ (cond
+ ((eq? (sym-storage old) 'static)
+ (%sym (sym-name new) (sym-kind new) 'static
+ (sym-type new) (sym-slot new) #t))
+ (else new)))
(else old)))
(define (scope-bind! ps n s)
@@ -5071,7 +5106,7 @@
(let-values (((n t) (parse-declarator ps b)))
(cond
((and (ctype-is-fn? t) (at-punct? ps 'lbrace))
- (parse-fn-body ps n t) 'fn)
+ (parse-fn-body ps sto n t) 'fn)
(else
(handle-decl ps sto n t)
(let lp ()
@@ -5222,7 +5257,7 @@
(and (eq? (sym-kind sm) 'var)
(or (eq? (sym-storage sm) 'static)
(eq? (sym-storage sm) 'extern))))
- (cons 'label-ref (%cg-mangle-global (sym-name sm))))
+ (cons 'label-ref (%cg-sym-label sm)))
(else
(die (tok-loc it) "init: &x must reference a global"
(tok-value it))))))
@@ -5239,7 +5274,7 @@
(eq? (sym-storage sm) 'extern)))))))
(advance ps)
(let ((sm (scope-lookup ps (tok-value t))))
- (cons 'label-ref (%cg-mangle-global (sym-name sm)))))
+ (cons 'label-ref (%cg-sym-label sm))))
;; Plain string literal as char* initializer.
((eq? (tok-kind t) 'STR)
(advance ps)
@@ -5807,8 +5842,8 @@
;; roots (block-statics, string literals, block-scope tags that escape
;; via the global tables) are promoted en masse there. See the Phase 3
;; section above parse-translation-unit.
-(define (parse-fn-body ps name dt)
- (scope-bind! ps name (%sym name 'fn 'extern dt #f #t))
+(define (parse-fn-body ps sto name dt)
+ (scope-bind! ps name (%sym name 'fn (or sto 'extern) dt #f #t))
(%parse-fn-body-inner ps name dt))
(define (%parse-fn-body-inner ps name dt)
diff --git a/tests/cc/129-extern-libp1pp.c b/tests/cc/129-extern-libp1pp.c
@@ -0,0 +1,73 @@
+/* Calls into libp1pp routines via plain C `extern` declarations. The
+ * libp1pp side already provides `:memcpy`, `:memcmp`, `:strlen`, and
+ * `:memset` as bare-name labels (see P1/P1pp.P1pp). For these to link,
+ * cc.scm must NOT prefix `extern`-but-not-defined-here symbols with
+ * its `cc__` namespace — bare-name extern decls should pass through.
+ *
+ * This is the linkage rule that lets the tcc-boot2 path resolve all
+ * the libc symbols (memcpy/strlen/memset/etc.) tcc.c calls.
+ */
+
+extern void *memcpy(void *, const void *, unsigned long); /* hand-written prototypes: this file includes no <string.h> */
+extern int memcmp(const void *, const void *, unsigned long);
+extern void *memset(void *, int, unsigned long);
+extern unsigned long strlen(const char *); /* NOTE(review): unsigned long presumably stands in for size_t — confirm on non-LP64 hosts */
+
+int test_strlen(void) { /* returns 0 on success, else the number of the first failing check */
+ if (strlen("") != 0) return 1;
+ if (strlen("a") != 1) return 2;
+ if (strlen("hello") != 5) return 3;
+ if (strlen("ab\0cd") != 2) return 4; /* embedded NUL byte: strlen stops at the first NUL */
+ return 0;
+}
+
+int test_memcpy(void) { /* returns 0 on success, else the number of the first failing check */
+ char buf[8];
+ memcpy(buf, "abcdefg", 8); /* 7 characters plus the literal's terminating NUL */
+ if (buf[0] != 'a') return 1;
+ if (buf[3] != 'd') return 2;
+ if (buf[6] != 'g') return 3;
+ if (buf[7] != 0) return 4; /* the NUL must have been copied too */
+ return 0;
+}
+
+int test_memcmp(void) { /* returns 0 on success, else the number of the first failing check */
+ if (memcmp("hello", "hello", 5) != 0) return 1; /* identical bytes must compare equal */
+ if (memcmp("hello", "help!", 5) == 0) return 2; /* first difference is at index 3 */
+ if (memcmp("a", "b", 1) == 0) return 3; /* single differing byte */
+ return 0;
+}
+
+int test_memset(void) { /* returns 0 on success, else the number of the first failing check */
+ char buf[6];
+ buf[5] = 0; /* sentinel stored BEFORE the fill so check 4 can catch an overrun */
+ memset(buf, 'X', 5); /* fills bytes 0..4 only */
+ if (buf[0] != 'X') return 1;
+ if (buf[2] != 'X') return 2;
+ if (buf[4] != 'X') return 3;
+ if (buf[5] != 0) return 4; /* memset must not have written past its 5 bytes */
+ return 0;
+}
+
+int test_extern_then_define(void) {
+ /* A function declared `extern` at block scope and later defined in
+ * this same TU without `static` has external linkage on both sides,
+ * so the call and the definition share the bare `helper_local`
+ * label (no `cc__` prefix); the call must reach the definition below. */
+ extern int helper_local(int); /* block-scope declaration, external linkage */
+ return helper_local(7); /* should call the helper_local defined below, which returns 0 for 7 */
+}
+
+int helper_local(int x) { /* file-scope definition behind the block-scope extern in test_extern_then_define */
+ return x == 7 ? 0 : 99; /* 0 only for the expected argument 7 */
+}
+
+int main(int argc, char **argv) { /* exit status 0 means every group passed */
+ int r;
+ if ((r = test_strlen())) return 10 + r; /* 1x: strlen, x = failing check */
+ if ((r = test_memcpy())) return 20 + r; /* 2x: memcpy */
+ if ((r = test_memcmp())) return 30 + r; /* 3x: memcmp */
+ if ((r = test_memset())) return 40 + r; /* 4x: memset */
+ if ((r = test_extern_then_define())) return 50 + r; /* 5x: extern decl, then definition */
+ return 0;
+}
diff --git a/tests/cc/129-extern-libp1pp.expected-exit b/tests/cc/129-extern-libp1pp.expected-exit
@@ -0,0 +1 @@
+0
diff --git a/tests/cc/130-static-decl-def.c b/tests/cc/130-static-decl-def.c
@@ -0,0 +1,29 @@
+/* C 6.2.2 ¶4 — linkage inherits from the prior declaration. tcc.c
+ * relies on this in places like:
+ * static void gfunc_call(int); // forward, internal linkage
+ * ...
+ * void gfunc_call(int n) { ... } // omits `static` but inherits it
+ *
+ * Both the decl and the def must point at the same internal-linkage
+ * label; the merge in scope-bind!/sym-merge has to carry the static
+ * forward so cc.scm doesn't split them across two label namespaces.
+ */
+
+/* Pattern A — static decl, plain def: the def inherits internal linkage (C 6.2.2 ¶4-5). */
+static int helper_a(int);
+int helper_a(int x) { return x + 100; }
+
+/* Pattern B — static decl, static def (explicit on both sides): internal linkage. */
+static int helper_b(int);
+static int helper_b(int x) { return x + 200; }
+
+/* Pattern C — plain decl, plain def: external linkage end-to-end (C 6.2.2 ¶5). */
+int helper_c(int);
+int helper_c(int x) { return x + 300; }
+
+int main(int argc, char **argv) { /* exit status 0 means all three patterns linked and ran */
+ if (helper_a(1) != 101) return 1; /* Pattern A failed */
+ if (helper_b(2) != 202) return 2; /* Pattern B failed */
+ if (helper_c(3) != 303) return 3; /* Pattern C failed */
+ return 0;
+}
diff --git a/tests/cc/130-static-decl-def.expected-exit b/tests/cc/130-static-decl-def.expected-exit
@@ -0,0 +1 @@
+0