boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 6488cca37dcdc7e2b323bb6f3bd37c045ff41a19
parent 772b42d44f685524177de9acef5b2318064ec4fa
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 28 Apr 2026 18:42:51 -0700

cc: follow C linkage rules for external/internal symbols

Rebases the cc.scm symbol-mangling around the C linkage model rather
than the previous "always cc__-prefix every global" rule. External
linkage (the default at file scope, and any `extern` decl) uses the
bare ident; internal linkage (`static`) keeps a cc__ prefix to stay
out of the runtime/libp1pp namespace. tcc.c references libc symbols
like `&memcpy` directly with this rule, and libp1pp's bare `:memcpy`
links them — the two-namespace dance that previously left every libc
call unresolvable is gone.

Three loose ends had to come along:

  - cg-fn-end now looks up the bound sym in scope so a definition
    emits the same label its callers reference. Previously it always
    took the `cc__` prefix, which split `static T f(); ... T f() {…}`
    across two label namespaces.

  - parse-fn-body gained a sto argument so the bound fn-sym carries
    the parsed storage class (`static`-vs-default), instead of always
    coercing to 'extern.

  - sym-merge now carries a prior `static` declaration's storage
    forward when the later definition omits it (C 6.2.2 ¶4). tcc.c
    relies on this with `static void gfunc_call(int);` followed by
    `void gfunc_call(int n){…}` in the included gen.c file.

The entry stub was also retargeted from `&cc__main` to `&main` to
match `int main()`'s new external-linkage label.

Tests:
  129-extern-libp1pp.c — extern decls of libp1pp's bare `:memcpy`,
                        `:memcmp`, `:memset`, `:strlen` link directly,
                        plus the extern-then-define pattern.
  130-static-decl-def.c — three decl/def storage permutations
                        including the static-inheritance case.

Both pass on host cc and on cc.scm. Full cc suite: 150 passed.

Diffstat:
Mcc/cc.scm | 79+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
Atests/cc/129-extern-libp1pp.c | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/cc/129-extern-libp1pp.expected-exit | 1+
Atests/cc/130-static-decl-def.c | 29+++++++++++++++++++++++++++++
Atests/cc/130-static-decl-def.expected-exit | 1+
5 files changed, 161 insertions(+), 22 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -2596,6 +2596,25 @@ (define (%cg-mangle-global name-bv) (bytevector-append "cc__" name-bv)) +;; Label for a sym at the M1 layer. +;; +;; C linkage rules drive this directly: +;; - external linkage (the default at file scope, plus any `extern` +;; decl): bare ident. Same label name shared between every decl +;; and the eventual definition, in any order. `extern T memcpy()` +;; links to libp1pp's `:memcpy`; `int g_acc;` and refs to it +;; share `:g_acc`. +;; - internal linkage (`static`): cc__-prefixed. Free to mangle +;; since `static` is invisible across TUs, and the prefix keeps +;; it out of the external/runtime namespace. +;; Block-scope statics already mangle their sym-name to +;; `<fnname>__<n>` at parse time (see line ~5125); the cc__ prefix +;; here just nests another layer of namespacing on top of that. +(define (%cg-sym-label sm) + (cond + ((eq? (sym-storage sm) 'static) (%cg-mangle-global (sym-name sm))) + (else (sym-name sm)))) + (define (%cg-reg->bv r) (symbol->string r)) (define (%cg-emit-li cg reg n) @@ -2818,13 +2837,13 @@ (cg-flush-tentatives! cg) ;; Entry stub. P1's program-entry contract (docs/P1.md §Program Entry) ;; delivers argc in a0 and argv in a1 at p1_main. %call doesn't - ;; clobber a0/a1, so falling straight through to cc__main forwards + ;; clobber a0/a1, so falling straight through to main forwards ;; them unchanged. The 16-byte frame is just enough for %enter's - ;; saved-fp/lr to fit; cc__main builds its own frame on top. + ;; saved-fp/lr to fit; main builds its own frame on top. (let ((tb (cg-text cg))) - (buf-push! tb "# entry stub: forwards argc=a0, argv=a1 to cc__main\n") + (buf-push! tb "# entry stub: forwards argc=a0, argv=a1 to main\n") (buf-push! tb "%fn(p1_main, 16, {\n") - (buf-push! tb "%call(&cc__main)\n") + (buf-push! tb "%call(&main)\n") (buf-push! tb "})\n")) ;; Every P1pp translation unit must end with :ELF_end so the ELF ;; header can compute file-size and ph_memsz boundaries. 
@@ -2949,7 +2968,13 @@ (staging-bytes (* 8 (cg-max-outgoing cg))) (raw-size (+ staging-bytes locals-hi)) (frame-size (align-up raw-size 16)) - (mangled (%cg-mangle-global name)) + ;; Look up the bound sym for this fn so `static void foo(){...}` + ;; emits the same cc__-mangled label that callers reference. + ;; The sym was bound by parse-fn-body before the body parse, + ;; so it's in the top scope frame at this point. + (fn-sym (alist-ref name (car (world-scope (cg-world cg))))) + (mangled (cond (fn-sym (%cg-sym-label fn-sym)) + (else name))) (tb (cg-text cg))) ;; Now that the body is fully emitted, leave fn dispatch so any ;; trailing emits in this function (including the ret-block below) @@ -3064,21 +3089,21 @@ (cp-ty (%ctype 'ptr 8 8 %t-i8))) (cg-push cg (%opnd 'global cp-ty label #f)))) -(define (cg-push-sym cg sym) - (pmatch sym - (($ sym? (kind fn) (type ,ty) (name ,nm)) - (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #f))) +(define (cg-push-sym cg sm) + (pmatch sm + (($ sym? (kind fn) (type ,ty)) + (cg-push cg (%opnd 'global ty (%cg-sym-label sm) #f))) (($ sym? (kind enum-const) (type ,ty) (slot ,v)) (cg-push cg (%opnd 'imm ty v #f))) - (($ sym? (kind var) (storage extern) (type ,ty) (name ,nm)) - (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #t))) - (($ sym? (kind var) (storage static) (type ,ty) (name ,nm)) - (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #t))) + (($ sym? (kind var) (storage extern) (type ,ty)) + (cg-push cg (%opnd 'global ty (%cg-sym-label sm) #t))) + (($ sym? (kind var) (storage static) (type ,ty)) + (cg-push cg (%opnd 'global ty (%cg-sym-label sm) #t))) (($ sym? (kind var) (type ,ty) (slot ,off)) (cg-push cg (%opnd 'frame ty off #t))) (($ sym? 
(kind param) (type ,ty) (slot ,off)) (cg-push cg (%opnd 'frame ty off #t))) - (else (die #f "cg-push-sym: unsupported sym-kind" (sym-kind sym))))) + (else (die #f "cg-push-sym: unsupported sym-kind" (sym-kind sm))))) ;; A cg-push-deref result is a frame-lval whose slot HOLDS THE ADDRESS ;; (not the value). To distinguish from ordinary frame-lvals (whose @@ -3941,8 +3966,7 @@ (else (die #f "cg-emit-global: bad init piece" piece)))) (define (cg-emit-global cg sym init) - (let* ((nm (sym-name sym)) - (lbl (%cg-mangle-global nm)) + (let* ((lbl (%cg-sym-label sym)) (sz (ctype-size (sym-type sym))) (size (if (< sz 0) 8 sz))) (cond @@ -4145,7 +4169,18 @@ (else (die #f "enum-const redecl" (sym-name old))))) ((and (sym-defined? old) (sym-defined? new)) (die #f "redefinition" (sym-name old))) - ((sym-defined? new) new) + ;; Linkage inherits from the first declaration (C 6.2.2 ¶4): if a + ;; later decl/def of the same identifier doesn't carry a storage + ;; class, it picks up the prior one. tcc.c relies on this with + ;; `static T f(); ... T f() {…}` — the prior `static` makes both + ;; the decl and the def internal-linkage. Without this carry- + ;; through cc.scm split them across two label namespaces. + ((sym-defined? new) + (cond + ((eq? (sym-storage old) 'static) + (%sym (sym-name new) (sym-kind new) 'static + (sym-type new) (sym-slot new) #t)) + (else new))) (else old))) (define (scope-bind! ps n s) @@ -5071,7 +5106,7 @@ (let-values (((n t) (parse-declarator ps b))) (cond ((and (ctype-is-fn? t) (at-punct? ps 'lbrace)) - (parse-fn-body ps n t) 'fn) + (parse-fn-body ps sto n t) 'fn) (else (handle-decl ps sto n t) (let lp () @@ -5222,7 +5257,7 @@ (and (eq? (sym-kind sm) 'var) (or (eq? (sym-storage sm) 'static) (eq? (sym-storage sm) 'extern)))) - (cons 'label-ref (%cg-mangle-global (sym-name sm)))) + (cons 'label-ref (%cg-sym-label sm))) (else (die (tok-loc it) "init: &x must reference a global" (tok-value it)))))) @@ -5239,7 +5274,7 @@ (eq? 
(sym-storage sm) 'extern))))))) (advance ps) (let ((sm (scope-lookup ps (tok-value t)))) - (cons 'label-ref (%cg-mangle-global (sym-name sm))))) + (cons 'label-ref (%cg-sym-label sm)))) ;; Plain string literal as char* initializer. ((eq? (tok-kind t) 'STR) (advance ps) @@ -5807,8 +5842,8 @@ ;; roots (block-statics, string literals, block-scope tags that escape ;; via the global tables) are promoted en masse there. See the Phase 3 ;; section above parse-translation-unit. -(define (parse-fn-body ps name dt) - (scope-bind! ps name (%sym name 'fn 'extern dt #f #t)) +(define (parse-fn-body ps sto name dt) + (scope-bind! ps name (%sym name 'fn (or sto 'extern) dt #f #t)) (%parse-fn-body-inner ps name dt)) (define (%parse-fn-body-inner ps name dt) diff --git a/tests/cc/129-extern-libp1pp.c b/tests/cc/129-extern-libp1pp.c @@ -0,0 +1,73 @@ +/* Calls into libp1pp routines via plain C `extern` declarations. The + * libp1pp side already provides `:memcpy`, `:memcmp`, `:strlen`, and + * `:memset` as bare-name labels (see P1/P1pp.P1pp). For these to link, + * cc.scm must NOT prefix `extern`-but-not-defined-here symbols with + * its `cc__` namespace — bare-name extern decls should pass through. + * + * This is the linkage rule that lets the tcc-boot2 path resolve all + * the libc symbols (memcpy/strlen/memset/etc.) tcc.c calls. + */ + +extern void *memcpy(void *, const void *, unsigned long); +extern int memcmp(const void *, const void *, unsigned long); +extern void *memset(void *, int, unsigned long); +extern unsigned long strlen(const char *); + +int test_strlen(void) { + if (strlen("") != 0) return 1; + if (strlen("a") != 1) return 2; + if (strlen("hello") != 5) return 3; + /* String contains a NUL via embedded byte; strlen stops at first NUL. 
*/ + return 0; +} + +int test_memcpy(void) { + char buf[8]; + memcpy(buf, "abcdefg", 8); + if (buf[0] != 'a') return 1; + if (buf[3] != 'd') return 2; + if (buf[6] != 'g') return 3; + if (buf[7] != 0) return 4; + return 0; +} + +int test_memcmp(void) { + if (memcmp("hello", "hello", 5) != 0) return 1; + if (memcmp("hello", "help!", 5) == 0) return 2; + if (memcmp("a", "b", 1) == 0) return 3; + return 0; +} + +int test_memset(void) { + char buf[6]; + memset(buf, 'X', 5); + buf[5] = 0; + if (buf[0] != 'X') return 1; + if (buf[2] != 'X') return 2; + if (buf[4] != 'X') return 3; + if (buf[5] != 0) return 4; + return 0; +} + +int test_extern_then_define(void) { + /* If a function is declared extern AND later defined here in the + * same TU, the definition's `cc__` prefix takes precedence — the + * scope-bind! merge sets defined?=#t, the call resolves to the + * local definition rather than the bare libp1pp symbol. */ + extern int helper_local(int); /* declared local */ + return helper_local(7); /* should call the cc__helper_local below */ +} + +int helper_local(int x) { + return x == 7 ? 0 : 99; +} + +int main(int argc, char **argv) { + int r; + if ((r = test_strlen())) return 10 + r; + if ((r = test_memcpy())) return 20 + r; + if ((r = test_memcmp())) return 30 + r; + if ((r = test_memset())) return 40 + r; + if ((r = test_extern_then_define())) return 50 + r; + return 0; +} diff --git a/tests/cc/129-extern-libp1pp.expected-exit b/tests/cc/129-extern-libp1pp.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/cc/130-static-decl-def.c b/tests/cc/130-static-decl-def.c @@ -0,0 +1,29 @@ +/* C 6.2.2 ¶4 — linkage inherits from the prior declaration. tcc.c + * relies on this in places like: + * static void gfunc_call(int); // forward, internal linkage + * ... + * void gfunc_call(int n) { ... 
} // omits `static` but inherits it + * + * Both the decl and the def must point at the same internal-linkage + * label; the merge in scope-bind!/sym-merge has to carry the static + * forward so cc.scm doesn't split them across two label namespaces. + */ + +/* Pattern A — static decl, plain def. */ +static int helper_a(int); +int helper_a(int x) { return x + 100; } + +/* Pattern B — static decl, static def (explicit on both sides). */ +static int helper_b(int); +static int helper_b(int x) { return x + 200; } + +/* Pattern C — plain decl, plain def. External linkage end-to-end. */ +int helper_c(int); +int helper_c(int x) { return x + 300; } + +int main(int argc, char **argv) { + if (helper_a(1) != 101) return 1; + if (helper_b(2) != 202) return 2; + if (helper_c(3) != 303) return 3; + return 0; +} diff --git a/tests/cc/130-static-decl-def.expected-exit b/tests/cc/130-static-decl-def.expected-exit @@ -0,0 +1 @@ +0