kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit fc7bdd38228d702866ea143dd58bf85d06fa48c0
parent 72f78c0141fca9e68a8efb575a098ba4db5b44ce
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat, 23 May 2026 16:04:14 -0700

bootstrap: fix self-compilation across frontend, codegen, and harness

`make bootstrap` now reaches a fixed point (stage2 ≡ stage3, bitwise).
Fixes five compiler bugs surfaced by compiling cfree with cfree:

- Aggregate store with an effective-address rider: struct assignment to a
  member/element destination emitted a scalar store of an aggregate.
  Collapse the destination EA to a plain pointer-deref lvalue first.
- Sub-object aggregate reads at offset 0: a first-member read kept the
  outer struct's CG type (wrong copy size). Track an is_subobject flag on
  the lvalue aux and materialize such reads in to_rvalue.
- Member of an rvalue aggregate (mk().field): never applied the member
  offset. Mark it a readable (non-modifiable) lvalue.
- Out-of-order designated initializers: a designator targeting the last
  field terminated the field loop, rejecting a following backward
  designator. Loop now bounded by '}'/EOF in both runtime and static paths.
- type_composite dropped pointer qualifiers (lost the inner const of
  `const char* const*`); re-apply via type_qualified. This was the stage-3
  divergence, masked in stage1 by type interning.

Harness (scripts/stage2_link.sh, Makefile): add the missing lang/cpp
compile stage; supply -Ilang/cpp, -Ilang, and -isystem <sdk>/usr/include
where the Makefile does; link the host-arch libcfree_rt.a for compiler-rt
builtins (__multi3); make bootstrap depend on rt.

Diffstat:
MMakefile | 2+-
Mlang/c/parse/cg_adapter.c | 18+++++++++++++++++-
Mlang/c/parse/cg_public_compat.h | 3++-
Mlang/c/parse/parse_expr.c | 14+++++++++++++-
Mlang/c/parse/parse_init.c | 27+++++++++++++++++++++++----
Mlang/c/type/type.c | 4+++-
Mscripts/stage2_link.sh | 49++++++++++++++++++++++++++++++++++++++++++++++---
7 files changed, 105 insertions(+), 12 deletions(-)

diff --git a/Makefile b/Makefile @@ -152,7 +152,7 @@ BOOTSTRAP_STAGE3_OUT = build/stage3-probe BOOTSTRAP_STAGE2_BIN = build/cfree-stage2 BOOTSTRAP_STAGE3_BIN = build/cfree-stage3 -bootstrap: $(BIN) +bootstrap: $(BIN) rt cp $(BIN) $(STAGE1_BIN) rm -rf $(BOOTSTRAP_STAGE2_OUT) $(BOOTSTRAP_STAGE3_OUT) \ $(BOOTSTRAP_STAGE2_BIN) $(BOOTSTRAP_STAGE3_BIN) diff --git a/lang/c/parse/cg_adapter.c b/lang/c/parse/cg_adapter.c @@ -46,6 +46,7 @@ static void pcg_aux_clear(PcgLvAux* a) { a->storage_size = 0; a->bit_signed = 0; a->base_kind = PCG_LV_BASE_LOCAL; + a->is_subobject = 0; a->pad[0] = a->pad[1] = a->pad[2] = a->pad[3] = 0; a->pad[4] = a->pad[5] = 0; } @@ -656,6 +657,13 @@ void pcg_lv_member(Parser* p, i64 byte_offset, const Type* field_ty, u16 bf_offset, u16 bf_width, u32 bf_storage_size) { PcgLvAux* lv = pcg_top_lv_aux(p); int was_lvalue = pcg_top_is_lvalue(p); + const Type* base_ty = pcg_top_type(p); + /* A member of an rvalue aggregate (e.g. `mk().field` for a struct-returning + * call) lives in a CG-side temporary that is memory-backed, so the member is + * readable as an lvalue — but per C it is not a *modifiable* lvalue. */ + int base_is_rvalue_agg = + !was_lvalue && base_ty && + (base_ty->kind == TY_STRUCT || base_ty->kind == TY_UNION); i64 saved_offset = lv ? lv->offset + byte_offset : byte_offset; u32 saved_scale = lv ? lv->scale : 0u; u8 saved_base_kind = lv ? lv->base_kind : PCG_LV_BASE_LOCAL; @@ -663,7 +671,11 @@ void pcg_lv_member(Parser* p, i64 byte_offset, const Type* field_ty, * accumulated on the chain (`a[i].f.g` keeps `scale = sizeof(elem)` and * adds the field offsets). */ pcg_retag_top(p, field_ty); - if (was_lvalue) pcg_set_top_lvalue(p); + if (was_lvalue) { + pcg_set_top_lvalue(p); + } else if (base_is_rvalue_agg && p->cg_type_sp) { + p->cg_value_flags[p->cg_type_sp - 1u] = PCG_VALUE_LVALUE; + } /* pcg_retag_top cleared aux; re-apply the bumped offset and base kind. 
*/ { PcgLvAux* lv_after = pcg_top_lv_aux(p); @@ -675,6 +687,10 @@ void pcg_lv_member(Parser* p, i64 byte_offset, const Type* field_ty, lv_after->bit_width = bf_width; lv_after->storage_size = bf_storage_size; lv_after->bit_signed = pcg_type_is_signed(field_ty) ? 1u : 0u; + /* A member access narrows to a sub-object of a larger CG-tracked + * object; record it so aggregate reads materialize a pointer to the + * exact sub-object (the offset alone can be 0 for a first member). */ + lv_after->is_subobject = 1u; } if (bf_width && p->cg_type_sp) p->cg_value_flags[p->cg_type_sp - 1u] |= PCG_VALUE_BITFIELD; diff --git a/lang/c/parse/cg_public_compat.h b/lang/c/parse/cg_public_compat.h @@ -47,7 +47,8 @@ typedef struct PcgLvAux { u32 storage_size; u8 bit_signed; u8 base_kind; /* PcgLvBaseKind */ - u8 pad[6]; + u8 is_subobject; /* lvalue is a member/element of a larger CG object */ + u8 pad[5]; } PcgLvAux; typedef enum BinOp { diff --git a/lang/c/parse/parse_expr.c b/lang/c/parse/parse_expr.c @@ -1059,7 +1059,7 @@ void to_rvalue(Parser* p) { PcgLvAux* lv = pcg_top_lv_aux(p); int materialize = is_lvalue && lv && - (lv->offset != 0 || lv->scale != 0 || + (lv->offset != 0 || lv->scale != 0 || lv->is_subobject || lv->base_kind == PCG_LV_BASE_POINTER_RV); p->cg_type_stack[p->cg_type_sp - 1u] = uty; if (materialize) { @@ -3329,6 +3329,18 @@ void parse_assign_expr(Parser* p) { } } if (is_simple_assign) { + /* Aggregate destinations keep any member/index displacement in the lvalue + * aux rather than a scalar EA the store can fold. Collapse a non-trivial + * EA into a plain pointer-deref lvalue (mirrors the source-side handling in + * to_rvalue) so the aggregate copy lands at the right address. 
*/ + if (lhs && (lhs->kind == TY_STRUCT || lhs->kind == TY_UNION)) { + PcgLvAux* dlv = pcg_top_lv_aux(p); + if (pcg_top_is_lvalue(p) && dlv && (dlv->offset != 0 || dlv->scale != 0)) { + const Type* uty = type_unqual(p->pool, lhs); + pcg_addr(p); + pcg_deref(p, uty); + } + } parse_assign_expr(p); to_rvalue(p); { diff --git a/lang/c/parse/parse_init.c b/lang/c/parse/parse_init.c @@ -498,8 +498,16 @@ static u32 init_struct_fields(Parser* p, FrameSlot slot, const Type* arr_ty, const ABIRecordLayout* L = c_abi_record_layout(p->abi, p->pool, ty); u32 i = start_field; u32 zero_lo = start_field; - for (; i < ty->rec.nfields; ++i) { - if (braced && (is_punct(&p->cur, '}') || p->cur.kind == TOK_EOF)) break; + /* A braced list may carry designators that re-target any field, including a + * field earlier than one already seen, so the running position `i` does not + * bound the loop — only '}'/EOF (or, for an unbraced nested list, the field + * count) terminates it. */ + for (;;) { + if (braced) { + if (is_punct(&p->cur, '}') || p->cur.kind == TOK_EOF) break; + } else if (i >= ty->rec.nfields) { + break; + } if (braced && is_punct(&p->cur, '.')) { const Type* sub_ty; u32 sub_off; @@ -536,6 +544,11 @@ static u32 init_struct_fields(Parser* p, FrameSlot slot, const Type* arr_ty, if (zero_lo <= top_idx) zero_lo = top_idx + 1; goto next_item_struct; } + if (i >= ty->rec.nfields) { + /* Excess positional initializer with no designator to place it; stop and + * let the caller diagnose the missing '}'. 
*/ + break; + } init_field_at(p, slot, arr_ty, offset, ty, i); if (zero_lo <= i) zero_lo = i + 1; if (!braced) { @@ -551,6 +564,7 @@ static u32 init_struct_fields(Parser* p, FrameSlot slot, const Type* arr_ty, ++i; break; } + ++i; } if (braced) { u32 j; @@ -1419,8 +1433,11 @@ void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset, parse_static_aggregate_remainder(p, buf, buflen, offset, ty, 0); return; } - while (i < ty->rec.nfields && !is_punct(&p->cur, '}')) { - const Field* f = &ty->rec.fields[i]; + /* Designators may re-target any field regardless of the running position + * `i`, so the loop is bounded by '}'/EOF; `i` only drives positional + * placement (and is range-checked before use). */ + while (!is_punct(&p->cur, '}') && p->cur.kind != TOK_EOF) { + const Field* f; if (is_punct(&p->cur, '.')) { const Type* sub_ty; u32 sub_off; @@ -1443,6 +1460,8 @@ void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset, if (!accept_punct(p, ',')) break; continue; } + if (i >= ty->rec.nfields) break; /* excess positional initializer */ + f = &ty->rec.fields[i]; if (f->flags & FIELD_BITFIELD) { if (!(f->flags & FIELD_ZERO_WIDTH)) { parse_static_bitfield_at(p, buf, buflen, offset, &L->fields[i], diff --git a/lang/c/type/type.c b/lang/c/type/type.c @@ -416,7 +416,9 @@ const Type* type_composite(Pool* p, const Type* a, const Type* b) { case TY_PTR: { const Type* pointee = type_composite(p, a->ptr.pointee, b->ptr.pointee); if (!pointee) pointee = a->ptr.pointee; - return type_ptr(p, pointee); + /* Preserve the pointer's own qualifiers (e.g. the inner `const` of + * `const char* const*`); type_ptr alone yields an unqualified pointer. 
*/ + return type_qualified(p, type_ptr(p, pointee), a->qual); } case TY_FUNC: if (a->fn.nparams == 0) return a; diff --git a/scripts/stage2_link.sh b/scripts/stage2_link.sh @@ -21,15 +21,21 @@ fi SDK="$(xcrun --show-sdk-path)" OUT="${CFREE_STAGE_OUT:-$ROOT/build/stage2-probe}" LIB_OUT="$OUT/lib" +LANG_CPP_OUT="$OUT/lang/cpp" LANG_C_OUT="$OUT/lang/c" LANG_WASM_OUT="$OUT/lang/wasm" LANG_TOY_OUT="$OUT/lang/toy" DRV_OUT="$OUT/driver" LOG="$OUT/log" -mkdir -p "$LIB_OUT" "$LANG_C_OUT" "$LANG_WASM_OUT" "$LANG_TOY_OUT" "$DRV_OUT" "$LOG" +mkdir -p "$LIB_OUT" "$LANG_CPP_OUT" "$LANG_C_OUT" "$LANG_WASM_OUT" \ + "$LANG_TOY_OUT" "$DRV_OUT" "$LOG" CFREE_FLAGS="--support-dir $ROOT -Iinclude -Isrc" -LANG_C_FLAGS="--support-dir $ROOT -Iinclude -Ilang/c" +# The lexer/preprocessor substrate (lang/cpp) is part of libcfree. +LANG_CPP_FLAGS="--support-dir $ROOT -Iinclude -Ilang/cpp" +# lang/c sources reach the lexer/preprocessor and cpp_support.h substrate under +# lang/cpp, mirroring the Makefile's `-Ilang/cpp -Ilang/c`. +LANG_C_FLAGS="--support-dir $ROOT -Iinclude -Ilang/cpp -Ilang/c" LANG_WASM_FLAGS="--support-dir $ROOT -Iinclude -Ilang/wasm" LANG_TOY_FLAGS="--support-dir $ROOT -Iinclude" DRIVER_FLAGS="--support-dir $ROOT -Iinclude -I." @@ -52,7 +58,14 @@ echo "=== compiling src/ with cfree ===" while IFS= read -r src; do rel="${src#src/}" obj="$LIB_OUT/${rel%.*}.o" - if compile_with_cfree "$src" "$obj" "$CFREE_FLAGS"; then + src_flags="$CFREE_FLAGS" + # lang_registry.c is the one libcfree source that reaches into lang/* and + # includes frontend headers as "c/c.h" — matches the Makefile's -Ilang. + if [ "$src" = "src/api/lang_registry.c" ]; then src_flags="$src_flags -Ilang"; fi + # emu/cpu.c pulls in the system <math.h> for sqrt; cfree's bundled freestanding + # header set has no math.h, so reach for the SDK's like the host build does. 
+ if [ "$src" = "src/emu/cpu.c" ]; then src_flags="$src_flags -isystem $SDK/usr/include"; fi + if compile_with_cfree "$src" "$obj" "$src_flags"; then cfree_objs+=("$obj") printf ' ok %s\n' "$src" else @@ -62,6 +75,20 @@ while IFS= read -r src; do done < <(find src \( -name '*.c' -o -name '*.S' \) | sort) echo +echo "=== compiling lang/cpp with cfree ===" +while IFS= read -r src; do + rel="${src#lang/cpp/}" + obj="$LANG_CPP_OUT/${rel%.c}.o" + if compile_with_cfree "$src" "$obj" "$LANG_CPP_FLAGS"; then + cfree_objs+=("$obj") + printf ' ok %s\n' "$src" + else + fail_lang+=("$src") + head -1 "$LOG/$(basename "$obj").log" | sed "s|^| FAIL $src: |" + fi +done < <(find lang/cpp -name '*.c' | sort) + +echo echo "=== compiling lang/c with cfree ===" while IFS= read -r src; do rel="${src#lang/c/}" @@ -143,9 +170,25 @@ if [ ! -f "$LIBSYS_DIR/libSystem.B.tbd" ] && \ exit 2 fi +# cfree lowers some operations (e.g. 128-bit multiply -> __multi3) to +# compiler-rt-style builtins that libSystem does not provide. The host build +# resolves these from clang's compiler-rt; here we link cfree's own runtime for +# the host architecture (built by `make rt`). +case "$(uname -m)" in + arm64|aarch64) RT_TRIPLE=aarch64-apple-darwin ;; + x86_64) RT_TRIPLE=x86_64-apple-darwin ;; + *) echo "unknown host arch $(uname -m) for runtime selection" >&2; exit 2 ;; +esac +RT_LIB="$ROOT/build/rt/$RT_TRIPLE/libcfree_rt.a" +if [ ! -f "$RT_LIB" ]; then + echo "missing $RT_LIB — run \`make rt\` first" >&2 + exit 2 +fi + set -x "$BIN" ld -o "$BIN_OUT" -pie \ "${cfree_objs[@]}" \ + "$RT_LIB" \ -L "$LIBSYS_DIR" -lSystem status=$? set +x