kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit c9156e6d621e9cd054a69a81f36cfa93015afd81
parent 9efd799a8b6b7d1a9ab4a2cae8a6abc84e1e3019
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 20 May 2026 05:18:11 -0700

Fix stage2 aggregate and aa64 bootstrap bugs

Diffstat:
M driver/hosted.c | 6 ++----
M lang/c/parse/cg_adapter.c | 2 ++
M lang/c/parse/parse_expr.c | 5 ++++-
M lang/c/parse/parse_init.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M scripts/stage2_link.sh | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
M src/arch/aa64/ops.c | 14 +++++++-------
A test/parse/cases/6_5_15_05_cond_const_aggregate_compound.c | 11 +++++++++++
A test/parse/cases/6_5_15_05_cond_const_aggregate_compound.expected | 1 +
A test/parse/cases/6_5_2_2_09_large_frame_struct_byval.c | 19 +++++++++++++++++++
A test/parse/cases/6_5_2_2_09_large_frame_struct_byval.expected | 1 +
A test/parse/cases/6_5_4_01_int_to_ptr_deref.c | 6 ++++++
A test/parse/cases/6_5_4_01_int_to_ptr_deref.expected | 1 +
A test/parse/cases/6_7_9_36_designated_compound_literal_subobject.c | 19 +++++++++++++++++++
A test/parse/cases/6_7_9_36_designated_compound_literal_subobject.expected | 1 +
14 files changed, 231 insertions(+), 21 deletions(-)

diff --git a/driver/hosted.c b/driver/hosted.c @@ -2,6 +2,7 @@ #include <stddef.h> #include <stdint.h> +#include <string.h> static char* hosted_join2(DriverEnv* env, const char* a, const char* b, size_t* out_size) { @@ -614,8 +615,5 @@ void driver_hosted_plan_fini(DriverEnv* env, DriverHostedPlan* plan) { driver_free(env, plan->owned_system_includes[i], plan->owned_system_include_sizes[i]); } - { - DriverHostedPlan zero = {0}; - *plan = zero; - } + memset(plan, 0, sizeof(*plan)); } diff --git a/lang/c/parse/cg_adapter.c b/lang/c/parse/cg_adapter.c @@ -479,6 +479,8 @@ void pcg_convert(Parser* p, const Type* dst) { if (emit) cfree_cg_sext(p->cg, id); } else if (ds > ss) { if (emit) cfree_cg_zext(p->cg, id); + } else if (type_is_ptr(src) != type_is_ptr(dst)) { + if (emit) cfree_cg_bitcast(p->cg, id); } } else if (type_is_int(src) && df) { if (pcg_type_is_signed(src)) { diff --git a/lang/c/parse/parse_expr.c b/lang/c/parse/parse_expr.c @@ -1050,7 +1050,10 @@ void to_rvalue(Parser* p) { cg_addr(p->cg); return; } - if (t->kind == TY_STRUCT || t->kind == TY_UNION) return; + if (t->kind == TY_STRUCT || t->kind == TY_UNION) { + p->cg_type_stack[p->cg_type_sp - 1u] = type_unqual(p->pool, t); + return; + } } cg_load(p->cg); } diff --git a/lang/c/parse/parse_init.c b/lang/c/parse/parse_init.c @@ -63,6 +63,98 @@ static void init_aggregate_remainder(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, const Type* ty, u32 start_index); +static void replay_recorded_initializer_expr(Parser* p) { + if (p->replay_len == 0) + perr(p, "internal: empty initializer expression replay"); + p->cur = p->replay[0]; + p->replay_pos = 1; + p->replay_active = 1; + p->has_next = 0; +} + +static void record_initializer_expr_for_replay(Parser* p) { + u32 len = 0; + u32 cap = 0; + Tok* buf = NULL; + int paren_depth = 0; + int brack_depth = 0; + int brace_depth = 0; + + for (;;) { + if (len == cap) { + u32 new_cap = cap ? 
cap * 2u : 16u; + Tok* nb = arena_array(p->pool->arena, Tok, new_cap); + if (!nb) perr(p, "out of memory recording initializer expression"); + if (buf && len) memcpy(nb, buf, len * sizeof(*buf)); + buf = nb; + cap = new_cap; + } + buf[len++] = p->cur; + + if (p->cur.kind == TOK_EOF) break; + if (paren_depth == 0 && brack_depth == 0 && brace_depth == 0 && + (is_punct(&p->cur, ',') || is_punct(&p->cur, '}'))) { + break; + } + + if (is_punct(&p->cur, '(')) + ++paren_depth; + else if (is_punct(&p->cur, ')')) + --paren_depth; + else if (is_punct(&p->cur, '[')) + ++brack_depth; + else if (is_punct(&p->cur, ']')) + --brack_depth; + else if (is_punct(&p->cur, '{')) + ++brace_depth; + else if (is_punct(&p->cur, '}')) + --brace_depth; + + advance(p); + } + + p->replay = buf; + p->replay_cap = cap; + p->replay_len = len; + replay_recorded_initializer_expr(p); +} + +static int try_init_aggregate_from_expr(Parser* p, FrameSlot slot, + const Type* arr_ty, u32 offset, + const Type* ty) { + const Type* expr_ty; + u32 save_sp; + int compatible; + + if (!ty || (ty->kind != TY_STRUCT && ty->kind != TY_UNION)) return 0; + if (is_punct(&p->cur, '{') || is_punct(&p->cur, '.') || + is_punct(&p->cur, '[')) { + return 0; + } + + record_initializer_expr_for_replay(p); + + save_sp = p->cg_type_sp; + pcg_codegen_suppress_push(p); + parse_assign_expr(p); + expr_ty = cg_top_type(p->cg); + compatible = + type_compatible(type_unqual(p->pool, ty), type_unqual(p->pool, expr_ty)); + p->cg_type_sp = save_sp; + pcg_codegen_suppress_pop(p); + + replay_recorded_initializer_expr(p); + if (!compatible) return 0; + + parse_assign_expr(p); + if (!type_compatible(type_unqual(p->pool, ty), + type_unqual(p->pool, cg_top_type(p->cg)))) { + perr(p, "incompatible aggregate initializer"); + } + emit_struct_copy_into_slot(p, slot, arr_ty, offset, ty); + return 1; +} + /* Push the lvalue of a sub-object at byte offset `offset` within the array * local `slot` (whose type is `arr_ty`), with element type `elem_ty`. 
*/ void push_subobject_lv(Parser* p, FrameSlot slot, const Type* arr_ty, @@ -582,6 +674,7 @@ void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, } if (ty->kind == TY_STRUCT) { if (!is_punct(&p->cur, '{')) { + if (try_init_aggregate_from_expr(p, slot, arr_ty, offset, ty)) return; init_aggregate_remainder(p, slot, arr_ty, offset, ty, 0); return; } @@ -592,6 +685,10 @@ void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, } if (ty->kind == TY_UNION) { int had_brace = accept_punct(p, '{'); + if (!had_brace && + try_init_aggregate_from_expr(p, slot, arr_ty, offset, ty)) { + return; + } if (ty->rec.nfields == 0) { if (had_brace) expect_punct(p, '}', "'}'"); return; diff --git a/scripts/stage2_link.sh b/scripts/stage2_link.sh @@ -13,30 +13,37 @@ set -u ROOT="$(cd "$(dirname "$0")/.." && pwd)" cd "$ROOT" -BIN="$ROOT/build/cfree" +BIN="${CFREE_STAGE_BIN:-$ROOT/build/cfree}" if [ ! -x "$BIN" ]; then echo "missing $BIN — run \`make\` first" >&2 exit 2 fi SDK="$(xcrun --show-sdk-path)" -OUT="$ROOT/build/stage2-probe" +OUT="${CFREE_STAGE_OUT:-$ROOT/build/stage2-probe}" LIB_OUT="$OUT/lib" +LANG_C_OUT="$OUT/lang/c" +LANG_WASM_OUT="$OUT/lang/wasm" +LANG_TOY_OUT="$OUT/lang/toy" DRV_OUT="$OUT/driver" LOG="$OUT/log" -mkdir -p "$LIB_OUT" "$DRV_OUT" "$LOG" +mkdir -p "$LIB_OUT" "$LANG_C_OUT" "$LANG_WASM_OUT" "$LANG_TOY_OUT" "$DRV_OUT" "$LOG" CFREE_FLAGS="-isystem $ROOT/rt/include -isystem $ROOT/rt/include/libc -Iinclude -Isrc" -DRIVER_FLAGS="-isystem $ROOT/rt/include -isystem $ROOT/rt/include/libc -Iinclude" +LANG_C_FLAGS="-isystem $ROOT/rt/include -isystem $ROOT/rt/include/libc -Iinclude -Ilang/c" +LANG_WASM_FLAGS="-isystem $ROOT/rt/include -isystem $ROOT/rt/include/libc -Iinclude -Ilang/wasm" +LANG_TOY_FLAGS="-isystem $ROOT/rt/include -isystem $ROOT/rt/include/libc -Iinclude" +DRIVER_FLAGS="-isystem $ROOT/rt/include -isystem $ROOT/rt/include/libc -Iinclude -I." # Driver files cfree still cannot parse (doc/STAGE2.md A2). 
Compile with # clang in stage-2 flag style so the resulting objects are still arm64 # Mach-O at a matching SDK level. -CLANG_FALLBACK=("env.c" "ld.c") +CLANG_FALLBACK=("env.c") cfree_objs=() clang_objs=() fail_src=() +fail_lang=() fail_driver=() compile_with_cfree() { @@ -62,6 +69,48 @@ while IFS= read -r src; do done < <(find src -name '*.c' | sort) echo +echo "=== compiling lang/c with cfree ===" +while IFS= read -r src; do + rel="${src#lang/c/}" + obj="$LANG_C_OUT/${rel%.c}.o" + if compile_with_cfree "$src" "$obj" "$LANG_C_FLAGS"; then + cfree_objs+=("$obj") + printf ' ok %s\n' "$src" + else + fail_lang+=("$src") + head -1 "$LOG/$(basename "$obj").log" | sed "s|^| FAIL $src: |" + fi +done < <(find lang/c -name '*.c' | sort) + +echo +echo "=== compiling lang/wasm with cfree ===" +while IFS= read -r src; do + rel="${src#lang/wasm/}" + obj="$LANG_WASM_OUT/${rel%.c}.o" + if compile_with_cfree "$src" "$obj" "$LANG_WASM_FLAGS"; then + cfree_objs+=("$obj") + printf ' ok %s\n' "$src" + else + fail_lang+=("$src") + head -1 "$LOG/$(basename "$obj").log" | sed "s|^| FAIL $src: |" + fi +done < <(find lang/wasm -name '*.c' 2>/dev/null | sort) + +echo +echo "=== compiling lang/toy with cfree ===" +while IFS= read -r src; do + rel="${src#lang/toy/}" + obj="$LANG_TOY_OUT/${rel%.c}.o" + if compile_with_cfree "$src" "$obj" "$LANG_TOY_FLAGS"; then + cfree_objs+=("$obj") + printf ' ok %s\n' "$src" + else + fail_lang+=("$src") + head -1 "$LOG/$(basename "$obj").log" | sed "s|^| FAIL $src: |" + fi +done < <(find lang/toy -name '*.c' | sort) + +echo echo "=== compiling driver/ ===" for src in $(ls driver/*.c | sort); do base="$(basename "$src")" @@ -74,7 +123,7 @@ for src in $(ls driver/*.c | sort); do # -fno-{,asynchronous-}unwind-tables: cfree's macho_read doesn't yet # ingest the section-relative UNSIGNED relocs that clang emits in # __LD,__compact_unwind. Suppress the section entirely. 
- if clang -arch arm64 -isysroot "$SDK" -Iinclude \ + if clang -arch arm64 -isysroot "$SDK" -Iinclude -I. \ -fno-unwind-tables -fno-asynchronous-unwind-tables \ -c "$src" -o "$obj" >"$LOG/$base.log" 2>&1; then clang_objs+=("$obj") @@ -90,7 +139,7 @@ for src in $(ls driver/*.c | sort); do printf ' ok %s\n' "$src" else fail_driver+=("$src (cfree)") - head -1 "$LOG/$base.log" | sed "s|^| FAIL $src: |" + head -1 "$LOG/$(basename "$obj").log" | sed "s|^| FAIL $src: |" fi done @@ -99,9 +148,11 @@ echo "=== compile summary ===" echo " cfree objects: ${#cfree_objs[@]}" echo " clang objects: ${#clang_objs[@]}" echo " src failures: ${#fail_src[@]}" +echo " lang failures: ${#fail_lang[@]}" echo " driver failures: ${#fail_driver[@]}" -if [ "${#fail_src[@]}" -gt 0 ] || [ "${#fail_driver[@]}" -gt 0 ]; then +if [ "${#fail_src[@]}" -gt 0 ] || [ "${#fail_lang[@]}" -gt 0 ] || \ + [ "${#fail_driver[@]}" -gt 0 ]; then echo echo "compile failures present; skipping link" >&2 exit 1 @@ -109,7 +160,7 @@ fi echo echo "=== linking with cfree ld ===" -BIN_OUT="$OUT/cfree-stage2" +BIN_OUT="${CFREE_STAGE_OUTPUT:-$OUT/cfree-stage2}" LIBSYS_DIR="$SDK/usr/lib" if [ ! -f "$LIBSYS_DIR/libSystem.B.tbd" ] && \ [ ! 
-f "$LIBSYS_DIR/libSystem.tbd" ]; then diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c @@ -379,7 +379,7 @@ static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) { AAImpl* a = impl_of(t); AASlot* s = aa64_slot_get(a, op.v.frame_slot); if (!s) compiler_panic(t->c, a->loc, "aarch64 agg: bad slot"); - aa64_emit32(t->mc, aa64_sub_imm(1, scratch, 29, s->off, 0)); + aa64_emit_addr_adjust(t->mc, scratch, 29, -(i32)s->off); return scratch; } compiler_panic(t->c, impl_of(t)->loc, @@ -912,7 +912,7 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, if (av->storage.kind == OPK_LOCAL) { AASlot* s = aa64_slot_get(a, av->storage.v.frame_slot); if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad byval slot"); - aa64_emit32(t->mc, aa64_sub_imm(1, dst_reg, 29, s->off, 0)); + aa64_emit_addr_adjust(t->mc, dst_reg, 29, -(i32)s->off); } else if (av->storage.kind == OPK_INDIRECT) { aa64_emit_addr_adjust(t->mc, dst_reg, av->storage.v.ind.base & 0x1f, av->storage.v.ind.ofs); @@ -1196,7 +1196,7 @@ static void aa_call(CGTarget* t, const CGCallDesc* d) { } AASlot* s = aa64_slot_get(a, d->ret.storage.v.frame_slot); if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad sret slot"); - aa64_emit32(mc, aa64_sub_imm(1, 8, 29, s->off, 0)); + aa64_emit_addr_adjust(mc, 8, 29, -(i32)s->off); } for (u32 i = 0; i < d->nargs; ++i) { @@ -1778,7 +1778,7 @@ static void aa_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma, AASlot* s = aa64_slot_get(a, addr.v.frame_slot); if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_load: bad slot"); base = AA_TMP0; - aa64_emit32(mc, aa64_sub_imm(1, base, 29, s->off, 0)); + aa64_emit_addr_adjust(mc, base, 29, -(i32)s->off); } else { compiler_panic(t->c, a->loc, "aarch64 atomic_load: addr kind %d unsupported", @@ -1816,7 +1816,7 @@ static void aa_atomic_store(CGTarget* t, Operand addr, Operand src, AASlot* s = aa64_slot_get(a, addr.v.frame_slot); if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_store: bad slot"); 
base = AA_TMP0; - aa64_emit32(mc, aa64_sub_imm(1, base, 29, s->off, 0)); + aa64_emit_addr_adjust(mc, base, 29, -(i32)s->off); } else { compiler_panic(t->c, a->loc, "aarch64 atomic_store: addr kind %d unsupported", @@ -1861,7 +1861,7 @@ static void aa_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr, } else if (addr.kind == OPK_LOCAL) { AASlot* s = aa64_slot_get(a, addr.v.frame_slot); if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: bad slot"); - aa64_emit32(mc, aa64_sub_imm(1, AA_TMP0, 29, s->off, 0)); + aa64_emit_addr_adjust(mc, AA_TMP0, 29, -(i32)s->off); } else { compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: addr kind %d unsupported", (int)addr.kind); @@ -1914,7 +1914,7 @@ static void aa_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr, else if (addr.kind == OPK_LOCAL) { AASlot* s = aa64_slot_get(a, addr.v.frame_slot); if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_cas: bad slot"); - aa64_emit32(mc, aa64_sub_imm(1, AA_TMP0, 29, s->off, 0)); + aa64_emit_addr_adjust(mc, AA_TMP0, 29, -(i32)s->off); } else { compiler_panic(t->c, a->loc, "aarch64 atomic_cas: addr kind %d unsupported", (int)addr.kind); diff --git a/test/parse/cases/6_5_15_05_cond_const_aggregate_compound.c b/test/parse/cases/6_5_15_05_cond_const_aggregate_compound.c @@ -0,0 +1,11 @@ +typedef struct Pair { + int a; + int b; +} Pair; + +int test_main(void) { + const Pair p = {7, 11}; + const Pair* pp = &p; + Pair r = pp ? 
*pp : (Pair){1, 2}; + return r.a + r.b; +} diff --git a/test/parse/cases/6_5_15_05_cond_const_aggregate_compound.expected b/test/parse/cases/6_5_15_05_cond_const_aggregate_compound.expected @@ -0,0 +1 @@ +18 diff --git a/test/parse/cases/6_5_2_2_09_large_frame_struct_byval.c b/test/parse/cases/6_5_2_2_09_large_frame_struct_byval.c @@ -0,0 +1,19 @@ +typedef struct Big { + int a; + int b; + int c; + int d; + int e; +} Big; + +int take_big(Big b) { + return b.a + b.e; +} + +int test_main(void) { + char pad[8192]; + Big b = {3, 4, 5, 6, 7}; + pad[0] = 1; + pad[8191] = 2; + return take_big(b) + pad[0] + pad[8191]; +} diff --git a/test/parse/cases/6_5_2_2_09_large_frame_struct_byval.expected b/test/parse/cases/6_5_2_2_09_large_frame_struct_byval.expected @@ -0,0 +1 @@ +13 diff --git a/test/parse/cases/6_5_4_01_int_to_ptr_deref.c b/test/parse/cases/6_5_4_01_int_to_ptr_deref.c @@ -0,0 +1,6 @@ +int test_main(void) { + int x = 3; + unsigned long raw = (unsigned long)&x; + *(int*)raw = 9; + return x; +} diff --git a/test/parse/cases/6_5_4_01_int_to_ptr_deref.expected b/test/parse/cases/6_5_4_01_int_to_ptr_deref.expected @@ -0,0 +1 @@ +9 diff --git a/test/parse/cases/6_7_9_36_designated_compound_literal_subobject.c b/test/parse/cases/6_7_9_36_designated_compound_literal_subobject.c @@ -0,0 +1,19 @@ +typedef struct Inner { + int a; + int b; +} Inner; + +typedef struct Outer { + int x; + Inner inner; + int y; +} Outer; + +int test_main(void) { + Outer o = { + .x = 3, + .inner = (Inner){4, 5}, + .y = 6, + }; + return o.x + o.inner.a * 10 + o.inner.b * 20 + o.y; +} diff --git a/test/parse/cases/6_7_9_36_designated_compound_literal_subobject.expected b/test/parse/cases/6_7_9_36_designated_compound_literal_subobject.expected @@ -0,0 +1 @@ +149