kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 77167e7c944869160b845e1a94f3a81a9105f435
parent 251ee264b52abe8f81f692c419a25716672bf9a2
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 22 May 2026 11:50:04 -0700

metrics speedup, defer logging

Diffstat:
Mdriver/inputs.c | 22+++++++++++++++++++---
Mdriver/run.c | 122++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
Msrc/link/link_layout.c | 20++++++++++++++++++++
Msrc/opt/opt.c | 22++++++++++++++++++----
4 files changed, 165 insertions(+), 21 deletions(-)

diff --git a/driver/inputs.c b/driver/inputs.c @@ -4,6 +4,8 @@ #include <stdint.h> #include <string.h> +#include <cfree/frontend.h> + int driver_inputs_init(DriverInputs* in, DriverEnv* env, const char* tool, int argc) { size_t bound = (size_t)argc; @@ -153,11 +155,15 @@ int driver_inputs_compile_and_jit(DriverInputs* in, CfreeCompiler* compiler, } /* Load source files into CfreeBytes and compile them into object builders. */ + cfree_frontend_metrics_scope_begin(compiler, "driver.load_sources"); for (i = 0; i < in->nsources; ++i) { if (driver_load_bytes(io, tool, in->sources[i], &src_lf[i], - &src_bytes[i]) != 0) + &src_bytes[i]) != 0) { + cfree_frontend_metrics_scope_end(compiler, "driver.load_sources"); goto out; + } } + cfree_frontend_metrics_scope_end(compiler, "driver.load_sources"); for (i = 0; i < in->nsources; ++i) { CfreeLanguage lang = cfree_language_for_path(in->sources[i]); @@ -203,19 +209,27 @@ int driver_inputs_compile_and_jit(DriverInputs* in, CfreeCompiler* compiler, if (st != CFREE_OK) goto out; } + cfree_frontend_metrics_scope_begin(compiler, "driver.load_objects"); for (i = 0; i < in->nobject_files; ++i) { if (driver_load_bytes(io, tool, in->object_files[i], &obj_lf[i], - &obj_in[i]) != 0) + &obj_in[i]) != 0) { + cfree_frontend_metrics_scope_end(compiler, "driver.load_objects"); goto out; + } } + cfree_frontend_metrics_scope_end(compiler, "driver.load_objects"); + cfree_frontend_metrics_scope_begin(compiler, "driver.load_archives"); for (i = 0; i < in->narchives; ++i) { if (driver_load_bytes(io, tool, in->archives[i], &arch_lf[i], - &arch_in[i].bytes) != 0) + &arch_in[i].bytes) != 0) { + cfree_frontend_metrics_scope_end(compiler, "driver.load_archives"); goto out; + } arch_in[i].link_mode = CFREE_LM_DEFAULT; arch_in[i].whole_archive = 0; arch_in[i].group_id = 0; } + cfree_frontend_metrics_scope_end(compiler, "driver.load_archives"); { CfreeLinkSessionOptions lopts; @@ -226,6 +240,7 @@ int driver_inputs_compile_and_jit(DriverInputs* in, CfreeCompiler* compiler, lopts.jit_host = host; lopts.extern_resolver = extern_resolver; lopts.extern_resolver_user = extern_resolver_user; + cfree_frontend_metrics_scope_begin(compiler, "driver.link_setup"); st = cfree_link_session_new(compiler, &lopts, &link); for (i = 0; st == CFREE_OK && i < nsrc; ++i) st = cfree_link_session_add_obj(link, objs[i]); @@ -233,6 +248,7 @@ int driver_inputs_compile_and_jit(DriverInputs* in, CfreeCompiler* compiler, st = cfree_link_session_add_obj_bytes(link, &obj_in[i]); for (i = 0; st == CFREE_OK && i < in->narchives; ++i) st = cfree_link_session_add_archive_bytes(link, &arch_in[i]); + cfree_frontend_metrics_scope_end(compiler, "driver.link_setup"); if (st == CFREE_OK) st = cfree_link_session_jit(link, out_jit); rc = st == CFREE_OK ? 0 : 1; } diff --git a/driver/run.c b/driver/run.c @@ -46,7 +46,38 @@ typedef struct RunOptions { uint32_t prog_argc; } RunOptions; +/* Buffered metrics sink. The hot path (scope_begin/end/count called from + * libcfree during compile/link/JIT) does no I/O: each event is appended to + * a segmented event log, and scopes maintain only a small live stack to + * carry the start_ns across to scope_end. All formatting and stderr writes + * happen once in run_metrics_finish at the very end of the run. + * + * Storage is a singly-linked list of fixed-size segments. Each segment is + * allocated once and never moves — geometric vector growth would amortize + * to O(log N) copies, but with a chunk list we get zero copies and the + * hot path is just an index increment + 24-byte store. We only ever drain + * the log sequentially, so a segment table is unnecessary. */ + #define RUN_METRIC_MAX_DEPTH 64u +#define RUN_METRIC_SEG_EVENTS 256u + +typedef enum RunMetricEventKind { + RUN_METRIC_EVENT_SCOPE = 0, + RUN_METRIC_EVENT_COUNT = 1, +} RunMetricEventKind; + +typedef struct RunMetricEvent { + const char* name; /* aliased; callers pass string literals */ + uint64_t value; /* SCOPE: elapsed_ns; COUNT: count value */ + uint16_t depth; /* nesting depth at emit time (0-based) */ + uint16_t kind; +} RunMetricEvent; + +typedef struct RunMetricSeg { + struct RunMetricSeg* next; + uint32_t nused; + RunMetricEvent events[RUN_METRIC_SEG_EVENTS]; +} RunMetricSeg; typedef struct RunMetricFrame { const char* name; @@ -58,8 +89,40 @@ typedef struct RunMetrics { RunMetricFrame stack[RUN_METRIC_MAX_DEPTH]; uint32_t depth; int bench_time; + + DriverEnv* env; + RunMetricSeg* head; + RunMetricSeg* tail; + int oom; /* sticky: drop further events after an allocation failure */ } RunMetrics; +static void run_metrics_push_event(RunMetrics* m, RunMetricEventKind kind, + const char* name, uint64_t value, + uint32_t depth) { + RunMetricSeg* seg; + RunMetricEvent* ev; + if (m->oom) return; + seg = m->tail; + if (!seg || seg->nused == RUN_METRIC_SEG_EVENTS) { + RunMetricSeg* ns = driver_alloc_zeroed(m->env, sizeof(RunMetricSeg)); + if (!ns) { + m->oom = 1; + return; + } + if (m->tail) + m->tail->next = ns; + else + m->head = ns; + m->tail = ns; + seg = ns; + } + ev = &seg->events[seg->nused++]; + ev->name = name; + ev->value = value; + ev->depth = (uint16_t)depth; + ev->kind = (uint16_t)kind; +} + static void run_metrics_scope_begin(void* user, const char* name) { RunMetrics* m = (RunMetrics*)user; if (!m || m->depth >= RUN_METRIC_MAX_DEPTH) return; @@ -82,29 +145,26 @@ static void run_metrics_scope_end(void* user, const char* name) { end_ns = driver_now_ns(); elapsed = (end_ns >= f.start_ns) ? (end_ns - f.start_ns) : 0; scope_name = f.name ? f.name : name; - if (m->bench_time) { - driver_logf("cfree-run %s -- %.3f msec", scope_name, - (double)elapsed / 1000000.0); - } else { - driver_logf("%*s%s %.3f ms", (int)(depth * 2u), "", scope_name, - (double)elapsed / 1000000.0); - } + run_metrics_push_event(m, RUN_METRIC_EVENT_SCOPE, scope_name, elapsed, depth); } static void run_metrics_count(void* user, const char* name, uint64_t value) { RunMetrics* m = (RunMetrics*)user; - uint32_t depth = m ? m->depth : 0; - driver_logf("%*s%s=%llu", (int)(depth * 2u), "", name, - (unsigned long long)value); + if (!m) return; + run_metrics_push_event(m, RUN_METRIC_EVENT_COUNT, name, value, m->depth); } -static void run_metrics_init(RunMetrics* m, int bench_time) { +static void run_metrics_init(RunMetrics* m, DriverEnv* env, int bench_time) { m->iface.scope_begin = run_metrics_scope_begin; m->iface.scope_end = run_metrics_scope_end; m->iface.count = run_metrics_count; m->iface.user = m; m->depth = 0; m->bench_time = bench_time; + m->env = env; + m->head = NULL; + m->tail = NULL; + m->oom = 0; } static void run_metrics_begin(RunMetrics* m, const char* name) { @@ -115,11 +175,45 @@ static void run_metrics_end(RunMetrics* m, const char* name) { if (m) m->iface.scope_end(m->iface.user, name); } +/* Close any still-open scopes (early-return paths emit synthesized ends), + * then walk the segmented event log in push order and format each event + * to stderr. Free segments after draining. Called exactly once per run. */ static void run_metrics_finish(RunMetrics* m) { - while (m && m->depth) { + RunMetricSeg* seg; + if (!m) return; + while (m->depth) { const char* name = m->stack[m->depth - 1u].name; - run_metrics_end(m, name); + run_metrics_scope_end(m, name); + } + for (seg = m->head; seg; seg = seg->next) { + uint32_t i; + for (i = 0; i < seg->nused; ++i) { + const RunMetricEvent* e = &seg->events[i]; + if (e->kind == RUN_METRIC_EVENT_SCOPE) { + if (m->bench_time) { + driver_logf("cfree-run %s -- %.3f msec", e->name, + (double)e->value / 1000000.0); + } else { + driver_logf("%*s%s %.3f ms", (int)((unsigned)e->depth * 2u), "", + e->name, (double)e->value / 1000000.0); + } + } else { + driver_logf("%*s%s=%llu", (int)((unsigned)e->depth * 2u), "", e->name, + (unsigned long long)e->value); + } + } + } + if (m->oom) { + driver_logf("(metrics: event buffer allocation failed; some events lost)"); + } + seg = m->head; + while (seg) { + RunMetricSeg* next = seg->next; + driver_free(m->env, seg, sizeof(RunMetricSeg)); + seg = next; } + m->head = NULL; + m->tail = NULL; } static void run_bench_time(const char* name, uint64_t ns) { @@ -579,7 +673,7 @@ int driver_run(int argc, char** argv) { } if (ro.metrics || ro.bench_time) { - run_metrics_init(&metrics_storage, ro.bench_time); + run_metrics_init(&metrics_storage, &env, ro.bench_time); metrics = &metrics_storage; env.metrics = &metrics->iface; if (ro.metrics && !ro.bench_time) driver_logf("cfree metrics:"); diff --git a/src/link/link_layout.c b/src/link/link_layout.c @@ -987,7 +987,10 @@ LinkImage* link_resolve(Linker* l) { metrics_scope_begin(l->c, "link.emit_segment_bytes"); if (!l->jit_mode) link_emit_segment_bytes(l, img); metrics_scope_end(l->c, "link.emit_segment_bytes"); + metrics_scope_begin(l->c, "link.assign_vaddrs"); link_assign_symbol_vaddrs(l, img); + metrics_scope_end(l->c, "link.assign_vaddrs"); + metrics_scope_begin(l->c, "link.emit_boundaries"); link_emit_array_boundaries(l, img); link_emit_tls_boundaries(l, img); link_emit_encoding_section_boundaries(l, img); @@ -1008,17 +1011,28 @@ LinkImage* link_resolve(Linker* l) { link_emit_boundary_sym(l, img, arch->global_pointer_symbol, gp_vaddr); } } + metrics_scope_end(l->c, "link.emit_boundaries"); + metrics_scope_begin(l->c, "link.resolve_undefs"); link_resolve_undefs(l, img); + metrics_scope_end(l->c, "link.resolve_undefs"); + metrics_scope_begin(l->c, "link.gc_drop_dead"); link_gc_drop_dead_globals(l, img, &g); + metrics_scope_end(l->c, "link.gc_drop_dead"); + metrics_scope_begin(l->c, "link.layout_iplt"); link_layout_iplt(l, img); if (img->niplt) link_emit_array_boundaries(l, img); + metrics_scope_end(l->c, "link.layout_iplt"); { LinkSymId* got_map = NULL; LinkSymId* stub_map = NULL; u32 map_size = LinkSyms_count(&img->syms) + 1u; + metrics_scope_begin(l->c, "link.layout_jit_stubs"); link_layout_jit_stubs(l, img, map_size, &stub_map); + metrics_scope_end(l->c, "link.layout_jit_stubs"); + metrics_scope_begin(l->c, "link.layout_got"); if (l->c->target.obj != CFREE_OBJ_MACHO || !l->emit_static_exe) link_layout_got(l, img, map_size, &got_map); + metrics_scope_end(l->c, "link.layout_got"); metrics_scope_begin(l->c, "link.emit_relocations"); link_emit_relocations(l, img, got_map, stub_map); metrics_count(l->c, "link.syms", LinkSyms_count(&img->syms)); @@ -1027,12 +1041,18 @@ LinkImage* link_resolve(Linker* l) { if (got_map) h->free(h, got_map, sizeof(*got_map) * map_size); if (stub_map) h->free(h, stub_map, sizeof(*stub_map) * map_size); } + metrics_scope_begin(l->c, "link.layout_dyn"); layout_dyn(l, img); + metrics_scope_end(l->c, "link.layout_dyn"); + metrics_scope_begin(l->c, "link.resolve_entry"); link_resolve_entry(l, img); + metrics_scope_end(l->c, "link.resolve_entry"); link_gc_live_free(&g, h); } + metrics_scope_begin(l->c, "link.capture_debug"); link_capture_debug_inputs(l, img); + metrics_scope_end(l->c, "link.capture_debug"); metrics_scope_end(l->c, "link.resolve.total"); return img; diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -1326,13 +1326,21 @@ static void opt_run_lowering_pipeline(OptImpl* o, const char* total_scope, metrics_count(o->c, "opt.blocks", o->f->nblocks); metrics_count(o->c, "opt.insts", func_inst_count(o->f)); metrics_count(o->c, "opt.pregs", o->f->npregs); - metrics_scope_begin(o->c, "opt.build_cfg"); + metrics_scope_begin(o->c, "opt.cfg.build_1"); opt_build_cfg(o->f); + metrics_scope_end(o->c, "opt.cfg.build_1"); + metrics_scope_begin(o->c, "opt.cfg.jump_cleanup_cfg"); opt_jump_cleanup(o->f, OPT_JUMP_CLEANUP_CFG); + metrics_scope_end(o->c, "opt.cfg.jump_cleanup_cfg"); + metrics_scope_begin(o->c, "opt.cfg.build_2"); opt_build_cfg(o->f); + metrics_scope_end(o->c, "opt.cfg.build_2"); + metrics_scope_begin(o->c, "opt.cfg.simplify_local"); opt_simplify_local(o->f); + metrics_scope_end(o->c, "opt.cfg.simplify_local"); + metrics_scope_begin(o->c, "opt.cfg.verify"); opt_verify(o->f, "lowering-cfg"); - metrics_scope_end(o->c, "opt.build_cfg"); + metrics_scope_end(o->c, "opt.cfg.verify"); metrics_scope_begin(o->c, "opt.machinize"); opt_machinize(o->f, o->target); metrics_scope_end(o->c, "opt.machinize"); @@ -1398,12 +1406,18 @@ static void opt_run_lowering_pipeline(OptImpl* o, const char* total_scope, metrics_scope_begin(o->c, "opt.dce"); opt_dce(o->f); metrics_scope_end(o->c, "opt.dce"); - metrics_scope_begin(o->c, "opt.jump_cleanup"); + metrics_scope_begin(o->c, "opt.post_ra.jump_cleanup_cfg"); opt_jump_cleanup(o->f, OPT_JUMP_CLEANUP_CFG); + metrics_scope_end(o->c, "opt.post_ra.jump_cleanup_cfg"); + metrics_scope_begin(o->c, "opt.post_ra.build_cfg"); opt_build_cfg(o->f); + metrics_scope_end(o->c, "opt.post_ra.build_cfg"); + metrics_scope_begin(o->c, "opt.post_ra.verify"); opt_verify(o->f, "post-ra-jump-cfg"); + metrics_scope_end(o->c, "opt.post_ra.verify"); + metrics_scope_begin(o->c, "opt.post_ra.jump_cleanup_layout"); opt_jump_cleanup(o->f, OPT_JUMP_CLEANUP_LAYOUT); - metrics_scope_end(o->c, "opt.jump_cleanup"); + metrics_scope_end(o->c, "opt.post_ra.jump_cleanup_layout"); metrics_scope_begin(o->c, "opt.emit"); opt_emit(o->c, o->f, o->target); metrics_scope_end(o->c, "opt.emit");