commit 77167e7c944869160b845e1a94f3a81a9105f435
parent 251ee264b52abe8f81f692c419a25716672bf9a2
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 22 May 2026 11:50:04 -0700
metrics speedup, defer logging
Diffstat:
4 files changed, 165 insertions(+), 21 deletions(-)
diff --git a/driver/inputs.c b/driver/inputs.c
@@ -4,6 +4,8 @@
#include <stdint.h>
#include <string.h>
+#include <cfree/frontend.h>
+
int driver_inputs_init(DriverInputs* in, DriverEnv* env, const char* tool,
int argc) {
size_t bound = (size_t)argc;
@@ -153,11 +155,15 @@ int driver_inputs_compile_and_jit(DriverInputs* in, CfreeCompiler* compiler,
}
/* Load source files into CfreeBytes and compile them into object builders. */
+ cfree_frontend_metrics_scope_begin(compiler, "driver.load_sources");
for (i = 0; i < in->nsources; ++i) {
if (driver_load_bytes(io, tool, in->sources[i], &src_lf[i],
- &src_bytes[i]) != 0)
+ &src_bytes[i]) != 0) {
+ cfree_frontend_metrics_scope_end(compiler, "driver.load_sources");
goto out;
+ }
}
+ cfree_frontend_metrics_scope_end(compiler, "driver.load_sources");
for (i = 0; i < in->nsources; ++i) {
CfreeLanguage lang = cfree_language_for_path(in->sources[i]);
@@ -203,19 +209,27 @@ int driver_inputs_compile_and_jit(DriverInputs* in, CfreeCompiler* compiler,
if (st != CFREE_OK) goto out;
}
+ cfree_frontend_metrics_scope_begin(compiler, "driver.load_objects");
for (i = 0; i < in->nobject_files; ++i) {
if (driver_load_bytes(io, tool, in->object_files[i], &obj_lf[i],
- &obj_in[i]) != 0)
+ &obj_in[i]) != 0) {
+ cfree_frontend_metrics_scope_end(compiler, "driver.load_objects");
goto out;
+ }
}
+ cfree_frontend_metrics_scope_end(compiler, "driver.load_objects");
+ cfree_frontend_metrics_scope_begin(compiler, "driver.load_archives");
for (i = 0; i < in->narchives; ++i) {
if (driver_load_bytes(io, tool, in->archives[i], &arch_lf[i],
- &arch_in[i].bytes) != 0)
+ &arch_in[i].bytes) != 0) {
+ cfree_frontend_metrics_scope_end(compiler, "driver.load_archives");
goto out;
+ }
arch_in[i].link_mode = CFREE_LM_DEFAULT;
arch_in[i].whole_archive = 0;
arch_in[i].group_id = 0;
}
+ cfree_frontend_metrics_scope_end(compiler, "driver.load_archives");
{
CfreeLinkSessionOptions lopts;
@@ -226,6 +240,7 @@ int driver_inputs_compile_and_jit(DriverInputs* in, CfreeCompiler* compiler,
lopts.jit_host = host;
lopts.extern_resolver = extern_resolver;
lopts.extern_resolver_user = extern_resolver_user;
+ cfree_frontend_metrics_scope_begin(compiler, "driver.link_setup");
st = cfree_link_session_new(compiler, &lopts, &link);
for (i = 0; st == CFREE_OK && i < nsrc; ++i)
st = cfree_link_session_add_obj(link, objs[i]);
@@ -233,6 +248,7 @@ int driver_inputs_compile_and_jit(DriverInputs* in, CfreeCompiler* compiler,
st = cfree_link_session_add_obj_bytes(link, &obj_in[i]);
for (i = 0; st == CFREE_OK && i < in->narchives; ++i)
st = cfree_link_session_add_archive_bytes(link, &arch_in[i]);
+ cfree_frontend_metrics_scope_end(compiler, "driver.link_setup");
if (st == CFREE_OK) st = cfree_link_session_jit(link, out_jit);
rc = st == CFREE_OK ? 0 : 1;
}
diff --git a/driver/run.c b/driver/run.c
@@ -46,7 +46,38 @@ typedef struct RunOptions {
uint32_t prog_argc;
} RunOptions;
+/* Buffered metrics sink. The hot path (scope_begin/end/count called from
+ * libcfree during compile/link/JIT) does no I/O: each event is appended to
+ * a segmented event log, and scopes maintain only a small live stack to
+ * carry the start_ns across to scope_end. All formatting and stderr writes
+ * happen once in run_metrics_finish at the very end of the run.
+ *
+ * Storage is a singly-linked list of fixed-size segments. Each segment is
+ * allocated once and never moves — geometric vector growth would amortize
+ * to O(log N) copies, but with a chunk list we get zero copies and the
+ * hot path is just an index increment + 24-byte store. We only ever drain
+ * the log sequentially, so a segment table is unnecessary. */
+
#define RUN_METRIC_MAX_DEPTH 64u
+#define RUN_METRIC_SEG_EVENTS 256u
+
+typedef enum RunMetricEventKind {
+ RUN_METRIC_EVENT_SCOPE = 0,
+ RUN_METRIC_EVENT_COUNT = 1,
+} RunMetricEventKind;
+
+typedef struct RunMetricEvent {
+ const char* name; /* aliased; callers pass string literals */
+ uint64_t value; /* SCOPE: elapsed_ns; COUNT: count value */
+ uint16_t depth; /* nesting depth at emit time (0-based) */
+ uint16_t kind;
+} RunMetricEvent;
+
+typedef struct RunMetricSeg {
+ struct RunMetricSeg* next;
+ uint32_t nused;
+ RunMetricEvent events[RUN_METRIC_SEG_EVENTS];
+} RunMetricSeg;
typedef struct RunMetricFrame {
const char* name;
@@ -58,8 +89,40 @@ typedef struct RunMetrics {
RunMetricFrame stack[RUN_METRIC_MAX_DEPTH];
uint32_t depth;
int bench_time;
+
+ DriverEnv* env;
+ RunMetricSeg* head;
+ RunMetricSeg* tail;
+ int oom; /* sticky: drop further events after an allocation failure */
} RunMetrics;
+static void run_metrics_push_event(RunMetrics* m, RunMetricEventKind kind,
+ const char* name, uint64_t value,
+ uint32_t depth) {
+ RunMetricSeg* seg;
+ RunMetricEvent* ev;
+ if (m->oom) return;
+ seg = m->tail;
+ if (!seg || seg->nused == RUN_METRIC_SEG_EVENTS) {
+ RunMetricSeg* ns = driver_alloc_zeroed(m->env, sizeof(RunMetricSeg));
+ if (!ns) {
+ m->oom = 1;
+ return;
+ }
+ if (m->tail)
+ m->tail->next = ns;
+ else
+ m->head = ns;
+ m->tail = ns;
+ seg = ns;
+ }
+ ev = &seg->events[seg->nused++];
+ ev->name = name;
+ ev->value = value;
+ ev->depth = (uint16_t)depth;
+ ev->kind = (uint16_t)kind;
+}
+
static void run_metrics_scope_begin(void* user, const char* name) {
RunMetrics* m = (RunMetrics*)user;
if (!m || m->depth >= RUN_METRIC_MAX_DEPTH) return;
@@ -82,29 +145,26 @@ static void run_metrics_scope_end(void* user, const char* name) {
end_ns = driver_now_ns();
elapsed = (end_ns >= f.start_ns) ? (end_ns - f.start_ns) : 0;
scope_name = f.name ? f.name : name;
- if (m->bench_time) {
- driver_logf("cfree-run %s -- %.3f msec", scope_name,
- (double)elapsed / 1000000.0);
- } else {
- driver_logf("%*s%s %.3f ms", (int)(depth * 2u), "", scope_name,
- (double)elapsed / 1000000.0);
- }
+ run_metrics_push_event(m, RUN_METRIC_EVENT_SCOPE, scope_name, elapsed, depth);
}
static void run_metrics_count(void* user, const char* name, uint64_t value) {
RunMetrics* m = (RunMetrics*)user;
- uint32_t depth = m ? m->depth : 0;
- driver_logf("%*s%s=%llu", (int)(depth * 2u), "", name,
- (unsigned long long)value);
+ if (!m) return;
+ run_metrics_push_event(m, RUN_METRIC_EVENT_COUNT, name, value, m->depth);
}
-static void run_metrics_init(RunMetrics* m, int bench_time) {
+static void run_metrics_init(RunMetrics* m, DriverEnv* env, int bench_time) {
m->iface.scope_begin = run_metrics_scope_begin;
m->iface.scope_end = run_metrics_scope_end;
m->iface.count = run_metrics_count;
m->iface.user = m;
m->depth = 0;
m->bench_time = bench_time;
+ m->env = env;
+ m->head = NULL;
+ m->tail = NULL;
+ m->oom = 0;
}
static void run_metrics_begin(RunMetrics* m, const char* name) {
@@ -115,11 +175,45 @@ static void run_metrics_end(RunMetrics* m, const char* name) {
if (m) m->iface.scope_end(m->iface.user, name);
}
+/* Close any still-open scopes (early-return paths emit synthesized ends),
+ * then walk the segmented event log in push order and format each event
+ * to stderr. Free segments after draining. Called exactly once per run. */
static void run_metrics_finish(RunMetrics* m) {
- while (m && m->depth) {
+ RunMetricSeg* seg;
+ if (!m) return;
+ while (m->depth) {
const char* name = m->stack[m->depth - 1u].name;
- run_metrics_end(m, name);
+ run_metrics_scope_end(m, name);
+ }
+ for (seg = m->head; seg; seg = seg->next) {
+ uint32_t i;
+ for (i = 0; i < seg->nused; ++i) {
+ const RunMetricEvent* e = &seg->events[i];
+ if (e->kind == RUN_METRIC_EVENT_SCOPE) {
+ if (m->bench_time) {
+ driver_logf("cfree-run %s -- %.3f msec", e->name,
+ (double)e->value / 1000000.0);
+ } else {
+ driver_logf("%*s%s %.3f ms", (int)((unsigned)e->depth * 2u), "",
+ e->name, (double)e->value / 1000000.0);
+ }
+ } else {
+ driver_logf("%*s%s=%llu", (int)((unsigned)e->depth * 2u), "", e->name,
+ (unsigned long long)e->value);
+ }
+ }
+ }
+ if (m->oom) {
+ driver_logf("(metrics: event buffer allocation failed; some events lost)");
+ }
+ seg = m->head;
+ while (seg) {
+ RunMetricSeg* next = seg->next;
+ driver_free(m->env, seg, sizeof(RunMetricSeg));
+ seg = next;
}
+ m->head = NULL;
+ m->tail = NULL;
}
static void run_bench_time(const char* name, uint64_t ns) {
@@ -579,7 +673,7 @@ int driver_run(int argc, char** argv) {
}
if (ro.metrics || ro.bench_time) {
- run_metrics_init(&metrics_storage, ro.bench_time);
+ run_metrics_init(&metrics_storage, &env, ro.bench_time);
metrics = &metrics_storage;
env.metrics = &metrics->iface;
if (ro.metrics && !ro.bench_time) driver_logf("cfree metrics:");
diff --git a/src/link/link_layout.c b/src/link/link_layout.c
@@ -987,7 +987,10 @@ LinkImage* link_resolve(Linker* l) {
metrics_scope_begin(l->c, "link.emit_segment_bytes");
if (!l->jit_mode) link_emit_segment_bytes(l, img);
metrics_scope_end(l->c, "link.emit_segment_bytes");
+ metrics_scope_begin(l->c, "link.assign_vaddrs");
link_assign_symbol_vaddrs(l, img);
+ metrics_scope_end(l->c, "link.assign_vaddrs");
+ metrics_scope_begin(l->c, "link.emit_boundaries");
link_emit_array_boundaries(l, img);
link_emit_tls_boundaries(l, img);
link_emit_encoding_section_boundaries(l, img);
@@ -1008,17 +1011,28 @@ LinkImage* link_resolve(Linker* l) {
link_emit_boundary_sym(l, img, arch->global_pointer_symbol, gp_vaddr);
}
}
+ metrics_scope_end(l->c, "link.emit_boundaries");
+ metrics_scope_begin(l->c, "link.resolve_undefs");
link_resolve_undefs(l, img);
+ metrics_scope_end(l->c, "link.resolve_undefs");
+ metrics_scope_begin(l->c, "link.gc_drop_dead");
link_gc_drop_dead_globals(l, img, &g);
+ metrics_scope_end(l->c, "link.gc_drop_dead");
+ metrics_scope_begin(l->c, "link.layout_iplt");
link_layout_iplt(l, img);
if (img->niplt) link_emit_array_boundaries(l, img);
+ metrics_scope_end(l->c, "link.layout_iplt");
{
LinkSymId* got_map = NULL;
LinkSymId* stub_map = NULL;
u32 map_size = LinkSyms_count(&img->syms) + 1u;
+ metrics_scope_begin(l->c, "link.layout_jit_stubs");
link_layout_jit_stubs(l, img, map_size, &stub_map);
+ metrics_scope_end(l->c, "link.layout_jit_stubs");
+ metrics_scope_begin(l->c, "link.layout_got");
if (l->c->target.obj != CFREE_OBJ_MACHO || !l->emit_static_exe)
link_layout_got(l, img, map_size, &got_map);
+ metrics_scope_end(l->c, "link.layout_got");
metrics_scope_begin(l->c, "link.emit_relocations");
link_emit_relocations(l, img, got_map, stub_map);
metrics_count(l->c, "link.syms", LinkSyms_count(&img->syms));
@@ -1027,12 +1041,18 @@ LinkImage* link_resolve(Linker* l) {
if (got_map) h->free(h, got_map, sizeof(*got_map) * map_size);
if (stub_map) h->free(h, stub_map, sizeof(*stub_map) * map_size);
}
+ metrics_scope_begin(l->c, "link.layout_dyn");
layout_dyn(l, img);
+ metrics_scope_end(l->c, "link.layout_dyn");
+ metrics_scope_begin(l->c, "link.resolve_entry");
link_resolve_entry(l, img);
+ metrics_scope_end(l->c, "link.resolve_entry");
link_gc_live_free(&g, h);
}
+ metrics_scope_begin(l->c, "link.capture_debug");
link_capture_debug_inputs(l, img);
+ metrics_scope_end(l->c, "link.capture_debug");
metrics_scope_end(l->c, "link.resolve.total");
return img;
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -1326,13 +1326,21 @@ static void opt_run_lowering_pipeline(OptImpl* o, const char* total_scope,
metrics_count(o->c, "opt.blocks", o->f->nblocks);
metrics_count(o->c, "opt.insts", func_inst_count(o->f));
metrics_count(o->c, "opt.pregs", o->f->npregs);
- metrics_scope_begin(o->c, "opt.build_cfg");
+ metrics_scope_begin(o->c, "opt.cfg.build_1");
opt_build_cfg(o->f);
+ metrics_scope_end(o->c, "opt.cfg.build_1");
+ metrics_scope_begin(o->c, "opt.cfg.jump_cleanup_cfg");
opt_jump_cleanup(o->f, OPT_JUMP_CLEANUP_CFG);
+ metrics_scope_end(o->c, "opt.cfg.jump_cleanup_cfg");
+ metrics_scope_begin(o->c, "opt.cfg.build_2");
opt_build_cfg(o->f);
+ metrics_scope_end(o->c, "opt.cfg.build_2");
+ metrics_scope_begin(o->c, "opt.cfg.simplify_local");
opt_simplify_local(o->f);
+ metrics_scope_end(o->c, "opt.cfg.simplify_local");
+ metrics_scope_begin(o->c, "opt.cfg.verify");
opt_verify(o->f, "lowering-cfg");
- metrics_scope_end(o->c, "opt.build_cfg");
+ metrics_scope_end(o->c, "opt.cfg.verify");
metrics_scope_begin(o->c, "opt.machinize");
opt_machinize(o->f, o->target);
metrics_scope_end(o->c, "opt.machinize");
@@ -1398,12 +1406,18 @@ static void opt_run_lowering_pipeline(OptImpl* o, const char* total_scope,
metrics_scope_begin(o->c, "opt.dce");
opt_dce(o->f);
metrics_scope_end(o->c, "opt.dce");
- metrics_scope_begin(o->c, "opt.jump_cleanup");
+ metrics_scope_begin(o->c, "opt.post_ra.jump_cleanup_cfg");
opt_jump_cleanup(o->f, OPT_JUMP_CLEANUP_CFG);
+ metrics_scope_end(o->c, "opt.post_ra.jump_cleanup_cfg");
+ metrics_scope_begin(o->c, "opt.post_ra.build_cfg");
opt_build_cfg(o->f);
+ metrics_scope_end(o->c, "opt.post_ra.build_cfg");
+ metrics_scope_begin(o->c, "opt.post_ra.verify");
opt_verify(o->f, "post-ra-jump-cfg");
+ metrics_scope_end(o->c, "opt.post_ra.verify");
+ metrics_scope_begin(o->c, "opt.post_ra.jump_cleanup_layout");
opt_jump_cleanup(o->f, OPT_JUMP_CLEANUP_LAYOUT);
- metrics_scope_end(o->c, "opt.jump_cleanup");
+ metrics_scope_end(o->c, "opt.post_ra.jump_cleanup_layout");
metrics_scope_begin(o->c, "opt.emit");
opt_emit(o->c, o->f, o->target);
metrics_scope_end(o->c, "opt.emit");