kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 012b76ad1bca94a710e3848c71d7f61c471c1f8f
parent 3081a4983d8aabaad0177a593a25c0491c1bd714
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 27 May 2026 13:34:14 -0700

cg: dump semantic CG IR via cc --emit=ir

Add a textual dumper for the recorded CG-API tape (CgIrFunc) and expose
it through the driver and the public CfreeCodeOptions.

- src/cg/ir_dump.c: cg_ir_func_dump() renders the semantic IR as stable,
  line-oriented text (op-name tables for CgIrOp/BinOp/UnOp/CmpOp/ConvKind).
- opt: complete the stubbed opt_set_dump_writer; opt_on_func renders the
  CG tape before lowering. Fold the CFREE_DUMPCG debug path onto the same
  renderer so there is one CG-IR dumper.
- core.h: add emit_ir + ir_dump_writer to CfreeCodeOptions.
- session: wire the writer and require opt_level >= 1 (the tape only
  exists when the optimizer runs).
- cc: --emit=ir flag (no-link, -O1+ required), .ir output extension.
- tests: ir_recorder unit coverage + driver --emit=ir behavior checks.

Diffstat:
M driver/cc.c | 37 ++++++++++++++++++++++++++++++++++++-
M include/cfree/core.h | 8 ++++++++
M src/cg/ir.h | 4 ++++
A src/cg/ir_dump.c | 366 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/cg/ir_recorder.c | 2 ++
M src/cg/ir_recorder.h | 1 +
M src/cg/session.c | 7 +++++++
M src/opt/opt.c | 54 +++++++++++++++---------------------------------------
M test/cg/ir_recorder_test.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
M test/driver/run.sh | 36 ++++++++++++++++++++++++++++++++++++
10 files changed, 522 insertions(+), 40 deletions(-)

diff --git a/driver/cc.c b/driver/cc.c @@ -116,6 +116,7 @@ typedef struct CcOptions { int preprocess_only; /* -E */ int syntax_only; /* -fsyntax-only / check */ int emit_c_source; /* --emit=c */ + int emit_ir; /* --emit=ir */ int emit_asm_source; /* -S */ int opt_level; /* -O0/-O1/-O2 */ int debug_info; /* -g */ @@ -250,6 +251,12 @@ void driver_help_cc(void) { "SDK/sysroot\n" " --support-dir DIR cfree support " "root\n" + " -S [options] input.c emit assembly " + "(.s)\n" + " --emit=c [options] input.c emit portable C " + "source\n" + " --emit=ir -O1 [options] input.c emit semantic IR " + "dump\n" "\n" "(see source for the full GCC-subset flag reference)\n"))); } @@ -1122,6 +1129,14 @@ static int cc_parse(int argc, char** argv, CcOptions* o) { o->compile_only = 1; continue; } + if (driver_streq(a, "--emit=ir")) { + /* Textual semantic-IR dump instead of object bytes. The IR tape is only + * recorded when the optimizer runs, so this requires -O1+ (validated + * after argument parsing). Forces a single-input, no-link compile. */ + o->emit_ir = 1; + o->compile_only = 1; + continue; + } if (driver_streq(a, "-g")) { o->debug_info = 1; continue; @@ -1643,6 +1658,12 @@ static int cc_parse(int argc, char** argv, CcOptions* o) { "-shared is incompatible with -c/-S/-E/-fsyntax-only"); return 1; } + if (o->emit_ir && o->opt_level < 1) { + driver_errf(CC_TOOL, + "--emit=ir requires -O1 or higher " + "(the IR tape is only recorded when the optimizer runs)"); + return 1; + } if (o->syntax_only) { if (total_sources == 0 || total_link != 0) { driver_errf(CC_TOOL, @@ -1954,6 +1975,9 @@ static char* cc_default_obj_path_for_name(DriverEnv* env, const CcOptions* o, if (o && o->emit_asm_source) { ext = ".s"; ext_len = 2u; + } else if (o && o->emit_ir) { + ext = ".ir"; + ext_len = 3u; } else if (win) { ext = ".obj"; ext_len = 4u; @@ -2154,6 +2178,7 @@ static void cc_fill_c_opts(const CcOptions* o, const CfreePreprocessOptions* pp, copts->code.check_only = o->syntax_only ? 
true : false; copts->code.default_visibility = o->default_visibility; copts->code.emit_c_source = o->emit_c_source ? true : false; + copts->code.emit_ir = o->emit_ir ? true : false; copts->code.emit_asm_source = o->emit_asm_source ? true : false; copts->code.function_sections = o->function_sections ? true : false; copts->code.data_sections = o->data_sections ? true : false; @@ -2248,6 +2273,8 @@ static CfreeStatus cc_compile_source_emit(CfreeCompiler* compiler, if (st == CFREE_OK) { if (copts->code.emit_c_source) { /* c_source_writer is wired during CG; nothing to do here. */ + } else if (copts->code.emit_ir) { + /* ir_dump_writer is wired during CG; nothing to do here. */ } else if (copts->code.emit_asm_source) { st = cfree_obj_builder_emit_asm(ob, out); } else { @@ -2303,6 +2330,12 @@ static int cc_run_compile_one(DriverEnv* env, const CcOptions* o, * skip the object-serialize step when this is set. */ copts.code.c_source_writer = obj_w; } + if (copts.code.emit_ir) { + /* --emit=ir routes the output writer to the semantic-IR dumper in the opt + * recorder instead of the object emitter. The downstream emit path skips + * the object-serialize step when this is set. */ + copts.code.ir_dump_writer = obj_w; + } { CfreeLanguage lang = is_memory ? o->source_memory[index].lang @@ -2350,7 +2383,9 @@ static int cc_run_compile_objs(DriverEnv* env, const CcOptions* o, if (rc != 0) return rc; } for (i = 0; i < o->nsource_memory; ++i) { - const char* out = o->emit_asm_source ? "<stdin>.s" : "<stdin>.o"; + const char* out = o->emit_asm_source ? "<stdin>.s" + : o->emit_ir ? "<stdin>.ir" + : "<stdin>.o"; if (cc_run_compile_one(env, o, pp, 1, i, out) != 0) return 1; } return 0; diff --git a/include/cfree/core.h b/include/cfree/core.h @@ -226,6 +226,14 @@ typedef struct CfreeCodeOptions { uint32_t npath_map; /* Destination for emit_c_source mode. Ignored when emit_c_source is 0. 
*/ struct CfreeWriter* c_source_writer; + /* When set, CG writes a textual dump of the recorded semantic IR tape to + * ir_dump_writer instead of producing usable object output. The IR tape only + * exists when the optimizer is engaged, so this requires opt_level >= 1. + * The format is line-oriented and stable for golden diffs but otherwise + * unspecified. */ + bool emit_ir; + /* Destination for emit_ir mode. Ignored when emit_ir is 0. */ + struct CfreeWriter* ir_dump_writer; } CfreeCodeOptions; typedef struct CfreeHeap CfreeHeap; diff --git a/src/cg/ir.h b/src/cg/ir.h @@ -254,6 +254,10 @@ CGScope cg_ir_func_add_scope(CgIrFunc*, const CGScopeDesc*); CgIrInst* cg_ir_emit(CgIrFunc*, CgIrOp, SrcLoc); +/* Render the semantic CG IR tape to `w` as line-oriented text. Stable enough + * for golden-file diffs; symbols appear by numeric id (sym#N). */ +void cg_ir_func_dump(const CgIrFunc*, Writer*); + Operand* cg_ir_dup_operands(Arena*, const Operand*, u32 n); CGLocal* cg_ir_dup_locals(Arena*, const CGLocal*, u32 n); Label* cg_ir_dup_labels(Arena*, const Label*, u32 n); diff --git a/src/cg/ir_dump.c b/src/cg/ir_dump.c @@ -0,0 +1,366 @@ +/* Textual dump of the semantic CG IR (the recorded CG-API tape). + * + * This renders CgIrFunc — the IR as recorded from the CG API, before any + * lowering to the optimizer's CFG form. It is the only dumper for the Cg* IR; + * the optimizer's Func IR is dumped separately by opt_ir_dump (opt/ir_print.c). + * + * The format is line-oriented and stable enough for golden-file diffs but + * otherwise unspecified. Symbols are rendered by numeric id (sym#N) since the + * dumper has no object-builder handle, matching opt_ir_dump's convention. 
*/ + +#include "cg/ir.h" +#include "core/core.h" +#include "core/pool.h" +#include "core/slice.h" +#include "core/strbuf.h" + +static void dump_write(Writer* w, const StrBuf* sb) { + cfree_writer_write(w, strbuf_cstr(sb), strbuf_len(sb)); +} + +static const char* cg_ir_op_name(CgIrOp op) { + switch (op) { + case CG_IR_NOP: return "nop"; + case CG_IR_LABEL: return "label"; + case CG_IR_LOAD_IMM: return "load_imm"; + case CG_IR_LOAD_CONST: return "load_const"; + case CG_IR_COPY: return "copy"; + case CG_IR_LOAD: return "load"; + case CG_IR_STORE: return "store"; + case CG_IR_ADDR_OF: return "addr_of"; + case CG_IR_TLS_ADDR_OF: return "tls_addr_of"; + case CG_IR_AGG_COPY: return "agg_copy"; + case CG_IR_AGG_SET: return "agg_set"; + case CG_IR_BITFIELD_LOAD: return "bitfield_load"; + case CG_IR_BITFIELD_STORE: return "bitfield_store"; + case CG_IR_BINOP: return "binop"; + case CG_IR_UNOP: return "unop"; + case CG_IR_CMP: return "cmp"; + case CG_IR_CONVERT: return "convert"; + case CG_IR_CALL: return "call"; + case CG_IR_RET: return "ret"; + case CG_IR_BR: return "br"; + case CG_IR_CMP_BRANCH: return "cmp_branch"; + case CG_IR_SWITCH: return "switch"; + case CG_IR_INDIRECT_BRANCH: return "indirect_branch"; + case CG_IR_LOAD_LABEL_ADDR: return "load_label_addr"; + case CG_IR_LOCAL_STATIC_DATA_BEGIN: return "local_static_data_begin"; + case CG_IR_LOCAL_STATIC_DATA_WRITE: return "local_static_data_write"; + case CG_IR_LOCAL_STATIC_DATA_LABEL_ADDR: return "local_static_data_label_addr"; + case CG_IR_LOCAL_STATIC_DATA_END: return "local_static_data_end"; + case CG_IR_SCOPE_BEGIN: return "scope_begin"; + case CG_IR_SCOPE_ELSE: return "scope_else"; + case CG_IR_SCOPE_END: return "scope_end"; + case CG_IR_BREAK_TO: return "break_to"; + case CG_IR_CONTINUE_TO: return "continue_to"; + case CG_IR_ALLOCA: return "alloca"; + case CG_IR_VA_START: return "va_start"; + case CG_IR_VA_ARG: return "va_arg"; + case CG_IR_VA_END: return "va_end"; + case CG_IR_VA_COPY: return "va_copy"; + 
case CG_IR_ATOMIC_LOAD: return "atomic_load"; + case CG_IR_ATOMIC_STORE: return "atomic_store"; + case CG_IR_ATOMIC_RMW: return "atomic_rmw"; + case CG_IR_ATOMIC_CAS: return "atomic_cas"; + case CG_IR_FENCE: return "fence"; + case CG_IR_INTRINSIC: return "intrinsic"; + case CG_IR_ASM_BLOCK: return "asm_block"; + } + return "??"; +} + +static const char* cg_ir_binop_name(BinOp op) { + switch (op) { + case BO_IADD: return "iadd"; + case BO_ISUB: return "isub"; + case BO_IMUL: return "imul"; + case BO_SDIV: return "sdiv"; + case BO_UDIV: return "udiv"; + case BO_SREM: return "srem"; + case BO_UREM: return "urem"; + case BO_FADD: return "fadd"; + case BO_FSUB: return "fsub"; + case BO_FMUL: return "fmul"; + case BO_FDIV: return "fdiv"; + case BO_AND: return "and"; + case BO_OR: return "or"; + case BO_XOR: return "xor"; + case BO_SHL: return "shl"; + case BO_SHR_S: return "shr_s"; + case BO_SHR_U: return "shr_u"; + } + return "??"; +} + +static const char* cg_ir_unop_name(UnOp op) { + switch (op) { + case UO_NEG: return "neg"; + case UO_FNEG: return "fneg"; + case UO_NOT: return "not"; + case UO_BNOT: return "bnot"; + } + return "??"; +} + +static const char* cg_ir_cmp_name(CmpOp op) { + switch (op) { + case CMP_EQ: return "eq"; + case CMP_NE: return "ne"; + case CMP_LT_S: return "lt_s"; + case CMP_LE_S: return "le_s"; + case CMP_GT_S: return "gt_s"; + case CMP_GE_S: return "ge_s"; + case CMP_LT_U: return "lt_u"; + case CMP_LE_U: return "le_u"; + case CMP_GT_U: return "gt_u"; + case CMP_GE_U: return "ge_u"; + case CMP_LT_F: return "lt_f"; + case CMP_LE_F: return "le_f"; + case CMP_GT_F: return "gt_f"; + case CMP_GE_F: return "ge_f"; + } + return "??"; +} + +static const char* cg_ir_conv_name(ConvKind op) { + switch (op) { + case CV_SEXT: return "sext"; + case CV_ZEXT: return "zext"; + case CV_TRUNC: return "trunc"; + case CV_ITOF_S: return "itof_s"; + case CV_ITOF_U: return "itof_u"; + case CV_FTOI_S: return "ftoi_s"; + case CV_FTOI_U: return "ftoi_u"; + case CV_FEXT: 
return "fext"; + case CV_FTRUNC: return "ftrunc"; + case CV_BITCAST: return "bitcast"; + } + return "??"; +} + +static void put_type(StrBuf* sb, CfreeCgTypeId type) { + strbuf_putc(sb, 't'); + strbuf_put_u64(sb, (u64)type); +} + +static void put_operand(StrBuf* sb, const Operand* op) { + switch (op->kind) { + case OPK_IMM: + strbuf_put_slice(sb, SLICE_LIT("imm:")); + strbuf_put_i64(sb, op->v.imm); + break; + case OPK_LOCAL: + strbuf_putc(sb, 'L'); + strbuf_put_u64(sb, (u64)op->v.local); + break; + case OPK_GLOBAL: + strbuf_put_slice(sb, SLICE_LIT("sym#")); + strbuf_put_u64(sb, (u64)op->v.global.sym); + if (op->v.global.addend) { + strbuf_putc(sb, op->v.global.addend < 0 ? '-' : '+'); + strbuf_put_u64(sb, op->v.global.addend < 0 + ? (u64)(-(op->v.global.addend + 1)) + 1u + : (u64)op->v.global.addend); + } + break; + case OPK_INDIRECT: + strbuf_putc(sb, '['); + strbuf_putc(sb, 'L'); + strbuf_put_u64(sb, (u64)op->v.ind.base); + if (op->v.ind.index != CG_LOCAL_NONE) { + strbuf_put_slice(sb, SLICE_LIT("+L")); + strbuf_put_u64(sb, (u64)op->v.ind.index); + strbuf_putc(sb, '*'); + strbuf_put_u64(sb, (u64)(1u << op->v.ind.log2_scale)); + } + if (op->v.ind.ofs) { + strbuf_putc(sb, op->v.ind.ofs < 0 ? '-' : '+'); + strbuf_put_u64(sb, op->v.ind.ofs < 0 + ? 
(u64)(-((i64)op->v.ind.ofs + 1)) + 1u + : (u64)op->v.ind.ofs); + } + strbuf_putc(sb, ']'); + break; + default: + strbuf_put_slice(sb, SLICE_LIT("?op")); + break; + } + strbuf_putc(sb, ':'); + put_type(sb, op->type); +} + +static void put_mem(StrBuf* sb, const MemAccess* m) { + strbuf_put_slice(sb, SLICE_LIT(" mem{sz=")); + strbuf_put_u64(sb, (u64)m->size); + strbuf_put_slice(sb, SLICE_LIT(" al=")); + strbuf_put_u64(sb, (u64)m->align); + if (m->flags) { + strbuf_put_slice(sb, SLICE_LIT(" fl=")); + strbuf_put_hex_u64(sb, (u64)m->flags); + } + strbuf_putc(sb, '}'); +} + +static void put_inst_extra(StrBuf* sb, const CgIrFunc* f, const CgIrInst* in) { + switch (in->op) { + case CG_IR_LOAD_IMM: + strbuf_put_slice(sb, SLICE_LIT(" = ")); + strbuf_put_i64(sb, in->extra.imm); + break; + case CG_IR_BINOP: + strbuf_putc(sb, ' '); + strbuf_puts(sb, cg_ir_binop_name((BinOp)in->extra.imm)); + break; + case CG_IR_UNOP: + strbuf_putc(sb, ' '); + strbuf_puts(sb, cg_ir_unop_name((UnOp)in->extra.imm)); + break; + case CG_IR_CMP: + strbuf_putc(sb, ' '); + strbuf_puts(sb, cg_ir_cmp_name((CmpOp)in->extra.imm)); + break; + case CG_IR_CONVERT: + strbuf_putc(sb, ' '); + strbuf_puts(sb, cg_ir_conv_name((ConvKind)in->extra.imm)); + break; + case CG_IR_BR: + case CG_IR_LABEL: + strbuf_put_slice(sb, SLICE_LIT(" Lbl")); + strbuf_put_u64(sb, (u64)in->extra.imm); + break; + case CG_IR_SCOPE_ELSE: + case CG_IR_SCOPE_END: + case CG_IR_BREAK_TO: + case CG_IR_CONTINUE_TO: + strbuf_put_slice(sb, SLICE_LIT(" scope")); + strbuf_put_u64(sb, (u64)in->extra.imm); + break; + case CG_IR_SCOPE_BEGIN: { + const CgIrScopeAux* aux = (const CgIrScopeAux*)in->extra.aux; + if (aux) { + strbuf_put_slice(sb, SLICE_LIT(" scope")); + strbuf_put_u64(sb, (u64)aux->scope); + strbuf_put_slice(sb, SLICE_LIT(" kind=")); + strbuf_put_u64(sb, (u64)aux->desc.kind); + } + break; + } + case CG_IR_CMP_BRANCH: { + const CgIrCmpBranchAux* aux = (const CgIrCmpBranchAux*)in->extra.aux; + if (aux) { + strbuf_putc(sb, ' '); + 
strbuf_puts(sb, cg_ir_cmp_name(aux->op)); + strbuf_put_slice(sb, SLICE_LIT(" -> Lbl")); + strbuf_put_u64(sb, (u64)aux->target); + } + break; + } + case CG_IR_CALL: { + const CgIrCallAux* aux = (const CgIrCallAux*)in->extra.aux; + if (aux) { + strbuf_put_slice(sb, SLICE_LIT(" callee=")); + put_operand(sb, &aux->desc.callee); + strbuf_put_slice(sb, SLICE_LIT(" args=[")); + for (u32 i = 0; i < aux->desc.nargs; ++i) { + if (i) strbuf_putc(sb, ' '); + strbuf_putc(sb, 'L'); + strbuf_put_u64(sb, (u64)aux->desc.args[i]); + } + strbuf_put_slice(sb, SLICE_LIT("] results=[")); + for (u32 i = 0; i < aux->desc.nresults; ++i) { + if (i) strbuf_putc(sb, ' '); + strbuf_putc(sb, 'L'); + strbuf_put_u64(sb, (u64)aux->desc.results[i]); + } + strbuf_putc(sb, ']'); + if (aux->desc.flags & CG_CALL_TAIL) + strbuf_put_slice(sb, SLICE_LIT(" tail")); + } + break; + } + case CG_IR_RET: { + const CgIrRetAux* aux = (const CgIrRetAux*)in->extra.aux; + if (aux) { + strbuf_put_slice(sb, SLICE_LIT(" values=[")); + for (u32 i = 0; i < aux->nvalues; ++i) { + if (i) strbuf_putc(sb, ' '); + strbuf_putc(sb, 'L'); + strbuf_put_u64(sb, (u64)aux->values[i]); + } + strbuf_putc(sb, ']'); + } + break; + } + case CG_IR_LOAD: + case CG_IR_STORE: + put_mem(sb, &in->extra.mem); + break; + default: + break; + } + (void)f; +} + +void cg_ir_func_dump(const CgIrFunc* f, Writer* w) { + char buf[1024]; + StrBuf sb; + if (!f || !w) return; + strbuf_init(&sb, buf, sizeof buf); + + strbuf_put_slice(&sb, SLICE_LIT("func sym#")); + strbuf_put_u64(&sb, (u64)f->desc.sym); + strbuf_put_slice(&sb, SLICE_LIT(" params=")); + strbuf_put_u64(&sb, (u64)f->nparams); + strbuf_put_slice(&sb, SLICE_LIT(" locals=")); + strbuf_put_u64(&sb, (u64)f->nlocals); + strbuf_put_slice(&sb, SLICE_LIT(" labels=")); + strbuf_put_u64(&sb, (u64)f->nlabels); + strbuf_put_slice(&sb, SLICE_LIT(" scopes=")); + strbuf_put_u64(&sb, (u64)f->nscopes); + strbuf_put_slice(&sb, SLICE_LIT(" insts=")); + strbuf_put_u64(&sb, (u64)f->ninsts); + strbuf_putc(&sb, 
'\n'); + dump_write(w, &sb); + + for (u32 i = 0; i < f->nlocals; ++i) { + const CgIrLocal* l = &f->locals[i]; + strbuf_reset(&sb); + strbuf_put_slice(&sb, SLICE_LIT(" local L")); + strbuf_put_u64(&sb, (u64)l->id); + strbuf_putc(&sb, ' '); + put_type(&sb, l->desc.type); + strbuf_put_slice(&sb, SLICE_LIT(" sz=")); + strbuf_put_u64(&sb, (u64)l->desc.size); + strbuf_put_slice(&sb, SLICE_LIT(" al=")); + strbuf_put_u64(&sb, (u64)l->desc.align); + if (l->desc.name) { + Slice nm = pool_slice(f->c->global, l->desc.name); + strbuf_put_slice(&sb, SLICE_LIT(" \"")); + strbuf_put_slice(&sb, nm); + strbuf_putc(&sb, '"'); + } + if (l->is_param) { + strbuf_put_slice(&sb, SLICE_LIT(" param#")); + strbuf_put_u64(&sb, (u64)l->param_index); + } + if (l->address_taken) strbuf_put_slice(&sb, SLICE_LIT(" addr_taken")); + strbuf_putc(&sb, '\n'); + dump_write(w, &sb); + } + + for (u32 i = 0; i < f->ninsts; ++i) { + const CgIrInst* in = &f->insts[i]; + strbuf_reset(&sb); + strbuf_put_slice(&sb, SLICE_LIT(" ")); + strbuf_put_u64(&sb, (u64)i); + strbuf_put_slice(&sb, SLICE_LIT(": ")); + strbuf_puts(&sb, cg_ir_op_name((CgIrOp)in->op)); + for (u32 j = 0; j < in->nopnds; ++j) { + strbuf_putc(&sb, ' '); + put_operand(&sb, &in->opnds[j]); + } + put_inst_extra(&sb, f, in); + strbuf_putc(&sb, '\n'); + dump_write(w, &sb); + } +} diff --git a/src/cg/ir_recorder.c b/src/cg/ir_recorder.c @@ -645,6 +645,8 @@ CgIrRecorder* cg_ir_recorder_from_target(CgTarget* t) { return r && r->magic == CG_IR_RECORDER_MAGIC ? r : NULL; } +void* cg_ir_recorder_user(const CgIrRecorder* r) { return r ? r->user : NULL; } + const CgIrModule* cg_ir_recorder_module(const CgTarget* t) { const CgIrRecorder* r = (const CgIrRecorder*)t; return r && r->magic == CG_IR_RECORDER_MAGIC ? 
r->module : NULL; diff --git a/src/cg/ir_recorder.h b/src/cg/ir_recorder.h @@ -19,6 +19,7 @@ typedef struct CgIrRecorderConfig { CgTarget* cg_ir_recorder_new(Compiler*, ObjBuilder*, const CgIrRecorderConfig*); CgIrRecorder* cg_ir_recorder_from_target(CgTarget*); +void* cg_ir_recorder_user(const CgIrRecorder*); const CgIrModule* cg_ir_recorder_module(const CgTarget*); CgIrFunc* cg_ir_recorder_current_func(const CgTarget*); diff --git a/src/cg/session.c b/src/cg/session.c @@ -95,6 +95,11 @@ CfreeStatus cfree_cg_begin_obj(CfreeCg* g, CfreeObjBuilder* out, compiler_panic((Compiler*)c, api_no_loc(), "CfreeCg: unsupported opt_level %d", opt_level); } + if (opts && opts->emit_ir && opt_level < 1) { + compiler_panic((Compiler*)c, api_no_loc(), + "CfreeCg: emit_ir requires opt_level >= 1 " + "(the IR tape is only recorded when the optimizer runs)"); + } #if !CFREE_OPT_ENABLED if (opt_level > 0) { compiler_panic((Compiler*)c, api_no_loc(), @@ -128,6 +133,8 @@ CfreeStatus cfree_cg_begin_obj(CfreeCg* g, CfreeObjBuilder* out, if (opt_level > 0) { target = opt_cgtarget_new((Compiler*)c, target, opt_level); if (!target) return CFREE_UNSUPPORTED; + if (opts && opts->emit_ir) + opt_set_dump_writer(target, (Writer*)opts->ir_dump_writer); } #endif g->obj = (ObjBuilder*)out; diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -32,6 +32,7 @@ static void opt_dbg_dump(OptImpl* o, Func* f, const char* tag) { size_t len = 0; const uint8_t* bytes; if (!s) return; + if (strcmp(s, "1") != 0 && strcmp(s, tag) != 0) return; cfree_writer_mem(o->c->ctx->heap, &w); opt_ir_dump(f, w); bytes = cfree_writer_mem_bytes(w, &len); @@ -147,6 +148,7 @@ static void opt_run_o1_native(OptImpl* o, Func* f) { opt_mir_jump_cleanup(f, OPT_JUMP_CLEANUP_LAYOUT); metrics_scope_end(o->c, "opt.post_ra.jump_cleanup_layout"); + opt_dbg_dump(o, f, "pre-emit"); metrics_scope_begin(o->c, "opt.emit"); opt_emit_native(o->c, f, o->native); metrics_scope_end(o->c, "opt.emit"); @@ -155,53 +157,27 @@ static void 
opt_run_o1_native(OptImpl* o, Func* f) { static void opt_dbg_dump_cg(OptImpl* o, const CgIrFunc* f) { extern char* getenv(const char*); - StrBuf sb; - char buf[8192]; + CfreeWriter* w = NULL; + size_t len = 0; + const uint8_t* bytes; if (!getenv("CFREE_DUMPCG")) return; - strbuf_init(&sb, buf, sizeof buf); - for (u32 i = 0; i < f->ninsts; ++i) { - const CgIrInst* in = &f->insts[i]; - strbuf_put_u64(&sb, in->op); - if (in->op == CG_IR_LOAD_IMM) { - strbuf_put_slice(&sb, SLICE_LIT("(imm=")); - strbuf_put_u64(&sb, (u64)in->extra.imm); - strbuf_put_slice(&sb, SLICE_LIT(")")); - } - strbuf_put_slice(&sb, SLICE_LIT(" [")); - for (u32 j = 0; j < in->nopnds; ++j) { - const Operand* op = &in->opnds[j]; - strbuf_put_slice(&sb, SLICE_LIT(" k")); - strbuf_put_u64(&sb, op->kind); - if (op->kind == OPK_LOCAL) { - strbuf_put_slice(&sb, SLICE_LIT(":L")); - strbuf_put_u64(&sb, op->v.local); - strbuf_put_slice(&sb, cg_type_is_ptr(o->c, op->type) - ? SLICE_LIT("(ptr)") - : SLICE_LIT("(val)")); - } else if (op->kind == OPK_INDIRECT) { - strbuf_put_slice(&sb, SLICE_LIT(":ind(b")); - strbuf_put_u64(&sb, op->v.ind.base); - strbuf_put_slice(&sb, SLICE_LIT(",i")); - strbuf_put_u64(&sb, op->v.ind.index); - strbuf_put_slice(&sb, SLICE_LIT(")")); - } else if (op->kind == OPK_IMM) { - strbuf_put_slice(&sb, SLICE_LIT(":i")); - strbuf_put_u64(&sb, (u64)op->v.imm); - } - } - strbuf_put_slice(&sb, SLICE_LIT(" ]\n")); - } - compiler_panic(o->c, f->desc.loc, "CGIR:\n%s", strbuf_cstr(&sb)); + cfree_writer_mem(o->c->ctx->heap, &w); + cg_ir_func_dump(f, w); + bytes = cfree_writer_mem_bytes(w, &len); + compiler_panic(o->c, f->desc.loc, "CGIR:\n%.*s", (int)len, + (const char*)bytes); } static void opt_on_func(void* user, CgIrFunc* cg_func) { OptImpl* o = (OptImpl*)user; Func* f; opt_dbg_dump_cg(o, cg_func); + /* The dump writer renders the semantic CG IR tape — the IR as recorded, + * before lowering to the optimizer's CFG form. 
*/ + if (o->dump_writer) cg_ir_func_dump(cg_func, o->dump_writer); metrics_scope_begin(o->c, "opt.o1.cg_ir_lower"); f = opt_func_from_cg_ir(o->c, cg_func); metrics_scope_end(o->c, "opt.o1.cg_ir_lower"); - if (o->dump_writer && f) opt_ir_dump(f, o->dump_writer); opt_run_o1_native(o, f); } @@ -260,6 +236,6 @@ CgTarget* opt_cgtarget_new(Compiler* c, CgTarget* target, int level) { void opt_set_dump_writer(CgTarget* t, Writer* w) { CgIrRecorder* rec = cg_ir_recorder_from_target(t); - (void)rec; - (void)w; + OptImpl* o = rec ? (OptImpl*)cg_ir_recorder_user(rec) : NULL; + if (o) o->dump_writer = w; } diff --git a/test/cg/ir_recorder_test.c b/test/cg/ir_recorder_test.c @@ -361,11 +361,58 @@ static void test_aliases_and_data_label_diagnostic_hook(void) { tc_fini(&tc); } +static void test_func_dump_renders_text(void) { + TestCtx tc; + CallbackState cb; + CgTarget* t; + CGFuncDesc fd; + CGLocal a, b, dst; + CgIrFunc* f; + CfreeWriter* w = NULL; + const uint8_t* bytes; + size_t len = 0; + char s[4096]; + memset(&cb, 0, sizeof cb); + tc_init(&tc); + t = make_recorder(&tc, &cb); + fd = fn_desc(&tc); + t->func_begin(t, &fd); + a = local_new(t, tc.i32, "a"); + b = local_new(t, tc.i32, "b"); + dst = local_new(t, tc.i32, "dst"); + t->load_imm(t, op_local(a, tc.i32), 40); + t->load_imm(t, op_local(b, tc.i32), 2); + t->binop(t, BO_IADD, op_local(dst, tc.i32), op_local(a, tc.i32), + op_local(b, tc.i32)); + t->ret(t, &dst, 1); + t->func_end(t); + + f = cg_ir_recorder_module(t)->funcs[0]; + cfree_writer_mem(&g_heap, &w); + cg_ir_func_dump(f, w); + bytes = cfree_writer_mem_bytes(w, &len); + EXPECT(len > 0 && bytes && len < sizeof s, "dump should produce output"); + /* NUL-terminate for strstr by copying into a sized buffer. */ + memcpy(s, bytes, len < sizeof s ? len : sizeof s - 1u); + s[len < sizeof s ? 
len : sizeof s - 1u] = '\0'; + EXPECT(strstr(s, "func sym#") != NULL, "should print func header"); + EXPECT(strstr(s, "local L1 ") != NULL, "should list locals"); + EXPECT(strstr(s, "\"a\"") != NULL, "should print local names"); + EXPECT(strstr(s, "load_imm") != NULL, "should print load_imm op"); + EXPECT(strstr(s, "= 40") != NULL, "should print immediate value"); + EXPECT(strstr(s, "binop") != NULL, "should print binop op"); + EXPECT(strstr(s, "iadd") != NULL, "should name the binop kind"); + EXPECT(strstr(s, "ret values=[") != NULL, "should print ret values"); + cfree_writer_close(w); + tc_fini(&tc); +} + int main(void) { test_records_basic_function_shape(); test_deep_copies_call_switch_and_const_payloads(); test_labels_scopes_and_address_taken_locals(); test_aliases_and_data_label_diagnostic_hook(); + test_func_dump_renders_text(); fprintf(stderr, "ir-recorder: %d checks, %d failures\n", g_checks, g_fails); return g_fails ? 1 : 0; } diff --git a/test/driver/run.sh b/test/driver/run.sh @@ -837,6 +837,42 @@ else fail=$((fail + 1)) fi +# --emit=ir: dump the semantic CG IR tape (requires -O1+). +cat > "$work/ir.c" <<'SRC' +int add(int a, int b) { + int c = a + b; + return c * 2; +} +SRC + +if "$CFREE" cc -O1 --emit=ir -c "$work/ir.c" -o "$work/ir.out" \ + > "$work/ir-emit.out" 2> "$work/ir-emit.err" && + grep -q "^func sym#" "$work/ir.out" && + grep -q "binop" "$work/ir.out" && + grep -q "iadd" "$work/ir.out" && + grep -q "ret values=\[" "$work/ir.out"; then + printf 'PASS %s\n' "cc-emit-ir" + pass=$((pass + 1)) +else + printf 'FAIL %s\n' "cc-emit-ir" + sed 's/^/ | /' "$work/ir-emit.err" + fail=$((fail + 1)) +fi + +# --emit=ir without -O1 must be rejected (no IR tape is recorded at -O0). 
+if "$CFREE" cc --emit=ir -c "$work/ir.c" -o "$work/ir-o0.out" \ + > "$work/ir-o0.out.log" 2> "$work/ir-o0.err"; then + printf 'FAIL %s (expected failure at -O0)\n' "cc-emit-ir-requires-opt" + fail=$((fail + 1)) +elif grep -q "requires -O1" "$work/ir-o0.err"; then + printf 'PASS %s\n' "cc-emit-ir-requires-opt" + pass=$((pass + 1)) +else + printf 'FAIL %s (wrong diagnostic)\n' "cc-emit-ir-requires-opt" + sed 's/^/ | /' "$work/ir-o0.err" + fail=$((fail + 1)) +fi + total=$((pass + fail)) if [ "$fail" -gt 0 ]; then printf '\ndriver: %d/%d passed\n' "$pass" "$total"