kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit b212b95462ccf02a5f87c6ee4ee1bd24a8e6cb6e
parent d44c3a68cca85703d06d240afa54156da80e96c4
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri,  8 May 2026 06:03:16 -0700

cfree_obj_open/query, cfree_ar_iter, cfree_detect_fmt; driver objdump + ar

Public API additions (include/cfree.h):
- cfree_detect_fmt: centralized magic-byte detector for AR, ELF, WASM,
  Mach-O, PE (MZ), and COFF (by machine-type field); returns CfreeBinFmt
- cfree_obj_open/close + query API: CfreeObjFile, CfreeObjSecInfo,
  CfreeObjSymInfo, CfreeObjSymIter; CfreeSecKind/Flag, CfreeSymBind/Kind
- cfree_ar_iter_init/next: stack-allocated archive cursor, zero-copy
  member data pointers; cfree_ar_list refactored onto it
- cfree_ar_write/cfree_ar_list: pure-I/O archive creation and listing

driver/ completions:
- objdump: auto-detects AR vs object via cfree_detect_fmt, iterates
  archive members with cfree_ar_iter, dumps sections + symbol table
- ar: rc/r/c (write) and t (list) modes via cfree_ar_write/cfree_ar_list
- env.c: driver_stdout_writer (fd=1, no-close), driver_printf

Diffstat:
M README.md | 1 +
M driver/ar.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M driver/as.c | 1 -
M driver/cc.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
D driver/cpp.c | 10 ----------
M driver/dbg.c | 1 -
M driver/driver.h | 15 ++++++++++++---
M driver/env.c | 31 +++++++++++++++++++++++++++++++
M driver/main.c | 8 ++++----
M driver/objdump.c | 204 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
A driver/run.c | 260 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M include/cfree.h | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M src/api/pipeline.c | 394 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/obj/obj.h | 8 ++++----
M src/pp/pp.h | 5 +++++
15 files changed, 1272 insertions(+), 43 deletions(-)

diff --git a/README.md b/README.md @@ -23,6 +23,7 @@ It features: - Reproducible builds - A build and packaging system - Bootstrap from hex0-seed +- A C library providing access to the above cfree also provides these headers beyond the freestanding set: - stdatomic.h diff --git a/driver/ar.c b/driver/ar.c @@ -1,10 +1,136 @@ #include "driver.h" -#include <cfree.h> + +/* `cfree ar` — create/list POSIX ar archives. + * + * Supported operations: + * cfree ar {r|c|rc|cr} out.a in.o... create / replace archive + * cfree ar t in.a list member names */ + +#define AR_TOOL "ar" + +static void ar_usage(void) +{ + driver_errf(AR_TOOL, "%s", + "usage: cfree ar {rc|r|c|t} archive.a [file.o...]"); +} int driver_ar(int argc, char** argv) { - (void)argc; - (void)argv; - /* TODO: archiver. */ - return 1; + DriverEnv env; + const char* mode; + const char* archive_path; + int do_write = 0; + int do_list = 0; + int i; + int rc = 0; + + if (argc < 3) { + ar_usage(); + return 2; + } + + mode = argv[1]; + archive_path = argv[2]; + + for (i = 0; mode[i]; ++i) { + if (mode[i] == 'r' || mode[i] == 'c') do_write = 1; + else if (mode[i] == 't') do_list = 1; + } + + if (!do_write && !do_list) { + driver_errf(AR_TOOL, "unrecognized operation: %s", mode); + return 2; + } + if (do_write && do_list) { + driver_errf(AR_TOOL, "conflicting operations: %s", mode); + return 2; + } + + driver_env_init(&env); + + if (do_list) { + CfreeEnv cenv = driver_env_to_cfree(&env); + CfreeFileData fd = {0}; + CfreeBytesInput input; + CfreeWriter* out; + + if (!cenv.file_io->read_all(cenv.file_io->user, archive_path, &fd)) { + driver_errf(AR_TOOL, "failed to read: %s", archive_path); + driver_env_fini(&env); + return 1; + } + + input.name = archive_path; + input.data = fd.data; + input.len = fd.size; + + out = driver_stdout_writer(&env); + if (!out) { + driver_errf(AR_TOOL, "out of memory"); + cenv.file_io->release(cenv.file_io->user, &fd); + driver_env_fini(&env); + return 1; + } + + rc = cfree_ar_list(&input, 
out); + if (rc) driver_errf(AR_TOOL, "failed to read archive: %s", archive_path); + cfree_writer_close(out); + cenv.file_io->release(cenv.file_io->user, &fd); + } else { + /* Write archive. */ + int nmembers = argc - 3; + uint32_t nm = nmembers > 0 ? (uint32_t)nmembers : 0u; + CfreeBytesInput* members = NULL; + CfreeFileData* fds = NULL; + CfreeEnv cenv = driver_env_to_cfree(&env); + CfreeWriter* out = NULL; + + if (nm > 0) { + members = (CfreeBytesInput*)driver_alloc_zeroed( + &env, (size_t)nm * sizeof(*members)); + fds = (CfreeFileData*)driver_alloc_zeroed( + &env, (size_t)nm * sizeof(*fds)); + if (!members || !fds) { + driver_errf(AR_TOOL, "out of memory"); + rc = 1; + goto ar_done; + } + for (i = 0; i < (int)nm && rc == 0; ++i) { + const char* path = argv[3 + i]; + if (!cenv.file_io->read_all(cenv.file_io->user, path, &fds[i])) { + driver_errf(AR_TOOL, "failed to read: %s", path); + rc = 1; + break; + } + members[i].name = driver_basename(path); + members[i].data = fds[i].data; + members[i].len = fds[i].size; + } + } + + if (rc == 0) { + out = cenv.file_io->open_writer(cenv.file_io->user, archive_path); + if (!out) { + driver_errf(AR_TOOL, "failed to open: %s", archive_path); + rc = 1; + } else { + rc = cfree_ar_write(out, members, nm); + if (rc == 0 && cfree_writer_error(out)) rc = 1; + cfree_writer_close(out); + } + } + + ar_done: + if (fds) { + for (i = 0; i < (int)nm; ++i) { + if (fds[i].data) + cenv.file_io->release(cenv.file_io->user, &fds[i]); + } + driver_free(&env, fds, (size_t)nm * sizeof(*fds)); + } + if (members) driver_free(&env, members, (size_t)nm * sizeof(*members)); + } + + driver_env_fini(&env); + return rc; } diff --git a/driver/as.c b/driver/as.c @@ -1,5 +1,4 @@ #include "driver.h" -#include <cfree.h> int driver_as(int argc, char** argv) { diff --git a/driver/cc.c b/driver/cc.c @@ -13,6 +13,7 @@ typedef struct CcOptions { size_t argv_bound; /* upper bound on list size */ int compile_only; /* -c */ + int preprocess_only; /* -E */ int 
opt_level; /* -O0/-O1/-O2 (default 0) */ int debug_info; /* -g */ const char* output_path; /* -o */ @@ -37,7 +38,7 @@ typedef struct CcOptions { static void cc_usage(void) { driver_errf(CC_TOOL, "%s", - "usage: cfree cc [-c] [-o out] [-O0|-O1|-O2] [-g]\n" + "usage: cfree cc [-c|-E] [-o out] [-O0|-O1|-O2] [-g]\n" " [-I dir]... [-isystem dir]...\n" " [-D name[=body]]... [-U name]...\n" " input.c..."); @@ -94,8 +95,9 @@ static int cc_parse(int argc, char** argv, CcOptions* o) for (i = 1; i < argc; ++i) { const char* a = argv[i]; - if (driver_streq(a, "-c")) { o->compile_only = 1; continue; } - if (driver_streq(a, "-g")) { o->debug_info = 1; continue; } + if (driver_streq(a, "-c")) { o->compile_only = 1; continue; } + if (driver_streq(a, "-E")) { o->preprocess_only = 1; continue; } + if (driver_streq(a, "-g")) { o->debug_info = 1; continue; } if (driver_streq(a, "-O0")) { o->opt_level = 0; continue; } if (driver_streq(a, "-O1")) { o->opt_level = 1; continue; } if (driver_streq(a, "-O2")) { o->opt_level = 2; continue; } @@ -146,10 +148,18 @@ static int cc_parse(int argc, char** argv, CcOptions* o) cc_usage(); return 1; } + if (o->compile_only && o->preprocess_only) { + driver_errf(CC_TOOL, "-c and -E are mutually exclusive"); + return 1; + } if (o->compile_only && o->nsources != 1) { driver_errf(CC_TOOL, "-c requires exactly one input"); return 1; } + if (o->preprocess_only && o->nsources != 1) { + driver_errf(CC_TOOL, "-E requires exactly one input"); + return 1; + } if (!o->output_path) { driver_errf(CC_TOOL, "-o is required"); return 1; @@ -201,6 +211,65 @@ static void cc_to_cfree(const CcOptions* o, CfreeOptions* out) out->pp.nundefines = o->nundefines; } +/* Preprocessor-only path for `cc -E`. Loads the single source via + * env.file_io, opens the output via env.file_io, and runs cfree_preprocess. + * Returns 0 on success, nonzero on any I/O or compile failure. 
*/ +static int cc_preprocess(DriverEnv* env, const CfreePpOptions* pp_opts, + const char* source, const char* output_path) +{ + CfreeEnv cenv = driver_env_to_cfree(env); + CfreeCompiler* compiler = NULL; + CfreeWriter* writer = NULL; + CfreeFileData src = {0}; + CfreeBytesInput input; + int rc = 1; + int loaded = 0; + + if (!cenv.file_io->read_all(cenv.file_io->user, source, &src)) { + driver_errf(CC_TOOL, "failed to read: %s", source); + goto out; + } + loaded = 1; + + writer = cenv.file_io->open_writer(cenv.file_io->user, output_path); + if (!writer) { + driver_errf(CC_TOOL, "failed to open output: %s", output_path); + goto out; + } + + compiler = cfree_compiler_new(driver_host_target(), &cenv); + if (!compiler) { + driver_errf(CC_TOOL, "failed to initialize compiler"); + goto out; + } + + input.name = source; + input.data = src.data; + input.len = src.size; + + rc = cfree_preprocess(compiler, pp_opts, &input, writer); + +out: + if (compiler) cfree_compiler_free(compiler); + if (writer) cfree_writer_close(writer); + if (loaded) cenv.file_io->release(cenv.file_io->user, &src); + return rc; +} + +static void cc_fill_pp(const CcOptions* o, CfreePpOptions* pp) +{ + CfreePpOptions z = {0}; + *pp = z; + pp->include_dirs = o->include_dirs; + pp->ninclude_dirs = o->ninclude_dirs; + pp->system_include_dirs = o->system_include_dirs; + pp->nsystem_include_dirs = o->nsystem_include_dirs; + pp->defines = o->defines; + pp->ndefines = o->ndefines; + pp->undefines = o->undefines; + pp->nundefines = o->nundefines; +} + int driver_cc(int argc, char** argv) { DriverEnv env; @@ -217,8 +286,14 @@ int driver_cc(int argc, char** argv) return 2; } - cc_to_cfree(&co, &copts); - rc = cfree_run(&copts); + if (co.preprocess_only) { + CfreePpOptions pp_opts; + cc_fill_pp(&co, &pp_opts); + rc = cc_preprocess(&env, &pp_opts, co.sources[0], co.output_path); + } else { + cc_to_cfree(&co, &copts); + rc = cfree_run(&copts); + } cc_options_release(&co); driver_env_fini(&env); diff --git 
a/driver/cpp.c b/driver/cpp.c @@ -1,10 +0,0 @@ -#include "driver.h" -#include <cfree.h> - -int driver_cpp(int argc, char** argv) -{ - (void)argc; - (void)argv; - /* TODO: preprocessor-only mode. */ - return 1; -} diff --git a/driver/dbg.c b/driver/dbg.c @@ -1,5 +1,4 @@ #include "driver.h" -#include <cfree.h> int driver_dbg(int argc, char** argv) { diff --git a/driver/driver.h b/driver/driver.h @@ -6,16 +6,18 @@ /* The cfree CLI driver. Multi-call binary: dispatches to one of seven tool * front-ends by argv[0]'s basename, falling back to argv[1] (e.g. * `cfree cc ...`). The driver only depends on libcfree's public API - * (<cfree.h>); it has no access to libcfree's internal headers. */ + * (<cfree.h>); it has no access to libcfree's internal headers. + * + * Preprocessor-only mode is `cc -E` — there is no separate `cpp` tool. */ typedef enum DriverTool { DRIVER_TOOL_CC, - DRIVER_TOOL_CPP, DRIVER_TOOL_AS, DRIVER_TOOL_LD, DRIVER_TOOL_AR, DRIVER_TOOL_OBJDUMP, DRIVER_TOOL_DBG, + DRIVER_TOOL_RUN, } DriverTool; /* Multi-call entry: dispatches by argv[0] basename (or argv[1] fallback). */ @@ -23,12 +25,12 @@ int driver_main(int argc, char** argv); /* Direct entry per tool. Each lives in driver/<tool>.c. */ int driver_cc (int argc, char** argv); -int driver_cpp (int argc, char** argv); int driver_as (int argc, char** argv); int driver_ld (int argc, char** argv); int driver_ar (int argc, char** argv); int driver_objdump(int argc, char** argv); int driver_dbg (int argc, char** argv); +int driver_run (int argc, char** argv); /* Shared host environment used by every tool that calls into libcfree. * driver_env_init wires up the libc-backed heap, the stderr diag sink, and @@ -73,7 +75,14 @@ void* driver_alloc_zeroed(DriverEnv*, size_t); void driver_free (DriverEnv*, void* p, size_t); void driver_memcpy (void* dst, const void* src, size_t n); +/* Opens a Writer that writes to stdout (fd 1). close frees the struct but + * does not close the fd. 
*/ +CfreeWriter* driver_stdout_writer(DriverEnv*); + /* Diagnostic printing to host stderr. Format is `"<tool>: <fmt>\n"`. */ void driver_errf(const char* tool, const char* fmt, ...); +/* Formatted output to stdout. */ +void driver_printf(const char* fmt, ...); + #endif diff --git a/driver/env.c b/driver/env.c @@ -117,6 +117,12 @@ static void fdw_close(CfreeWriter* w) fw->heap->free(fw->heap, fw, sizeof(*fw)); } +static void fdw_noclose(CfreeWriter* w) +{ + DriverFdWriter* fw = (DriverFdWriter*)w; + fw->heap->free(fw->heap, fw, sizeof(*fw)); +} + static CfreeWriter* driver_writer_fd(CfreeHeap* h, int fd) { DriverFdWriter* fw = (DriverFdWriter*)h->alloc(h, sizeof(*fw), _Alignof(DriverFdWriter)); @@ -133,6 +139,23 @@ static CfreeWriter* driver_writer_fd(CfreeHeap* h, int fd) return &fw->base; } +CfreeWriter* driver_stdout_writer(DriverEnv* e) +{ + DriverFdWriter* fw = (DriverFdWriter*)e->heap->alloc( + e->heap, sizeof(*fw), _Alignof(DriverFdWriter)); + if (!fw) return NULL; + fw->base.write = fdw_write; + fw->base.seek = fdw_seek; + fw->base.tell = fdw_tell; + fw->base.error = fdw_error; + fw->base.close = fdw_noclose; /* do not close fd 1 */ + fw->heap = e->heap; + fw->fd = STDOUT_FILENO; + fw->err = 0; + fw->pos = 0; + return &fw->base; +} + /* ---------------- file_io (POSIX) ---------------- */ static int posix_read_all(void* user, const char* path, CfreeFileData* out) @@ -278,6 +301,14 @@ void driver_errf(const char* tool, const char* fmt, ...) fputc('\n', stderr); } +void driver_printf(const char* fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); +} + CfreeTarget driver_host_target(void) { CfreeTarget t; diff --git a/driver/main.c b/driver/main.c @@ -1,18 +1,18 @@ #include "driver.h" -/* Multi-call dispatch. Looks at argv[0]'s basename for "cc", "cpp", "as", - * "ld", "ar", "objdump", or "dbg"; if argv[0] is the bare "cfree" binary, - * falls back to argv[1]. */ +/* Multi-call dispatch. 
Looks at argv[0]'s basename for "cc", "as", "ld", + * "ar", "objdump", "dbg", or "run"; if argv[0] is the bare "cfree" + * binary, falls back to argv[1]. Preprocessor-only mode is `cc -E`. */ static int dispatch(const char* name, int argc, char** argv) { if (driver_streq(name, "cc")) return driver_cc (argc, argv); - if (driver_streq(name, "cpp")) return driver_cpp (argc, argv); if (driver_streq(name, "as")) return driver_as (argc, argv); if (driver_streq(name, "ld")) return driver_ld (argc, argv); if (driver_streq(name, "ar")) return driver_ar (argc, argv); if (driver_streq(name, "objdump")) return driver_objdump(argc, argv); if (driver_streq(name, "dbg")) return driver_dbg (argc, argv); + if (driver_streq(name, "run")) return driver_run (argc, argv); return -1; } diff --git a/driver/objdump.c b/driver/objdump.c @@ -1,10 +1,204 @@ #include "driver.h" -#include <cfree.h> + +/* `cfree objdump` — print section/symbol info for object files and archives. + * Archives are auto-detected by magic; each member is dumped in turn. + * All display logic lives here; the library supplies data-access APIs. */ + +#define OBJDUMP_TOOL "objdump" + +static void objdump_usage(void) +{ + driver_errf(OBJDUMP_TOOL, "%s", "usage: cfree objdump input..."); +} + +static const char* fmt_str(CfreeObjFmt fmt, uint8_t ptr_size) +{ + switch (fmt) { + case CFREE_OBJ_ELF: return ptr_size == 8 ? "elf64" : "elf32"; + case CFREE_OBJ_COFF: return ptr_size == 8 ? "coff64" : "coff32"; + case CFREE_OBJ_MACHO: return ptr_size == 8 ? 
"macho64" : "macho32"; + case CFREE_OBJ_WASM: return "wasm"; + } + return "unknown"; +} + +static const char* arch_str(CfreeArchKind arch) +{ + switch (arch) { + case CFREE_ARCH_X86_64: return "x86_64"; + case CFREE_ARCH_X86_32: return "i386"; + case CFREE_ARCH_ARM_64: return "arm64"; + case CFREE_ARCH_ARM_32: return "arm"; + case CFREE_ARCH_RV64: return "riscv64"; + case CFREE_ARCH_RV32: return "riscv32"; + case CFREE_ARCH_WASM: return "wasm32"; + } + return "unknown"; +} + +static char sym_bind_char(CfreeSymBind b) +{ + switch (b) { + case CFREE_SB_LOCAL: return 'l'; + case CFREE_SB_GLOBAL: return 'g'; + case CFREE_SB_WEAK: return 'w'; + } + return ' '; +} + +static char sym_kind_char(CfreeSymKind k) +{ + switch (k) { + case CFREE_SK_FUNC: return 'F'; + case CFREE_SK_OBJ: return 'O'; + case CFREE_SK_SECTION: return 'S'; + case CFREE_SK_FILE: return 'f'; + case CFREE_SK_TLS: return 'T'; + case CFREE_SK_ABS: return 'A'; + case CFREE_SK_COMMON: return 'C'; + case CFREE_SK_UNDEF: return 'U'; + } + return ' '; +} + +static void dump_obj(const char* label, CfreeObjFile* f) +{ + CfreeTarget target = cfree_obj_target(f); + CfreeObjFmt fmt = cfree_obj_fmt(f); + uint32_t nsec = cfree_obj_nsections(f); + uint32_t i; + CfreeObjSymIter* it; + CfreeObjSymInfo sym; + + driver_printf("%s:\tfile format %s-%s\n\n", + label, fmt_str(fmt, target.ptr_size), arch_str(target.arch)); + + driver_printf("Sections:\n"); + driver_printf("Idx %-20s Size Align\n", "Name"); + for (i = 0; i < nsec; ++i) { + CfreeObjSecInfo sec = cfree_obj_section(f, i); + driver_printf("%03x %-20s %08x %04x\n", + i, sec.name, sec.size, sec.align); + } + driver_printf("\n"); + + driver_printf("SYMBOL TABLE:\n"); + it = cfree_obj_symiter_new(f); + while (cfree_obj_symiter_next(it, &sym)) { + const char* secname; + if (sym.section == CFREE_SECTION_NONE) { + secname = "*UND*"; + } else { + CfreeObjSecInfo sec = cfree_obj_section(f, sym.section); + secname = sec.name[0] ? 
sec.name : "(none)"; + } + driver_printf("%016llx %c %c %-18s %016llx %s\n", + (unsigned long long)sym.value, + sym_bind_char(sym.bind), + sym_kind_char(sym.kind), + secname, + (unsigned long long)sym.size, + sym.name[0] ? sym.name : "(none)"); + } + cfree_obj_symiter_free(it); +} + +static int dump_archive(const char* path, const CfreeBytesInput* input, + const CfreeEnv* cenv, CfreeTarget target) +{ + CfreeArIter it; + CfreeArMember member; + char label[256]; + int j; + + driver_printf("In archive %s:\n\n", path); + + cfree_ar_iter_init(&it, input); + while (cfree_ar_iter_next(&it, &member)) { + CfreeBytesInput min; + CfreeObjFile* f; + + /* Build "archive.a(member.o)" label. */ + j = 0; + { const char* p = path; while (*p && j < 230) label[j++] = *p++; } + label[j++] = '('; + { const char* p = member.name; while (*p && j < 252) label[j++] = *p++; } + label[j++] = ')'; + label[j] = '\0'; + + min.name = member.name; + min.data = member.data; + min.len = member.size; + + f = cfree_obj_open(cenv, target, &min); + if (!f) { + driver_errf(OBJDUMP_TOOL, "failed to parse member: %s", label); + continue; + } + dump_obj(label, f); + cfree_obj_close(f); + } + + return 0; +} int driver_objdump(int argc, char** argv) { - (void)argc; - (void)argv; - /* TODO: object inspection / disassembler. 
*/ - return 1; + DriverEnv env; + int i; + int rc = 0; + + if (argc < 2) { + objdump_usage(); + return 2; + } + + driver_env_init(&env); + + for (i = 1; i < argc && rc == 0; ++i) { + CfreeEnv cenv = driver_env_to_cfree(&env); + CfreeFileData fd = {0}; + CfreeBytesInput input; + CfreeBinFmt bin; + + if (!cenv.file_io->read_all(cenv.file_io->user, argv[i], &fd)) { + driver_errf(OBJDUMP_TOOL, "failed to read: %s", argv[i]); + rc = 1; + break; + } + + input.name = argv[i]; + input.data = fd.data; + input.len = fd.size; + + bin = cfree_detect_fmt(input.data, input.len); + switch (bin) { + case CFREE_BIN_AR: + rc = dump_archive(argv[i], &input, &cenv, driver_host_target()); + break; + case CFREE_BIN_ELF: + case CFREE_BIN_COFF: + case CFREE_BIN_MACHO: + case CFREE_BIN_WASM: { + CfreeObjFile* f = cfree_obj_open(&cenv, driver_host_target(), &input); + if (!f) { + driver_errf(OBJDUMP_TOOL, "failed to parse: %s", argv[i]); + rc = 1; + } else { + dump_obj(argv[i], f); + cfree_obj_close(f); + } + break; + } + default: + driver_errf(OBJDUMP_TOOL, "unsupported file format: %s", argv[i]); + rc = 1; + break; + } + + cenv.file_io->release(cenv.file_io->user, &fd); + } + + driver_env_fini(&env); + return rc; } diff --git a/driver/run.c b/driver/run.c @@ -0,0 +1,260 @@ +#include "driver.h" + +#include <stdint.h> + +/* `cfree run` — JIT-compile one or more C sources and invoke the entry + * symbol (default `main`) in-process. Any args after `--` are passed to + * the JITed program as argv. The driver returns whatever the entry + * returns, or 1 on a compile/link/lookup error. 
*/ + +#define RUN_TOOL "run" + +typedef struct RunOptions { + DriverEnv* env; + size_t argv_bound; + + int opt_level; + int debug_info; + const char* entry; /* -e, default "main" */ + const char** include_dirs; /* -I */ + uint32_t ninclude_dirs; + const char** system_include_dirs; /* -isystem */ + uint32_t nsystem_include_dirs; + CfreeDefine* defines; /* -D */ + uint32_t ndefines; + const char** undefines; /* -U */ + uint32_t nundefines; + const char** sources; /* positional .c files */ + uint32_t nsources; + + char** prog_argv; /* args after `--` */ + uint32_t prog_argc; + + char** owned_define_names; + size_t* owned_define_name_sizes; +} RunOptions; + +static void run_usage(void) +{ + driver_errf(RUN_TOOL, "%s", + "usage: cfree run [-O0|-O1|-O2] [-g] [-e entry]\n" + " [-I dir]... [-isystem dir]...\n" + " [-D name[=body]]... [-U name]...\n" + " input.c... [-- arg...]"); +} + +static int run_alloc_arrays(RunOptions* o, int argc) +{ + size_t bound = (size_t)argc; + o->argv_bound = bound; + o->include_dirs = driver_alloc_zeroed(o->env, bound * sizeof(*o->include_dirs)); + o->system_include_dirs = driver_alloc_zeroed(o->env, bound * sizeof(*o->system_include_dirs)); + o->defines = driver_alloc_zeroed(o->env, bound * sizeof(*o->defines)); + o->owned_define_names = driver_alloc_zeroed(o->env, bound * sizeof(*o->owned_define_names)); + o->owned_define_name_sizes = driver_alloc_zeroed(o->env, bound * sizeof(*o->owned_define_name_sizes)); + o->undefines = driver_alloc_zeroed(o->env, bound * sizeof(*o->undefines)); + o->sources = driver_alloc_zeroed(o->env, bound * sizeof(*o->sources)); + o->prog_argv = driver_alloc_zeroed(o->env, bound * sizeof(*o->prog_argv)); + if (!o->include_dirs || !o->system_include_dirs || !o->defines || + !o->owned_define_names || !o->owned_define_name_sizes || + !o->undefines || !o->sources || !o->prog_argv) { + driver_errf(RUN_TOOL, "out of memory"); + return 1; + } + return 0; +} + +static int run_record_define(RunOptions* o, const char* arg) +{ 
+ const char* eq = driver_strchr(arg, '='); + CfreeDefine* d = &o->defines[o->ndefines]; + if (eq) { + size_t n = (size_t)(eq - arg); + size_t bytes = n + 1; + char* name = driver_alloc(o->env, bytes); + if (!name) { driver_errf(RUN_TOOL, "out of memory"); return 1; } + driver_memcpy(name, arg, n); + name[n] = '\0'; + o->owned_define_names[o->ndefines] = name; + o->owned_define_name_sizes[o->ndefines] = bytes; + d->name = name; + d->body = eq + 1; + } else { + d->name = arg; + d->body = NULL; + } + o->ndefines++; + return 0; +} + +static int run_parse(int argc, char** argv, RunOptions* o) +{ + int i; + int after_dash_dash = 0; + if (run_alloc_arrays(o, argc) != 0) return 1; + + for (i = 1; i < argc; ++i) { + const char* a = argv[i]; + + if (after_dash_dash) { + o->prog_argv[o->prog_argc++] = argv[i]; + continue; + } + if (driver_streq(a, "--")) { after_dash_dash = 1; continue; } + + if (driver_streq(a, "-g")) { o->debug_info = 1; continue; } + if (driver_streq(a, "-O0")) { o->opt_level = 0; continue; } + if (driver_streq(a, "-O1")) { o->opt_level = 1; continue; } + if (driver_streq(a, "-O2")) { o->opt_level = 2; continue; } + + if (driver_streq(a, "-e")) { + if (++i >= argc) { driver_errf(RUN_TOOL, "-e requires an argument"); return 1; } + o->entry = argv[i]; + continue; + } + + if (driver_strneq(a, "-I", 2)) { + const char* dir = a[2] ? a + 2 : (++i < argc ? argv[i] : NULL); + if (!dir) { driver_errf(RUN_TOOL, "-I requires an argument"); return 1; } + o->include_dirs[o->ninclude_dirs++] = dir; + continue; + } + + if (driver_streq(a, "-isystem")) { + if (++i >= argc) { driver_errf(RUN_TOOL, "-isystem requires an argument"); return 1; } + o->system_include_dirs[o->nsystem_include_dirs++] = argv[i]; + continue; + } + + if (driver_strneq(a, "-D", 2)) { + const char* arg = a[2] ? a + 2 : (++i < argc ? 
argv[i] : NULL); + if (!arg) { driver_errf(RUN_TOOL, "-D requires an argument"); return 1; } + if (run_record_define(o, arg) != 0) return 1; + continue; + } + + if (driver_strneq(a, "-U", 2)) { + const char* arg = a[2] ? a + 2 : (++i < argc ? argv[i] : NULL); + if (!arg) { driver_errf(RUN_TOOL, "-U requires an argument"); return 1; } + o->undefines[o->nundefines++] = arg; + continue; + } + + if (a[0] == '-' && a[1] != '\0') { + driver_errf(RUN_TOOL, "unknown flag: %s", a); + return 1; + } + + o->sources[o->nsources++] = a; + } + + if (o->nsources == 0) { + driver_errf(RUN_TOOL, "no input files"); + run_usage(); + return 1; + } + if (!o->entry) o->entry = "main"; + return 0; +} + +static void run_options_release(RunOptions* o) +{ + uint32_t i; + size_t bound = o->argv_bound; + for (i = 0; i < o->ndefines; ++i) { + if (o->owned_define_names[i]) { + driver_free(o->env, o->owned_define_names[i], + o->owned_define_name_sizes[i]); + } + } + driver_free(o->env, o->include_dirs, bound * sizeof(*o->include_dirs)); + driver_free(o->env, o->system_include_dirs, bound * sizeof(*o->system_include_dirs)); + driver_free(o->env, o->defines, bound * sizeof(*o->defines)); + driver_free(o->env, o->owned_define_names, bound * sizeof(*o->owned_define_names)); + driver_free(o->env, o->owned_define_name_sizes, bound * sizeof(*o->owned_define_name_sizes)); + driver_free(o->env, o->undefines, bound * sizeof(*o->undefines)); + driver_free(o->env, o->sources, bound * sizeof(*o->sources)); + driver_free(o->env, o->prog_argv, bound * sizeof(*o->prog_argv)); +} + +static void run_to_cfree(const RunOptions* o, CfreeOptions* out, CfreeJit** out_jit) +{ + CfreeOptions z = {0}; + *out = z; + out->target = driver_host_target(); + out->env = driver_env_to_cfree(o->env); + out->output_kind = CFREE_OUTPUT_JIT; + out->opt_level = o->opt_level; + out->debug_info = o->debug_info; + + out->source_files = o->sources; + out->nsource_files = o->nsources; + + out->pp.include_dirs = o->include_dirs; + 
out->pp.ninclude_dirs = o->ninclude_dirs; + out->pp.system_include_dirs = o->system_include_dirs; + out->pp.nsystem_include_dirs = o->nsystem_include_dirs; + out->pp.defines = o->defines; + out->pp.ndefines = o->ndefines; + out->pp.undefines = o->undefines; + out->pp.nundefines = o->nundefines; + + out->entry = o->entry; + out->out_jit = out_jit; +} + +typedef int (*MainFn)(int, char**); + +int driver_run(int argc, char** argv) +{ + DriverEnv env; + RunOptions ro = {0}; + CfreeOptions copts; + CfreeJit* jit = NULL; + void* sym; + MainFn entry_fn; + int rc; + + driver_env_init(&env); + ro.env = &env; + + if (run_parse(argc, argv, &ro) != 0) { + run_options_release(&ro); + driver_env_fini(&env); + return 2; + } + + run_to_cfree(&ro, &copts, &jit); + rc = cfree_run(&copts); + if (rc != 0) { + run_options_release(&ro); + driver_env_fini(&env); + return rc; + } + + sym = cfree_jit_lookup(jit, ro.entry); + if (!sym) { + driver_errf(RUN_TOOL, "entry symbol not found: %s", ro.entry); + cfree_jit_free(jit); + run_options_release(&ro); + driver_env_fini(&env); + return 1; + } + + /* Object-pointer to function-pointer cast is implementation-defined in + * standard C, but every host where a JIT runs treats them as + * interchangeable. Going through a union avoids the -Wpedantic warning + * a direct cast triggers under -Wpedantic. */ + { + union { void* p; MainFn fn; } u; + u.p = sym; + entry_fn = u.fn; + } + + rc = entry_fn((int)ro.prog_argc, ro.prog_argv); + + cfree_jit_free(jit); + run_options_release(&ro); + driver_env_fini(&env); + return rc; +} diff --git a/include/cfree.h b/include/cfree.h @@ -163,11 +163,11 @@ const uint8_t* cfree_writer_mem_bytes(CfreeWriter*, size_t* len_out); * ============================================================ * cfree_link_jit produces a CfreeJit owning its mapped pages and resolved * image. Symbol lookup is by name (object-local handles never escape - * libcfree). 
*/ -typedef void (*CfreeJitFn)(void); - -void cfree_jit_free (CfreeJit*); -CfreeJitFn cfree_jit_lookup(CfreeJit*, const char* name); + * libcfree); dlsym-shaped — the caller casts to whatever function + * signature the JITed symbol actually has (e.g. int(*)(int, char**) for + * `main`). Returns NULL on miss. */ +void cfree_jit_free (CfreeJit*); +void* cfree_jit_lookup(CfreeJit*, const char* name); /* Resolver invoked when the linker encounters an undefined symbol. Returning * NULL is an error. */ @@ -229,6 +229,15 @@ typedef struct CfreeCompileOptions { CfreePpOptions pp; } CfreeCompileOptions; +/* Preprocess one C input. + * + * Reads `input` through the preprocessor configured by `pp` and writes + * preprocessed text to `out`. The Writer is not closed. On nonzero return + * the Writer may contain partial output and should not be consumed. The + * input bytes must outlive this call. */ +int cfree_preprocess (CfreeCompiler*, const CfreePpOptions* pp, + const CfreeBytesInput* input, CfreeWriter* out); + /* Compile one C input (memory bytes). * * cfree_compile_obj returns a CfreeObjBuilder owned by the CfreeCompiler. The @@ -320,4 +329,141 @@ typedef struct CfreeOptions { int cfree_run(const CfreeOptions*); +/* ============================================================ + * Binary format detection + * ============================================================ + * Sniff the format of a binary blob from its magic bytes. + * COFF is detected by common machine-type values (x86, x86_64, + * ARM, ARM64, RISC-V). Returns CFREE_BIN_UNKNOWN if no magic matches. 
*/ + +typedef enum CfreeBinFmt { + CFREE_BIN_UNKNOWN = 0, + CFREE_BIN_AR, + CFREE_BIN_ELF, + CFREE_BIN_COFF, /* relocatable COFF object; first 2 bytes are machine type */ + CFREE_BIN_PE, /* PE executable/DLL; starts with MZ header */ + CFREE_BIN_MACHO, + CFREE_BIN_WASM, +} CfreeBinFmt; + +CfreeBinFmt cfree_detect_fmt(const uint8_t* data, size_t len); + +/* ============================================================ + * Object inspection + * ============================================================ + * Open a relocatable object for inspection. Format is auto-detected from + * magic bytes. Returns NULL on failure. The input bytes must remain alive + * until cfree_obj_close. + * + * After a successful open, query functions provide read-only access to + * sections and symbols. Strings returned by query functions are interned + * and valid until cfree_obj_close. */ + +typedef struct CfreeObjFile CfreeObjFile; +typedef struct CfreeObjSymIter CfreeObjSymIter; + +typedef enum CfreeSecKind { + CFREE_SEC_TEXT, + CFREE_SEC_RODATA, + CFREE_SEC_DATA, + CFREE_SEC_BSS, + CFREE_SEC_DEBUG, + CFREE_SEC_OTHER, +} CfreeSecKind; + +typedef enum CfreeSecFlag { + CFREE_SF_EXEC = 1u << 0, + CFREE_SF_WRITE = 1u << 1, + CFREE_SF_ALLOC = 1u << 2, + CFREE_SF_TLS = 1u << 3, + CFREE_SF_MERGE = 1u << 4, + CFREE_SF_STRINGS = 1u << 5, +} CfreeSecFlag; + +typedef enum CfreeSymBind { + CFREE_SB_LOCAL, + CFREE_SB_GLOBAL, + CFREE_SB_WEAK, +} CfreeSymBind; + +typedef enum CfreeSymKind { + CFREE_SK_UNDEF, + CFREE_SK_FUNC, + CFREE_SK_OBJ, + CFREE_SK_SECTION, + CFREE_SK_FILE, + CFREE_SK_COMMON, + CFREE_SK_TLS, + CFREE_SK_ABS, +} CfreeSymKind; + +#define CFREE_SECTION_NONE UINT32_MAX + +typedef struct CfreeObjSecInfo { + const char* name; /* interned; valid until cfree_obj_close */ + CfreeSecKind kind; + uint32_t flags; /* bitmask of CfreeSecFlag */ + uint32_t size; /* bytes; BSS uses virtual size */ + uint32_t align; +} CfreeObjSecInfo; + +typedef struct CfreeObjSymInfo { + const char* name; /* 
interned; valid until cfree_obj_close */ + CfreeSymBind bind; + CfreeSymKind kind; + uint32_t section; /* 0-based index, or CFREE_SECTION_NONE */ + uint64_t value; + uint64_t size; +} CfreeObjSymInfo; + +CfreeObjFile* cfree_obj_open (const CfreeEnv*, CfreeTarget, + const CfreeBytesInput*); +void cfree_obj_close (CfreeObjFile*); +CfreeObjFmt cfree_obj_fmt (const CfreeObjFile*); +CfreeTarget cfree_obj_target (const CfreeObjFile*); +uint32_t cfree_obj_nsections (const CfreeObjFile*); +CfreeObjSecInfo cfree_obj_section (const CfreeObjFile*, uint32_t idx); + +CfreeObjSymIter* cfree_obj_symiter_new (CfreeObjFile*); +int cfree_obj_symiter_next(CfreeObjSymIter*, CfreeObjSymInfo* out); +void cfree_obj_symiter_free(CfreeObjSymIter*); + +/* ============================================================ + * Archive (ar) file + * ============================================================ + * Pure format I/O — no compilation context required. + * + * cfree_ar_write packs member byte payloads into a POSIX ar archive written + * to `out`. The Writer is not closed; I/O errors are detectable via + * out->error(). Returns 0 on success, 1 on bad arguments. + * + * cfree_ar_list writes one member name per line to `out` for each non-special + * member in the archive. Returns 0 on success, 1 on bad arguments or + * malformed archive. + * + * CfreeArIter is a stack-allocated cursor for iterating archive members. + * cfree_ar_iter_init validates the archive magic and positions the cursor + * at the first member; returns 1 on success, 0 on bad magic or NULL input. + * cfree_ar_iter_next advances to the next non-special member and fills *out; + * returns 1 if a member was returned, 0 at end or on malformed data. + * Member data pointers alias the original archive bytes and are valid as + * long as the archive bytes remain alive. 
*/
+int cfree_ar_write(CfreeWriter* out,
+                   const CfreeBytesInput* members, uint32_t nmembers);
+int cfree_ar_list (const CfreeBytesInput* archive, CfreeWriter* out);
+
+typedef struct CfreeArIter {
+  const uint8_t* _p;    /* cursor: start of the next unread member header */
+  const uint8_t* _end;  /* one past the last byte of the archive */
+} CfreeArIter;
+
+typedef struct CfreeArMember {
+  char name[17];        /* null-terminated, max 16 chars */
+  const uint8_t* data;  /* points into archive bytes */
+  size_t size;          /* payload length in bytes, parsed from the header's decimal size field */
+} CfreeArMember;
+
+int cfree_ar_iter_init(CfreeArIter*, const CfreeBytesInput* archive);
+int cfree_ar_iter_next(CfreeArIter*, CfreeArMember* out);
+
 #endif
diff --git a/src/api/pipeline.c b/src/api/pipeline.c
@@ -10,6 +10,7 @@
 #include "cg/cg.h"
 #include "core/arena.h"
 #include "core/heap.h"
+#include "core/pool.h"
 #include "debug/debug.h"
 #include "decl/decl.h"
 #include "lex/lex.h"
@@ -91,6 +92,42 @@ static void apply_pp_options(Pp* pp, const CfreePpOptions* opts)
 }

 /* ============================================================
+ * Preprocess one TU
+ * ============================================================
+ * Preprocesses the in-memory buffer `input` and writes the resulting text
+ * to `out` through pp_emit_text.
+ *
+ * Returns 0 on success. Returns 1 when control comes back through the
+ * setjmp below, i.e. after a panic raised while this call was active.
+ * The argument checks report through panic_bad_options, which presumably
+ * longjmps to c->panic and so also ends in the `return 1` path
+ * (NOTE(review): confirm panic_bad_options never returns).
+ *
+ * The caller's panic state is saved on entry and restored on both the
+ * success and the failure exit. */
+
+int cfree_preprocess(CfreeCompiler* c, const CfreePpOptions* pp_opts,
+                     const CfreeBytesInput* input, CfreeWriter* out)
+{
+  PanicSave saved;
+  Pp* pp;
+  Lexer* lex;
+
+  compiler_panic_save(c, &saved); /* must precede setjmp so the previous state can be reinstated */
+  if (setjmp(c->panic)) { /* non-zero: re-entered via longjmp after a panic */
+    compiler_run_cleanups(c);
+    compiler_panic_restore(c, &saved);
+    return 1;
+  }
+  /* Argument validation happens after setjmp so failures share the path above. */
+  if (!pp_opts || !input || !out) {
+    panic_bad_options(c, "preprocess args missing");
+  }
+  if (!input->name) panic_bad_options(c, "input name is NULL");
+  if (!input->data && input->len != 0) { /* an empty input may have NULL data */
+    panic_bad_options(c, "input data is NULL but len > 0");
+  }
+
+  pp = pp_new(c);
+  lex = lex_open_mem(c, input->name, (const char*)input->data, input->len);
+  apply_pp_options(pp, pp_opts);
+  pp_push_input(pp, lex); /* PP owns the lexer from here on */
+  pp_emit_text(pp, out);
+  pp_free(pp); /* only reached on success; the panic path relies on compiler_run_cleanups */
+
+  compiler_panic_restore(c, &saved);
+  return 0;
+}
+
+/* ============================================================
  * Compile one TU
  * 
============================================================ */ @@ -622,3 +659,360 @@ int cfree_run(const CfreeOptions* opts) compiler_fini(c); return 0; } + +/* ============================================================ + * Write helpers used by cfree_ar_write and cfree_ar_list. + * All file-static; no libc I/O. + * ============================================================ */ + +static void wh_bytes(Writer* w, const void* p, size_t n) { w->write(w, p, n); } +static void wh_char (Writer* w, char c) { w->write(w, &c, 1); } +static void wh_nl (Writer* w) { wh_char(w, '\n'); } + +/* Format v as decimal into dst[width], left-justified, space-padded right. */ +static void wh_ar_num(char* dst, int width, u64 v) +{ + char tmp[20]; + int len = 0, i; + if (v == 0) { tmp[len++] = '0'; } + else { u64 t = v; while (t) { tmp[len++] = '0' + (int)(t % 10); t /= 10; } } + for (i = 0; i < len / 2; ++i) { + char x = tmp[i]; tmp[i] = tmp[len - 1 - i]; tmp[len - 1 - i] = x; + } + for (i = 0; i < len && i < width; ++i) dst[i] = tmp[i]; + for (; i < width; ++i) dst[i] = ' '; +} + +/* ============================================================ + * Binary format detection + * ============================================================ */ + +CfreeBinFmt cfree_detect_fmt(const uint8_t* data, size_t len) +{ + u32 m; + u16 coff_machine; + + if (len >= 8 && + data[0] == '!' 
&& data[1] == '<' && data[2] == 'a' && data[3] == 'r' && + data[4] == 'c' && data[5] == 'h' && data[6] == '>' && data[7] == '\n') { + return CFREE_BIN_AR; + } + if (len >= 4 && + data[0] == 0x7f && data[1] == 'E' && data[2] == 'L' && data[3] == 'F') { + return CFREE_BIN_ELF; + } + if (len >= 4 && + data[0] == 0x00 && data[1] == 'a' && data[2] == 's' && data[3] == 'm') { + return CFREE_BIN_WASM; + } + if (len >= 4) { + m = (u32)data[0] | ((u32)data[1] << 8) | + ((u32)data[2] << 16) | ((u32)data[3] << 24); + if (m == 0xFEEDFACEu || m == 0xFEEDFACFu || + m == 0xCEFAEDFEu || m == 0xCFFAEDFEu || + m == 0xCAFEBABEu) { + return CFREE_BIN_MACHO; + } + } + /* PE: MZ header — must come before COFF machine-type check since 0x5A4D + * ('MZ') does not collide with known COFF machine types, but checking + * explicitly avoids any future ambiguity. */ + if (len >= 2 && data[0] == 'M' && data[1] == 'Z') { + return CFREE_BIN_PE; + } + /* COFF relocatable object: first 2 bytes are the machine type. */ + if (len >= 2) { + coff_machine = (u16)data[0] | ((u16)data[1] << 8); + switch (coff_machine) { + case 0x8664: /* AMD64 */ + case 0x014C: /* I386 */ + case 0xAA64: /* ARM64 */ + case 0x01C4: /* ARMNT */ + case 0x5032: /* RISCV32 */ + case 0x5064: /* RISCV64 */ + return CFREE_BIN_COFF; + } + } + return CFREE_BIN_UNKNOWN; +} + +static ObjBuilder* obj_read_bytes(Compiler* c, const char* name, + const u8* data, size_t len, ObjFmt fmt) +{ + switch (fmt) { + case CFREE_OBJ_ELF: return read_elf (c, name, data, len); + case CFREE_OBJ_COFF: return read_coff (c, name, data, len); + case CFREE_OBJ_MACHO: return read_macho(c, name, data, len); + case CFREE_OBJ_WASM: return read_wasm (c, name, data, len); + } + compiler_panic(c, no_loc(), "unknown object format: %s", name); +} + +/* ============================================================ + * Object inspection API (cfree_obj_open / query / close) + * ============================================================ */ + +struct CfreeObjFile { + 
Compiler compiler; + ObjBuilder* ob; + ObjFmt fmt; +}; + +struct CfreeObjSymIter { + CfreeObjFile* file; + ObjSymIter* inner; +}; + +CfreeObjFile* cfree_obj_open(const CfreeEnv* env, CfreeTarget target, + const CfreeBytesInput* input) +{ + Heap* h; + CfreeObjFile* f; + CfreeBinFmt bin; + ObjFmt ofmt; + + if (!env || !env->heap || !input) return NULL; + if (!input->data && input->len > 0) return NULL; + + bin = cfree_detect_fmt(input->data, input->len); + switch (bin) { + case CFREE_BIN_ELF: ofmt = CFREE_OBJ_ELF; break; + case CFREE_BIN_COFF: ofmt = CFREE_OBJ_COFF; break; + case CFREE_BIN_MACHO: ofmt = CFREE_OBJ_MACHO; break; + case CFREE_BIN_WASM: ofmt = CFREE_OBJ_WASM; break; + default: return NULL; + } + + h = (Heap*)env->heap; + f = (CfreeObjFile*)h->alloc(h, sizeof(*f), _Alignof(CfreeObjFile)); + if (!f) return NULL; + + compiler_init(&f->compiler, target, env); + if (setjmp(f->compiler.panic)) { + compiler_run_cleanups(&f->compiler); + compiler_fini(&f->compiler); + h->free(h, f, sizeof(*f)); + return NULL; + } + f->fmt = ofmt; + f->ob = obj_read_bytes(&f->compiler, input->name, input->data, input->len, ofmt); + return f; +} + +void cfree_obj_close(CfreeObjFile* f) +{ + Heap* h; + if (!f) return; + h = (Heap*)f->compiler.env->heap; + obj_free(f->ob); + compiler_fini(&f->compiler); + h->free(h, f, sizeof(*f)); +} + +CfreeObjFmt cfree_obj_fmt(const CfreeObjFile* f) +{ + return f->fmt; +} + +CfreeTarget cfree_obj_target(const CfreeObjFile* f) +{ + return f->compiler.target; +} + +uint32_t cfree_obj_nsections(const CfreeObjFile* f) +{ + return obj_section_count(f->ob); +} + +CfreeObjSecInfo cfree_obj_section(const CfreeObjFile* f, uint32_t idx) +{ + const Section* sec = obj_section_get(f->ob, (ObjSecId)(idx + 1)); + CfreeObjSecInfo out; + out.name = (sec && sec->name) ? pool_str(f->compiler.global, sec->name, NULL) : ""; + out.kind = sec ? (CfreeSecKind)sec->kind : CFREE_SEC_OTHER; + out.flags = sec ? (uint32_t)sec->flags : 0u; + out.size = sec ? (sec->bss_size ? 
sec->bss_size : sec->bytes.total) : 0u; + out.align = sec ? sec->align : 0u; + return out; +} + +CfreeObjSymIter* cfree_obj_symiter_new(CfreeObjFile* f) +{ + Heap* h = (Heap*)f->compiler.env->heap; + CfreeObjSymIter* it = (CfreeObjSymIter*)h->alloc( + h, sizeof(*it), _Alignof(CfreeObjSymIter)); + if (!it) return NULL; + it->file = f; + it->inner = obj_symiter_new(f->ob); + return it; +} + +int cfree_obj_symiter_next(CfreeObjSymIter* it, CfreeObjSymInfo* out) +{ + ObjSymEntry entry; + const ObjSym* sym; + if (!obj_symiter_next(it->inner, &entry)) return 0; + sym = entry.sym; + out->name = sym->name ? pool_str(it->file->compiler.global, sym->name, NULL) : ""; + out->bind = (CfreeSymBind)sym->bind; + out->kind = (CfreeSymKind)sym->kind; + out->section = sym->section_id != OBJ_SEC_NONE + ? (uint32_t)(sym->section_id - 1) + : CFREE_SECTION_NONE; + out->value = sym->value; + out->size = sym->size; + return 1; +} + +void cfree_obj_symiter_free(CfreeObjSymIter* it) +{ + Heap* h; + if (!it) return; + obj_symiter_free(it->inner); + h = (Heap*)it->file->compiler.env->heap; + h->free(h, it, sizeof(*it)); +} + +/* ============================================================ + * POSIX ar archive: write and list + * ============================================================ + * Archive format: 8-byte magic "!<arch>\n", then zero or more members. + * Each member has a 60-byte fixed-width ASCII header followed by data + * bytes (plus one '\n' pad byte when data length is odd). */ + +int cfree_ar_write(CfreeWriter* out, + const CfreeBytesInput* members, uint32_t nmembers) +{ + static const char magic[] = "!<arch>\n"; + uint32_t i; + + if (!out) return 1; + if (!members && nmembers) return 1; + + wh_bytes(out, magic, 8); + + for (i = 0; i < nmembers; ++i) { + const CfreeBytesInput* m = &members[i]; + const char* name; + const char* p; + size_t namelen; + char hdr[60]; + char pad = '\n'; + size_t j; + + if (!m->name) return 1; + + /* Use basename of the member path. 
*/ + name = m->name; + for (p = m->name; *p; ++p) { + if (*p == '/') name = p + 1; + } + namelen = 0; + for (p = name; *p; ++p) ++namelen; + if (namelen > 15) namelen = 15; /* truncate; long-name table not yet supported */ + + /* ar_name[16]: name + '/' + spaces */ + for (j = 0; j < 16; ++j) hdr[j] = ' '; + for (j = 0; j < namelen; ++j) hdr[j] = name[j]; + hdr[namelen] = '/'; + + /* ar_date[12]: 0 */ + for (j = 16; j < 28; ++j) hdr[j] = ' '; + hdr[16] = '0'; + + /* ar_uid[6]: 0 */ + for (j = 28; j < 34; ++j) hdr[j] = ' '; + hdr[28] = '0'; + + /* ar_gid[6]: 0 */ + for (j = 34; j < 40; ++j) hdr[j] = ' '; + hdr[34] = '0'; + + /* ar_mode[8]: 644 */ + for (j = 40; j < 48; ++j) hdr[j] = ' '; + hdr[40] = '6'; hdr[41] = '4'; hdr[42] = '4'; + + /* ar_size[10]: member data size, decimal */ + wh_ar_num(hdr + 48, 10, (uint64_t)m->len); + + /* ar_fmag[2]: "`\n" */ + hdr[58] = '`'; + hdr[59] = '\n'; + + wh_bytes(out, hdr, 60); + if (m->data && m->len) wh_bytes(out, m->data, m->len); + if (m->len & 1) wh_bytes(out, &pad, 1); + } + + return 0; +} + +int cfree_ar_iter_init(CfreeArIter* it, const CfreeBytesInput* archive) +{ + if (!it || !archive) return 0; + if (!archive->data && archive->len) return 0; + if (cfree_detect_fmt(archive->data, archive->len) != CFREE_BIN_AR) return 0; + it->_p = archive->data + 8; + it->_end = archive->data + archive->len; + return 1; +} + +int cfree_ar_iter_next(CfreeArIter* it, CfreeArMember* out) +{ + for (;;) { + int namelen; + uint64_t size; + size_t avail; + int j; + + if (it->_p + 60 > it->_end) return 0; + + namelen = 0; + for (j = 0; j < 16; ++j) { + char ch = (char)it->_p[j]; + if (ch == '/' || ch == ' ' || ch == '\0') break; + out->name[namelen++] = ch; + } + out->name[namelen] = '\0'; + + size = 0; + for (j = 48; j < 58; ++j) { + char ch = (char)it->_p[j]; + if (ch < '0' || ch > '9') break; + size = size * 10 + (uint64_t)(unsigned char)(ch - '0'); + } + + it->_p += 60; + avail = (size_t)(it->_end - it->_p); + if ((uint64_t)avail < size) 
return 0; /* truncated */ + + out->data = it->_p; + out->size = (size_t)size; + + it->_p += (size_t)size; + if ((size & 1) && it->_p < it->_end) it->_p++; + + if (namelen > 0 && out->name[0] != '/') return 1; + /* Skip special members (symbol table '/', extended name table '//') */ + } +} + +int cfree_ar_list(const CfreeBytesInput* archive, CfreeWriter* out) +{ + CfreeArIter it; + CfreeArMember m; + size_t namelen; + const char* p; + + if (!out) return 1; + if (!cfree_ar_iter_init(&it, archive)) return 1; + + while (cfree_ar_iter_next(&it, &m)) { + namelen = 0; + for (p = m.name; *p; ++p) ++namelen; + wh_bytes(out, m.name, namelen); + wh_nl(out); + } + + return 0; +} diff --git a/src/obj/obj.h b/src/obj/obj.h @@ -227,9 +227,9 @@ void emit_macho(Compiler*, ObjBuilder*, Writer*); void emit_wasm (Compiler*, ObjBuilder*, Writer*); /* ---- file format readers (for ld and objdump) ---- */ -ObjBuilder* read_elf (Compiler*, const char* path); -ObjBuilder* read_coff (Compiler*, const char* path); -ObjBuilder* read_macho(Compiler*, const char* path); -ObjBuilder* read_wasm (Compiler*, const char* path); +ObjBuilder* read_elf (Compiler*, const char* name, const u8* data, size_t len); +ObjBuilder* read_coff (Compiler*, const char* name, const u8* data, size_t len); +ObjBuilder* read_macho(Compiler*, const char* name, const u8* data, size_t len); +ObjBuilder* read_wasm (Compiler*, const char* name, const u8* data, size_t len); #endif diff --git a/src/pp/pp.h b/src/pp/pp.h @@ -27,4 +27,9 @@ void pp_add_include_edge(Pp*, u32 includer_file_id, u32 included_file_id, Tok pp_next(Pp*); const LitInfo* pp_lit(const Pp*, LitId); +/* Drains pp_next into `out` as preprocessed C source text: token spellings + * separated by single spaces where TF_HAS_SPACE is set, with newlines for + * TF_AT_BOL transitions. Stops on TOK_EOF. Used by cfree_preprocess. */ +void pp_emit_text(Pp*, Writer* out); + #endif