kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 0035bf0c8efe3a8b38fb19e4ee9118b750c29ed5
parent 66b1fcd57ed61a89e6c5745445b42c767da7b4b7
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  9 May 2026 04:15:56 -0700

api: add ar, detect, lifecycle, stubs, writer_mem; update pipeline

Diffstat:
A src/api/ar.c | 431 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/api/detect.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/api/lifecycle.c | 30 ++++++++++++++++++++++++++++++
M src/api/pipeline.c | 431 ++++++++++++++++---------------------------------------------------------------
A src/api/stubs.c | 237 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/api/writer_mem.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/ar_test.c | 860 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 1941 insertions(+), 345 deletions(-)

diff --git a/src/api/ar.c b/src/api/ar.c @@ -0,0 +1,431 @@ +/* POSIX ar archive reader/writer (cfree_ar_write / cfree_ar_iter / + * cfree_ar_list). Pure format I/O over CfreeWriter and a const byte + * range — no pp/lex/parse/cg/obj dependencies. Kept in its own TU so + * consumers that only need the ar surface (e.g. test/ar_test, the + * driver's `ar` and `ld` paths once split out) don't drag in the full + * compile/link pipeline through the linker. + * + * Archive format: 8-byte magic "!<arch>\n", then zero or more members. + * Each member has a 60-byte fixed-width ASCII header followed by data + * bytes (plus one '\n' pad byte when data length is odd). */ + +#include <cfree.h> + +#include "core/core.h" + +/* ============================================================ + * Write helpers (file-static; libc-free). + * ============================================================ */ + +static void wh_bytes(Writer* w, const void* p, size_t n) { w->write(w, p, n); } +static void wh_char (Writer* w, char c) { w->write(w, &c, 1); } +static void wh_nl (Writer* w) { wh_char(w, '\n'); } + +/* Format v as decimal into dst[width], left-justified, space-padded right. */ +static void wh_ar_num(char* dst, int width, u64 v) +{ + char tmp[20]; + int len = 0, i; + if (v == 0) { tmp[len++] = '0'; } + else { u64 t = v; while (t) { tmp[len++] = '0' + (int)(t % 10); t /= 10; } } + for (i = 0; i < len / 2; ++i) { + char x = tmp[i]; tmp[i] = tmp[len - 1 - i]; tmp[len - 1 - i] = x; + } + for (i = 0; i < len && i < width; ++i) dst[i] = tmp[i]; + for (; i < width; ++i) dst[i] = ' '; +} + +/* Emit a 32-bit unsigned integer in big-endian byte order. */ +static void wh_be32(Writer* w, u32 v) +{ + u8 b[4]; + b[0] = (u8)((v >> 24) & 0xff); + b[1] = (u8)((v >> 16) & 0xff); + b[2] = (u8)((v >> 8) & 0xff); + b[3] = (u8)( v & 0xff); + wh_bytes(w, b, 4); +} + +/* ============================================================ + * Member-name helpers. 
+ * ============================================================ */ + +/* Compute the basename and length of a member path for ar_name encoding. */ +static void ar_name_basename(const char* in, const char** name_out, size_t* len_out) +{ + const char* name = in; + const char* p; + size_t namelen = 0; + for (p = in; *p; ++p) { + if (*p == '/') name = p + 1; + } + for (p = name; *p; ++p) ++namelen; + *name_out = name; + *len_out = namelen; +} + +/* Determine whether a member name needs the '//' long-name table. + * GNU ar uses the table when the basename exceeds 15 chars or contains + * '/' (since '/' is the in-header terminator). */ +static int ar_name_needs_longtable(const char* name, size_t len) +{ + size_t i; + if (len > 15) return 1; + for (i = 0; i < len; ++i) if (name[i] == '/') return 1; + return 0; +} + +/* libc-free strlen for caller-provided NUL-terminated strings. */ +static size_t ar_strlen(const char* s) +{ + size_t n = 0; + while (s[n]) ++n; + return n; +} + +/* Padded on-archive size of a member: 60-byte header + payload + parity pad. */ +static size_t ar_member_padded_size(size_t len) +{ + return 60 + len + (len & 1); +} + +/* Fill a 60-byte member header. `name_field` is the 16-byte ar_name encoding + * to write (already terminated with '/' and space-padded). 
*/ +static void ar_fill_header(char hdr[60], const char name_field[16], + uint64_t epoch, uint64_t size) +{ + size_t j; + for (j = 0; j < 16; ++j) hdr[j] = name_field[j]; + /* ar_date[12] */ + for (j = 16; j < 28; ++j) hdr[j] = ' '; + if (epoch) wh_ar_num(hdr + 16, 12, epoch); + else hdr[16] = '0'; + /* ar_uid[6]: 0 */ + for (j = 28; j < 34; ++j) hdr[j] = ' '; + hdr[28] = '0'; + /* ar_gid[6]: 0 */ + for (j = 34; j < 40; ++j) hdr[j] = ' '; + hdr[34] = '0'; + /* ar_mode[8]: 644 */ + for (j = 40; j < 48; ++j) hdr[j] = ' '; + hdr[40] = '6'; hdr[41] = '4'; hdr[42] = '4'; + /* ar_size[10] */ + wh_ar_num(hdr + 48, 10, size); + /* ar_fmag[2] */ + hdr[58] = '`'; hdr[59] = '\n'; +} + +/* ============================================================ + * Public API: write + * ============================================================ */ + +int cfree_ar_write(CfreeWriter* out, + const CfreeBytesInput* members, uint32_t nmembers, + const CfreeArWriteOptions* opts) +{ + static const char magic[] = "!<arch>\n"; + static const CfreeArWriteOptions default_opts = {0, 0, 0, NULL}; + uint32_t i; + uint64_t epoch; + int long_names; + int symbol_index; + const CfreeArMemberSymbols* msyms; + uint64_t longtab_size = 0; + uint32_t nsyms = 0; /* total symbols across all members */ + uint64_t names_size = 0; /* sum of strlen+1 for each symbol */ + uint64_t index_payload = 0; /* bytes of the / member's data */ + uint64_t index_total = 0; /* 60 + index_payload + parity pad */ + uint64_t longtab_total = 0; /* 60 + longtab_size + parity pad */ + char pad = '\n'; + + if (!out) return 1; + if (!members && nmembers) return 1; + + if (!opts) opts = &default_opts; + epoch = opts->epoch; + long_names = opts->long_names; + symbol_index = opts->symbol_index; + msyms = opts->member_symbols; + + /* Sizing pass: '//' long-name table size and '/' symbol-index payload + * size. Both are needed before emitting magic so the index can encode + * absolute member-header offsets. 
The emit phase walks members again + * and recomputes per-member layout — heap-free by design. */ + if (long_names) { + for (i = 0; i < nmembers; ++i) { + const char* name; + size_t namelen; + if (!members[i].name) return 1; + ar_name_basename(members[i].name, &name, &namelen); + if (ar_name_needs_longtable(name, namelen)) { + longtab_size += (uint64_t)namelen + 2; /* name + "/\n" */ + } + } + } else { + /* Validate names. */ + for (i = 0; i < nmembers; ++i) { + if (!members[i].name) return 1; + } + } + + if (symbol_index) { + if (msyms) { + for (i = 0; i < nmembers; ++i) { + u32 k; + if (msyms[i].count && !msyms[i].names) return 1; + for (k = 0; k < msyms[i].count; ++k) { + const char* nm = msyms[i].names[k]; + if (!nm) return 1; + nsyms += 1; + names_size += (uint64_t)ar_strlen(nm) + 1; + } + } + } + index_payload = (uint64_t)4 + (uint64_t)4 * (uint64_t)nsyms + names_size; + index_total = 60 + index_payload + (index_payload & 1); + } + if (longtab_size) { + longtab_total = 60 + longtab_size + (longtab_size & 1); + } + + wh_bytes(out, magic, 8); + + /* Emit '/' symbol-index member, if requested. The 16-byte ar_name field + * is `/` followed by 15 spaces (no terminating `/`); cfree_ar_iter_next + * skips members matching that exact prefix. */ + if (symbol_index) { + char hdr[60]; + char name_field[16]; + size_t j; + uint64_t cur_offset; + + for (j = 0; j < 16; ++j) name_field[j] = ' '; + name_field[0] = '/'; + ar_fill_header(hdr, name_field, epoch, index_payload); + wh_bytes(out, hdr, 60); + + wh_be32(out, nsyms); + + /* Offsets: each of member i's symbols emits the same offset, equal + * to the start of member i's 60-byte header. */ + cur_offset = (uint64_t)8 + index_total + longtab_total; + if (msyms) { + for (i = 0; i < nmembers; ++i) { + u32 k; + for (k = 0; k < msyms[i].count; ++k) { + wh_be32(out, (u32)cur_offset); + } + cur_offset += ar_member_padded_size(members[i].len); + } + } + + /* Names: same walk, NUL-terminated, in member then declaration order. 
*/ + if (msyms) { + for (i = 0; i < nmembers; ++i) { + u32 k; + for (k = 0; k < msyms[i].count; ++k) { + const char* nm = msyms[i].names[k]; + size_t nmlen = ar_strlen(nm); + wh_bytes(out, nm, nmlen + 1); + } + } + } + + if (index_payload & 1) wh_bytes(out, &pad, 1); + } + + /* Emit '//' long-name table member, if any. */ + if (longtab_size) { + char hdr[60]; + char name_field[16]; + size_t j; + for (j = 0; j < 16; ++j) name_field[j] = ' '; + name_field[0] = '/'; + name_field[1] = '/'; + ar_fill_header(hdr, name_field, 0, longtab_size); + wh_bytes(out, hdr, 60); + for (i = 0; i < nmembers; ++i) { + const char* name; + size_t namelen; + ar_name_basename(members[i].name, &name, &namelen); + if (ar_name_needs_longtable(name, namelen)) { + wh_bytes(out, name, namelen); + wh_bytes(out, "/\n", 2); + } + } + if (longtab_size & 1) wh_bytes(out, &pad, 1); + } + + /* Re-walk members; emit headers + payloads. Track the running offset + * within the // table so each long-name member's name field encodes + * `/<offset>`. */ + { + uint64_t longtab_off = 0; + for (i = 0; i < nmembers; ++i) { + const CfreeBytesInput* m = &members[i]; + const char* name; + size_t namelen; + char hdr[60]; + char name_field[16]; + size_t j; + + ar_name_basename(m->name, &name, &namelen); + + for (j = 0; j < 16; ++j) name_field[j] = ' '; + if (long_names && ar_name_needs_longtable(name, namelen)) { + /* Encode `/<decimal-offset>` in the 16-byte name field. */ + name_field[0] = '/'; + wh_ar_num(name_field + 1, 15, longtab_off); + longtab_off += (uint64_t)namelen + 2; + } else { + size_t emit = namelen > 15 ? 
15 : namelen; + for (j = 0; j < emit; ++j) name_field[j] = name[j]; + name_field[emit] = '/'; + } + + ar_fill_header(hdr, name_field, epoch, (uint64_t)m->len); + wh_bytes(out, hdr, 60); + if (m->data && m->len) wh_bytes(out, m->data, m->len); + if (m->len & 1) wh_bytes(out, &pad, 1); + } + } + + return 0; +} + +/* ============================================================ + * Public API: read (iterator) and list + * ============================================================ */ + +int cfree_ar_iter_init(CfreeArIter* it, const CfreeBytesInput* archive) +{ + if (!it || !archive) return 0; + if (!archive->data && archive->len) return 0; + if (cfree_detect_fmt(archive->data, archive->len) != CFREE_BIN_AR) return 0; + it->_p = archive->data + 8; + it->_end = archive->data + archive->len; + it->_longnames = NULL; + it->_longnames_len = 0; + it->_namebuf[0] = '\0'; + return 1; +} + +/* Resolve a `/<decimal-offset>` reference into the iterator's `//` table. + * Names in the table are terminated by '/' or '\n'. Returns the resolved + * name length, or 0 on failure. Writes the name into it->_namebuf. 
*/ +static size_t ar_resolve_longname(CfreeArIter* it, uint64_t off) +{ + size_t i; + if (!it->_longnames) return 0; + if (off >= it->_longnames_len) return 0; + for (i = 0; i + 1 < sizeof(it->_namebuf); ++i) { + size_t k = (size_t)off + i; + char ch; + if (k >= it->_longnames_len) break; + ch = (char)it->_longnames[k]; + if (ch == '/' || ch == '\n') break; + it->_namebuf[i] = ch; + } + it->_namebuf[i] = '\0'; + return i; +} + +int cfree_ar_iter_next(CfreeArIter* it, CfreeArMember* out) +{ + for (;;) { + uint64_t size; + size_t avail; + int j; + int namelen; + char name_field[16]; + + if (it->_p + 60 > it->_end) return 0; + + for (j = 0; j < 16; ++j) name_field[j] = (char)it->_p[j]; + + size = 0; + for (j = 48; j < 58; ++j) { + char ch = (char)it->_p[j]; + if (ch < '0' || ch > '9') break; + size = size * 10 + (uint64_t)(unsigned char)(ch - '0'); + } + + it->_p += 60; + avail = (size_t)(it->_end - it->_p); + if ((uint64_t)avail < size) return 0; /* truncated */ + + /* Special members (handled before user-visible naming): + * "//" extended-name (long-name) table + * "/" alone System V symbol index + * "__.SYMDEF" BSD symbol index */ + if (name_field[0] == '/' && name_field[1] == '/') { + it->_longnames = it->_p; + it->_longnames_len = (size_t)size; + goto advance; + } + if (name_field[0] == '/' && name_field[1] == ' ') { + /* System V symbol index "/ ". */ + goto advance; + } + + /* Decode name. */ + if (name_field[0] == '/' && + name_field[1] >= '0' && name_field[1] <= '9') { + /* `/<offset>` long-name reference. 
*/ + uint64_t off = 0; + for (j = 1; j < 16; ++j) { + char ch = name_field[j]; + if (ch < '0' || ch > '9') break; + off = off * 10 + (uint64_t)(unsigned char)(ch - '0'); + } + namelen = (int)ar_resolve_longname(it, off); + } else { + namelen = 0; + for (j = 0; j < 16; ++j) { + char ch = name_field[j]; + if (ch == '/' || ch == ' ' || ch == '\0') break; + it->_namebuf[namelen++] = ch; + } + it->_namebuf[namelen] = '\0'; + } + + out->name = it->_namebuf; + out->data = it->_p; + out->size = (size_t)size; + + it->_p += (size_t)size; + if ((size & 1) && it->_p < it->_end) it->_p++; + + /* Skip special-but-named members (BSD symbol index). */ + if (it->_namebuf[0] == '_' && + it->_namebuf[1] == '_' && it->_namebuf[2] == '.') { + continue; + } + if (namelen > 0) return 1; + continue; + + advance: + it->_p += (size_t)size; + if ((size & 1) && it->_p < it->_end) it->_p++; + } +} + +int cfree_ar_list(const CfreeBytesInput* archive, CfreeWriter* out) +{ + CfreeArIter it; + CfreeArMember m; + size_t namelen; + const char* p; + + if (!out) return 1; + if (!cfree_ar_iter_init(&it, archive)) return 1; + + while (cfree_ar_iter_next(&it, &m)) { + namelen = 0; + for (p = m.name; *p; ++p) ++namelen; + wh_bytes(out, m.name, namelen); + wh_nl(out); + } + + return 0; +} diff --git a/src/api/detect.c b/src/api/detect.c @@ -0,0 +1,192 @@ +/* Binary format and target detection from object header bytes. Pure + * byte parsing; no libcfree dependencies, kept in its own translation + * unit so consumers that only need detection (e.g. cfree-roundtrip + * tests) don't drag the full pipeline in through the linker. */ + +#include <cfree.h> + +#include "core/core.h" + +CfreeBinFmt cfree_detect_fmt(const uint8_t* data, size_t len) +{ + u32 m; + u16 coff_machine; + + if (!data) return CFREE_BIN_UNKNOWN; + if (len >= 8 && + data[0] == '!' 
&& data[1] == '<' && data[2] == 'a' && data[3] == 'r' && + data[4] == 'c' && data[5] == 'h' && data[6] == '>' && data[7] == '\n') { + return CFREE_BIN_AR; + } + if (len >= 4 && + data[0] == 0x7f && data[1] == 'E' && data[2] == 'L' && data[3] == 'F') { + return CFREE_BIN_ELF; + } + if (len >= 4 && + data[0] == 0x00 && data[1] == 'a' && data[2] == 's' && data[3] == 'm') { + return CFREE_BIN_WASM; + } + if (len >= 4) { + m = (u32)data[0] | ((u32)data[1] << 8) | + ((u32)data[2] << 16) | ((u32)data[3] << 24); + if (m == 0xFEEDFACEu || m == 0xFEEDFACFu || + m == 0xCEFAEDFEu || m == 0xCFFAEDFEu || + m == 0xCAFEBABEu) { + return CFREE_BIN_MACHO; + } + } + if (len >= 2 && data[0] == 'M' && data[1] == 'Z') { + return CFREE_BIN_PE; + } + if (len >= 2) { + coff_machine = (u16)data[0] | ((u16)data[1] << 8); + switch (coff_machine) { + case 0x8664: + case 0x014C: + case 0xAA64: + case 0x01C4: + case 0x5032: + case 0x5064: + return CFREE_BIN_COFF; + } + } + return CFREE_BIN_UNKNOWN; +} + +static void detect_target_defaults(CfreeTarget* t) +{ + t->big_endian = 0; + t->pic = CFREE_PIC_NONE; + t->code_model = CFREE_CM_DEFAULT; +} + +static void detect_set_ptr(CfreeTarget* t, CfreeArchKind arch) +{ + t->arch = arch; + switch (arch) { + case CFREE_ARCH_X86_64: + case CFREE_ARCH_ARM_64: + case CFREE_ARCH_RV64: + t->ptr_size = 8; t->ptr_align = 8; break; + case CFREE_ARCH_X86_32: + case CFREE_ARCH_ARM_32: + case CFREE_ARCH_RV32: + case CFREE_ARCH_WASM: + t->ptr_size = 4; t->ptr_align = 4; break; + } +} + +static int detect_elf(const u8* d, size_t len, CfreeTarget* out) +{ + u8 ei_class, ei_data, ei_osabi; + u16 e_machine; + if (len < 20) return 1; + ei_class = d[4]; + ei_data = d[5]; + ei_osabi = d[7]; + if (ei_data == 1) { + e_machine = (u16)d[18] | ((u16)d[19] << 8); + } else if (ei_data == 2) { + e_machine = (u16)d[19] | ((u16)d[18] << 8); + } else { + return 1; + } + + detect_target_defaults(out); + out->big_endian = (ei_data == 2); + out->obj = CFREE_OBJ_ELF; + + switch (e_machine) 
{ + case 0x03: detect_set_ptr(out, CFREE_ARCH_X86_32); break; + case 0x3E: detect_set_ptr(out, CFREE_ARCH_X86_64); break; + case 0x28: detect_set_ptr(out, CFREE_ARCH_ARM_32); break; + case 0xB7: detect_set_ptr(out, CFREE_ARCH_ARM_64); break; + case 0xF3: + if (ei_class == 1) detect_set_ptr(out, CFREE_ARCH_RV32); + else if (ei_class == 2) detect_set_ptr(out, CFREE_ARCH_RV64); + else return 1; + break; + default: return 1; + } + if (ei_osabi == 0 || ei_osabi == 3) out->os = CFREE_OS_LINUX; + else out->os = CFREE_OS_FREESTANDING; + return 0; +} + +static int detect_coff(const u8* d, size_t len, CfreeTarget* out) +{ + u16 machine; + if (len < 2) return 1; + machine = (u16)d[0] | ((u16)d[1] << 8); + detect_target_defaults(out); + out->obj = CFREE_OBJ_COFF; + out->os = CFREE_OS_WINDOWS; + switch (machine) { + case 0x8664: detect_set_ptr(out, CFREE_ARCH_X86_64); break; + case 0x014C: detect_set_ptr(out, CFREE_ARCH_X86_32); break; + case 0xAA64: detect_set_ptr(out, CFREE_ARCH_ARM_64); break; + case 0x01C4: detect_set_ptr(out, CFREE_ARCH_ARM_32); break; + case 0x5032: detect_set_ptr(out, CFREE_ARCH_RV32); break; + case 0x5064: detect_set_ptr(out, CFREE_ARCH_RV64); break; + default: return 1; + } + return 0; +} + +static int detect_macho(const u8* d, size_t len, CfreeTarget* out) +{ + u32 magic, cputype; + int swap, is64; + if (len < 8) return 1; + magic = (u32)d[0] | ((u32)d[1] << 8) | ((u32)d[2] << 16) | ((u32)d[3] << 24); + switch (magic) { + case 0xFEEDFACEu: swap = 0; is64 = 0; break; + case 0xFEEDFACFu: swap = 0; is64 = 1; break; + case 0xCEFAEDFEu: swap = 1; is64 = 0; break; + case 0xCFFAEDFEu: swap = 1; is64 = 1; break; + default: return 1; + } + if (!swap) { + cputype = (u32)d[4] | ((u32)d[5] << 8) | ((u32)d[6] << 16) | ((u32)d[7] << 24); + } else { + cputype = (u32)d[7] | ((u32)d[6] << 8) | ((u32)d[5] << 16) | ((u32)d[4] << 24); + } + detect_target_defaults(out); + out->obj = CFREE_OBJ_MACHO; + out->os = CFREE_OS_MACOS; + switch (cputype) { + case 0x00000007u: 
detect_set_ptr(out, CFREE_ARCH_X86_32); break; + case 0x01000007u: detect_set_ptr(out, CFREE_ARCH_X86_64); break; + case 0x0000000Cu: detect_set_ptr(out, CFREE_ARCH_ARM_32); break; + case 0x0100000Cu: detect_set_ptr(out, CFREE_ARCH_ARM_64); break; + default: return 1; + } + (void)is64; + return 0; +} + +int cfree_detect_target(const uint8_t* data, size_t len, CfreeTarget* out) +{ + CfreeBinFmt bin; + if (!data || !out) return 1; + bin = cfree_detect_fmt(data, len); + switch (bin) { + case CFREE_BIN_ELF: return detect_elf (data, len, out); + case CFREE_BIN_COFF: return detect_coff (data, len, out); + case CFREE_BIN_MACHO: return detect_macho(data, len, out); + case CFREE_BIN_WASM: { + CfreeTarget t; + t.big_endian = 0; + t.pic = CFREE_PIC_NONE; + t.code_model = CFREE_CM_DEFAULT; + t.arch = CFREE_ARCH_WASM; + t.ptr_size = 4; + t.ptr_align = 4; + t.obj = CFREE_OBJ_WASM; + t.os = CFREE_OS_WASI; + *out = t; + return 0; + } + default: return 1; + } +} diff --git a/src/api/lifecycle.c b/src/api/lifecycle.c @@ -0,0 +1,30 @@ +/* CfreeCompiler lifecycle. Kept separate from pipeline.c so consumers + * that only need the lifecycle (e.g. obj-inspection or roundtrip tests) + * don't drag in the full compile/link pipeline through the linker. 
*/ + +#include <cfree.h> + +#include "core/core.h" +#include "core/heap.h" + +CfreeCompiler* cfree_compiler_new(CfreeTarget target, const CfreeEnv* env) +{ + Heap* h; + Compiler* c; + + if (!env || !env->heap) return NULL; + h = env->heap; + c = h->alloc(h, sizeof(*c), _Alignof(Compiler)); + if (!c) return NULL; + compiler_init(c, target, env); + return c; +} + +void cfree_compiler_free(CfreeCompiler* c) +{ + Heap* h; + if (!c) return; + h = (Heap*)c->env->heap; + compiler_fini(c); + h->free(h, c, sizeof(*c)); +} diff --git a/src/api/pipeline.c b/src/api/pipeline.c @@ -20,31 +20,15 @@ #include "parse/parse.h" #include "pp/pp.h" -/* ============================================================ - * CfreeCompiler lifecycle (public) - * ============================================================ */ - -CfreeCompiler* cfree_compiler_new(CfreeTarget target, const CfreeEnv* env) -{ - Heap* h; - Compiler* c; - - if (!env || !env->heap) return NULL; - h = env->heap; - c = h->alloc(h, sizeof(*c), _Alignof(Compiler)); - if (!c) return NULL; - compiler_init(c, target, env); - return c; -} - -void cfree_compiler_free(CfreeCompiler* c) -{ - Heap* h; - if (!c) return; - h = (Heap*)c->env->heap; - compiler_fini(c); - h->free(h, c, sizeof(*c)); -} +/* CfreeCompiler lifecycle (cfree_compiler_new / cfree_compiler_free) + * lives in src/api/lifecycle.c so consumers that only need lifecycle + * + read paths don't drag the full compile/link pipeline through ld. + * + * Binary/target detection (cfree_detect_fmt / cfree_detect_target) lives + * in src/api/detect.c for the same reason. + * + * In-memory writer (cfree_writer_mem / cfree_writer_mem_bytes) lives in + * src/api/writer_mem.c. 
*/ /* ============================================================ * Helpers @@ -120,6 +104,76 @@ int cfree_preprocess(CfreeCompiler* c, const CfreePpOptions* pp_opts, } /* ============================================================ + * Dump tokens (lex-only) + * ============================================================ */ + +static void dt_write_str(Writer* w, const char* s) +{ + size_t n = 0; + while (s[n]) ++n; + w->write(w, s, n); +} + +static void dt_write_sym(Writer* w, Pool* p, Sym sym) +{ + size_t len = 0; + const char* s = sym ? pool_str(p, sym, &len) : NULL; + if (s && len) w->write(w, s, len); +} + +static void dt_emit(Writer* w, Pool* p, const Tok* t) +{ + switch (t->kind) { + case TOK_EOF: dt_write_str(w, "(eof)\n"); return; + case TOK_NEWLINE: dt_write_str(w, "(newline)\n"); return; + case TOK_PP_HASH: dt_write_str(w, "(pp-hash)\n"); return; + case TOK_PP_PASTE: dt_write_str(w, "(pp-paste)\n"); return; + case TOK_IDENT: dt_write_str(w, "(ident "); break; + case TOK_NUM: dt_write_str(w, "(num "); break; + case TOK_FLT: dt_write_str(w, "(flt "); break; + case TOK_STR: dt_write_str(w, "(str "); break; + case TOK_CHR: dt_write_str(w, "(chr "); break; + case TOK_PUNCT: dt_write_str(w, "(punct "); break; + default: dt_write_str(w, "(unknown "); break; + } + dt_write_sym(w, p, t->spelling); + dt_write_str(w, ")\n"); +} + +int cfree_dump_tokens(CfreeCompiler* c, const CfreeBytesInput* input, + CfreeWriter* out) +{ + PanicSave saved; + Lexer* lex; + Tok t; + + compiler_panic_save(c, &saved); + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + compiler_panic_restore(c, &saved); + return 1; + } + if (!input || !out) { + panic_bad_options(c, "dump_tokens args missing"); + } + if (!input->name) panic_bad_options(c, "input name is NULL"); + if (!input->data && input->len != 0) { + panic_bad_options(c, "input data is NULL but len > 0"); + } + + lex = lex_open_mem(c, input->name, (const char*)input->data, input->len); + for (;;) { + t = lex_next(lex); + 
dt_emit(out, c->global, &t); + if (t.kind == TOK_EOF) break; + } + lex_close(lex); + + compiler_panic_restore(c, &saved); + return 0; +} + +/* ============================================================ * Compile one TU * ============================================================ */ @@ -470,33 +524,13 @@ int cfree_pipeline_link_jit(CfreePipeline* p, const CfreeLinkOptions* opts, return cfree_link_jit(p->compiler, opts, out_jit); } -/* ============================================================ - * Write helpers used by cfree_ar_write and cfree_ar_list. - * All file-static; no libc I/O. - * ============================================================ */ - -static void wh_bytes(Writer* w, const void* p, size_t n) { w->write(w, p, n); } -static void wh_char (Writer* w, char c) { w->write(w, &c, 1); } -static void wh_nl (Writer* w) { wh_char(w, '\n'); } - -/* Format v as decimal into dst[width], left-justified, space-padded right. */ -static void wh_ar_num(char* dst, int width, u64 v) -{ - char tmp[20]; - int len = 0, i; - if (v == 0) { tmp[len++] = '0'; } - else { u64 t = v; while (t) { tmp[len++] = '0' + (int)(t % 10); t /= 10; } } - for (i = 0; i < len / 2; ++i) { - char x = tmp[i]; tmp[i] = tmp[len - 1 - i]; tmp[len - 1 - i] = x; - } - for (i = 0; i < len && i < width; ++i) dst[i] = tmp[i]; - for (; i < width; ++i) dst[i] = ' '; -} - -/* ============================================================ - * Binary format detection - * ============================================================ */ +/* Binary format / target detection (cfree_detect_fmt and cfree_detect_target, + * plus their detect_* helpers) live in src/api/detect.c — pure byte parsing, + * no internal-libcfree dependencies, kept separate so consumers that only + * detect inputs (e.g. cfree-roundtrip tests) don't drag this TU's + * lex/pp/parse/cg/etc. dependencies in through the linker. 
*/ +#if 0 /* moved to src/api/detect.c */ CfreeBinFmt cfree_detect_fmt(const uint8_t* data, size_t len) { u32 m; @@ -684,6 +718,7 @@ int cfree_detect_target(const uint8_t* data, size_t len, CfreeTarget* out) default: return 1; } } +#endif /* moved to src/api/detect.c */ static ObjBuilder* obj_read_bytes(Compiler* c, const char* name, const u8* data, size_t len, ObjFmt fmt) @@ -1009,297 +1044,3 @@ void cfree_obj_reliter_free(CfreeObjRelocIter* it) h->free(h, it, sizeof(*it)); } -/* ============================================================ - * POSIX ar archive: write and list - * ============================================================ - * Archive format: 8-byte magic "!<arch>\n", then zero or more members. - * Each member has a 60-byte fixed-width ASCII header followed by data - * bytes (plus one '\n' pad byte when data length is odd). */ - -/* Compute the basename and length of a member path for ar_name encoding. */ -static void ar_name_basename(const char* in, const char** name_out, size_t* len_out) -{ - const char* name = in; - const char* p; - size_t namelen = 0; - for (p = in; *p; ++p) { - if (*p == '/') name = p + 1; - } - for (p = name; *p; ++p) ++namelen; - *name_out = name; - *len_out = namelen; -} - -/* Determine whether a member name needs the '//' long-name table. - * GNU ar uses the table when the basename exceeds 15 chars or contains - * '/' (since '/' is the in-header terminator). */ -static int ar_name_needs_longtable(const char* name, size_t len) -{ - size_t i; - if (len > 15) return 1; - for (i = 0; i < len; ++i) if (name[i] == '/') return 1; - return 0; -} - -/* Fill a 60-byte member header. `name_field` is the 16-byte ar_name encoding - * to write (already terminated with '/' and space-padded). 
*/ -static void ar_fill_header(char hdr[60], const char name_field[16], - uint64_t epoch, uint64_t size) -{ - size_t j; - for (j = 0; j < 16; ++j) hdr[j] = name_field[j]; - /* ar_date[12] */ - for (j = 16; j < 28; ++j) hdr[j] = ' '; - if (epoch) wh_ar_num(hdr + 16, 12, epoch); - else hdr[16] = '0'; - /* ar_uid[6]: 0 */ - for (j = 28; j < 34; ++j) hdr[j] = ' '; - hdr[28] = '0'; - /* ar_gid[6]: 0 */ - for (j = 34; j < 40; ++j) hdr[j] = ' '; - hdr[34] = '0'; - /* ar_mode[8]: 644 */ - for (j = 40; j < 48; ++j) hdr[j] = ' '; - hdr[40] = '6'; hdr[41] = '4'; hdr[42] = '4'; - /* ar_size[10] */ - wh_ar_num(hdr + 48, 10, size); - /* ar_fmag[2] */ - hdr[58] = '`'; hdr[59] = '\n'; -} - -int cfree_ar_write(CfreeWriter* out, - const CfreeBytesInput* members, uint32_t nmembers, - const CfreeArWriteOptions* opts) -{ - static const char magic[] = "!<arch>\n"; - static const CfreeArWriteOptions default_opts = {0, 0, 0}; - uint32_t i; - uint64_t epoch; - int long_names; - uint64_t longtab_size = 0; - char pad = '\n'; - - if (!out) return 1; - if (!members && nmembers) return 1; - - if (!opts) opts = &default_opts; - epoch = opts->epoch; - long_names = opts->long_names; - - /* Two passes (heap-free, recomputing names): pass 1 sizes the '//' - * long-name table; pass 2 emits magic, '//' member if any, and each - * member with its 16-byte ar_name field. The long-name running offset - * is tracked inline during the second pass. */ - if (long_names) { - for (i = 0; i < nmembers; ++i) { - const char* name; - size_t namelen; - if (!members[i].name) return 1; - ar_name_basename(members[i].name, &name, &namelen); - if (ar_name_needs_longtable(name, namelen)) { - longtab_size += (uint64_t)namelen + 2; /* name + "/\n" */ - } - } - } else { - /* Validate names. */ - for (i = 0; i < nmembers; ++i) { - if (!members[i].name) return 1; - } - } - - wh_bytes(out, magic, 8); - - /* Emit '//' long-name table member, if any. 
*/ - if (longtab_size) { - char hdr[60]; - char name_field[16]; - size_t j; - for (j = 0; j < 16; ++j) name_field[j] = ' '; - name_field[0] = '/'; - name_field[1] = '/'; - ar_fill_header(hdr, name_field, 0, longtab_size); - wh_bytes(out, hdr, 60); - for (i = 0; i < nmembers; ++i) { - const char* name; - size_t namelen; - ar_name_basename(members[i].name, &name, &namelen); - if (ar_name_needs_longtable(name, namelen)) { - wh_bytes(out, name, namelen); - wh_bytes(out, "/\n", 2); - } - } - if (longtab_size & 1) wh_bytes(out, &pad, 1); - } - - /* Re-walk members; emit headers + payloads. Track the running offset - * within the // table so each long-name member's name field encodes - * `/<offset>`. */ - { - uint64_t longtab_off = 0; - for (i = 0; i < nmembers; ++i) { - const CfreeBytesInput* m = &members[i]; - const char* name; - size_t namelen; - char hdr[60]; - char name_field[16]; - size_t j; - - ar_name_basename(m->name, &name, &namelen); - - for (j = 0; j < 16; ++j) name_field[j] = ' '; - if (long_names && ar_name_needs_longtable(name, namelen)) { - /* Encode `/<decimal-offset>` in the 16-byte name field. */ - name_field[0] = '/'; - wh_ar_num(name_field + 1, 15, longtab_off); - longtab_off += (uint64_t)namelen + 2; - } else { - size_t emit = namelen > 15 ? 15 : namelen; - for (j = 0; j < emit; ++j) name_field[j] = name[j]; - name_field[emit] = '/'; - } - - ar_fill_header(hdr, name_field, epoch, (uint64_t)m->len); - wh_bytes(out, hdr, 60); - if (m->data && m->len) wh_bytes(out, m->data, m->len); - if (m->len & 1) wh_bytes(out, &pad, 1); - } - } - - /* TODO(symbol_index): emit System V `/` symbol-index entry when - * opts->symbol_index is nonzero. Currently silently ignored. 
*/ - - return 0; -} - -int cfree_ar_iter_init(CfreeArIter* it, const CfreeBytesInput* archive) -{ - if (!it || !archive) return 0; - if (!archive->data && archive->len) return 0; - if (cfree_detect_fmt(archive->data, archive->len) != CFREE_BIN_AR) return 0; - it->_p = archive->data + 8; - it->_end = archive->data + archive->len; - it->_longnames = NULL; - it->_longnames_len = 0; - it->_namebuf[0] = '\0'; - return 1; -} - -/* Resolve a `/<decimal-offset>` reference into the iterator's `//` table. - * Names in the table are terminated by '/' or '\n'. Returns the resolved - * name length, or 0 on failure. Writes the name into it->_namebuf. */ -static size_t ar_resolve_longname(CfreeArIter* it, uint64_t off) -{ - size_t i; - if (!it->_longnames) return 0; - if (off >= it->_longnames_len) return 0; - for (i = 0; i + 1 < sizeof(it->_namebuf); ++i) { - size_t k = (size_t)off + i; - char ch; - if (k >= it->_longnames_len) break; - ch = (char)it->_longnames[k]; - if (ch == '/' || ch == '\n') break; - it->_namebuf[i] = ch; - } - it->_namebuf[i] = '\0'; - return i; -} - -int cfree_ar_iter_next(CfreeArIter* it, CfreeArMember* out) -{ - for (;;) { - uint64_t size; - size_t avail; - int j; - int namelen; - char name_field[16]; - - if (it->_p + 60 > it->_end) return 0; - - for (j = 0; j < 16; ++j) name_field[j] = (char)it->_p[j]; - - size = 0; - for (j = 48; j < 58; ++j) { - char ch = (char)it->_p[j]; - if (ch < '0' || ch > '9') break; - size = size * 10 + (uint64_t)(unsigned char)(ch - '0'); - } - - it->_p += 60; - avail = (size_t)(it->_end - it->_p); - if ((uint64_t)avail < size) return 0; /* truncated */ - - /* Special members (handled before user-visible naming): - * "//" extended-name (long-name) table - * "/" alone System V symbol index - * "__.SYMDEF" BSD symbol index */ - if (name_field[0] == '/' && name_field[1] == '/') { - it->_longnames = it->_p; - it->_longnames_len = (size_t)size; - goto advance; - } - if (name_field[0] == '/' && name_field[1] == ' ') { - /* System V 
symbol index "/ ". */ - goto advance; - } - - /* Decode name. */ - if (name_field[0] == '/' && - name_field[1] >= '0' && name_field[1] <= '9') { - /* `/<offset>` long-name reference. */ - uint64_t off = 0; - for (j = 1; j < 16; ++j) { - char ch = name_field[j]; - if (ch < '0' || ch > '9') break; - off = off * 10 + (uint64_t)(unsigned char)(ch - '0'); - } - namelen = (int)ar_resolve_longname(it, off); - } else { - namelen = 0; - for (j = 0; j < 16; ++j) { - char ch = name_field[j]; - if (ch == '/' || ch == ' ' || ch == '\0') break; - it->_namebuf[namelen++] = ch; - } - it->_namebuf[namelen] = '\0'; - } - - out->name = it->_namebuf; - out->data = it->_p; - out->size = (size_t)size; - - it->_p += (size_t)size; - if ((size & 1) && it->_p < it->_end) it->_p++; - - /* Skip special-but-named members (BSD symbol index). */ - if (it->_namebuf[0] == '_' && - it->_namebuf[1] == '_' && it->_namebuf[2] == '.') { - continue; - } - if (namelen > 0) return 1; - continue; - - advance: - it->_p += (size_t)size; - if ((size & 1) && it->_p < it->_end) it->_p++; - } -} - -int cfree_ar_list(const CfreeBytesInput* archive, CfreeWriter* out) -{ - CfreeArIter it; - CfreeArMember m; - size_t namelen; - const char* p; - - if (!out) return 1; - if (!cfree_ar_iter_init(&it, archive)) return 1; - - while (cfree_ar_iter_next(&it, &m)) { - namelen = 0; - for (p = m.name; *p; ++p) ++namelen; - wh_bytes(out, m.name, namelen); - wh_nl(out); - } - - return 0; -} diff --git a/src/api/stubs.c b/src/api/stubs.c @@ -0,0 +1,237 @@ +/* Stub implementations for libcfree subsystems that are not yet wired up. + * + * libcfree's public surface (cfree.h) and internal pipeline (api/pipeline.c) + * reference the full compile/link/JIT/DWARF stack. The current build only + * implements the foundation (core, obj/elf, ar, lex). Everything else lives + * here as a panic stub: the symbol resolves at link time but calling it + * raises a clean diagnostic instead of jumping into an uninitialized + * subsystem. 
+ * + * As real implementations land, the matching stub block is deleted from this + * file. The lex test path (cfree cc --dump-tokens) does not exercise any + * stubbed entry, so the binary links and the test runs without touching + * unimplemented code. */ + +#include "lex/lex.h" +#include "pp/pp.h" +#include "parse/parse.h" +#include "decl/decl.h" +#include "cg/cg.h" +#include "debug/debug.h" +#include "opt/opt.h" +#include "link/link.h" +#include "arch/arch.h" +#include "obj/obj.h" + +#include <cfree.h> + +/* Internal panic stub used when a not-yet-implemented subsystem is invoked + * with a Compiler in hand. Public-API stubs that don't have a Compiler + * pointer return safe defaults instead. */ +static _Noreturn void unimplemented(Compiler* c, const char* what) +{ + SrcLoc loc = {0, 0, 0}; + compiler_panic(c, loc, "subsystem not implemented: %s", what); +} + +/* ============================================================ + * Preprocessor + * ============================================================ */ + +Pp* pp_new(Compiler* c) { unimplemented(c, "pp"); } +void pp_free(Pp* p) { (void)p; } +void pp_add_include_dir(Pp* p, const char* d, int sys) { (void)p; (void)d; (void)sys; } +void pp_define(Pp* p, const char* n, const char* b) { (void)p; (void)n; (void)b; } +void pp_undef(Pp* p, const char* n) { (void)p; (void)n; } +void pp_push_input(Pp* p, Lexer* l) { (void)p; (void)l; } +void pp_add_include_edge(Pp* p, u32 a, u32 b, SrcLoc l, int s) + { (void)p; (void)a; (void)b; (void)l; (void)s; } +Tok pp_next(Pp* p) { Tok t; (void)p; t.kind = TOK_EOF; t.flags = 0; + t.loc.file_id = 0; t.loc.line = 0; t.loc.col = 0; + t.spelling = 0; t.lit = LIT_NONE; t.v.ident = 0; return t; } +const LitInfo* pp_lit(const Pp* p, LitId id) { (void)p; (void)id; return 0; } +void pp_emit_text(Pp* p, Writer* w) { (void)p; (void)w; } + +/* ============================================================ + * Parser + * ============================================================ */ + +void 
parse_c (Compiler* c, Pp* p, DeclTable* d, CG* g) { (void)p; (void)d; (void)g; unimplemented(c, "parse_c"); } +void parse_asm(Compiler* c, Lexer* l, MCEmitter* m) { (void)l; (void)m; unimplemented(c, "parse_asm"); } + +/* ============================================================ + * Declarations + * ============================================================ */ + +DeclTable* decl_new(Compiler* c, ObjBuilder* o) { (void)o; unimplemented(c, "decl"); } +void decl_free(DeclTable* d) { (void)d; } + +/* ============================================================ + * Codegen + arch target + * ============================================================ */ + +CG* cg_new(Compiler* c, CGTarget* t, Debug* d) { (void)t; (void)d; unimplemented(c, "cg"); } +void cg_free(CG* g) { (void)g; } + +MCEmitter* mc_new(Compiler* c, ObjBuilder* o) { (void)o; unimplemented(c, "mc"); } +void mc_free(MCEmitter* m) { (void)m; } + +CGTarget* cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) + { (void)o; (void)m; unimplemented(c, "cgtarget"); } +void cgtarget_finalize(CGTarget* t) { (void)t; } +void cgtarget_free(CGTarget* t) { (void)t; } + +/* ============================================================ + * Optimizer + * ============================================================ */ + +CGTarget* opt_cgtarget_new(Compiler* c, CGTarget* t, int level) + { (void)t; (void)level; unimplemented(c, "opt"); } + +/* ============================================================ + * Debug info + * ============================================================ */ + +Debug* debug_new(Compiler* c, ObjBuilder* o) { (void)o; unimplemented(c, "debug"); } +void debug_emit(Debug* d) { (void)d; } +void debug_free(Debug* d) { (void)d; } + +/* ============================================================ + * Object emit/read for non-ELF formats + * ============================================================ + * ELF emit/read are real (src/obj/elf_emit.c, elf_read.c). 
The other + * formats are stubs; callers receive a diagnostic if they ask for a + * COFF/Mach-O/WASM target. */ + +void emit_coff (Compiler* c, ObjBuilder* o, Writer* w) { (void)o; (void)w; unimplemented(c, "emit_coff"); } +void emit_macho(Compiler* c, ObjBuilder* o, Writer* w) { (void)o; (void)w; unimplemented(c, "emit_macho"); } +void emit_wasm (Compiler* c, ObjBuilder* o, Writer* w) { (void)o; (void)w; unimplemented(c, "emit_wasm"); } + +ObjBuilder* read_coff (Compiler* c, const char* n, const u8* d, size_t l) + { (void)n; (void)d; (void)l; unimplemented(c, "read_coff"); } +ObjBuilder* read_macho(Compiler* c, const char* n, const u8* d, size_t l) + { (void)n; (void)d; (void)l; unimplemented(c, "read_macho"); } +ObjBuilder* read_wasm (Compiler* c, const char* n, const u8* d, size_t l) + { (void)n; (void)d; (void)l; unimplemented(c, "read_wasm"); } + +/* Linker, JIT mapper, and JIT lookup live in src/link/. */ + +/* ============================================================ + * Public API stubs + * ============================================================ + * Public entries that don't have a Compiler in hand return safe + * defaults and document the missing capability through the return value + * (NULL handle / nonzero error). */ + +/* Header-dep iterator — drained empty until PP records edges. */ +struct CfreeDepIter { int _; }; +CfreeDepIter* cfree_dep_iter_new (CfreeCompiler* c) { (void)c; return 0; } +int cfree_dep_iter_next(CfreeDepIter* it, CfreeDepEdge* o) { (void)it; (void)o; return 0; } +void cfree_dep_iter_free(CfreeDepIter* it) { (void)it; } + +/* Disassembler. 
*/ +struct CfreeDisasmIter { int _; }; +int cfree_obj_disasm(CfreeCompiler* c, const CfreeBytesInput* in, CfreeWriter* o) + { (void)c; (void)in; (void)o; return 1; } +CfreeDisasmIter* cfree_disasm_iter_new (CfreeCompiler* c, const uint8_t* b, size_t l, uint64_t v, CfreeObjBuilder* o) + { (void)c; (void)b; (void)l; (void)v; (void)o; return 0; } +int cfree_disasm_iter_next(CfreeDisasmIter* it, CfreeInsn* o) { (void)it; (void)o; return 0; } +void cfree_disasm_iter_free(CfreeDisasmIter* it) { (void)it; } + +/* Architecture register name iterator. */ +struct CfreeArchRegIter { int _; }; +const char* cfree_arch_register_name (CfreeArchKind a, uint32_t i) { (void)a; (void)i; return 0; } +int cfree_arch_register_index(CfreeArchKind a, const char* n, uint32_t* o) + { (void)a; (void)n; (void)o; return 1; } +CfreeArchRegIter* cfree_arch_reg_iter_new (CfreeArchKind a) { (void)a; return 0; } +int cfree_arch_reg_iter_next(CfreeArchRegIter* it, CfreeArchReg* o) + { (void)it; (void)o; return 0; } +void cfree_arch_reg_iter_free(CfreeArchRegIter* it) { (void)it; } + +/* Linker script parsing. */ +int cfree_link_script_parse(CfreeCompiler* c, const char* t, size_t l, const CfreeLinkScript** o) + { (void)c; (void)t; (void)l; if (o) *o = 0; return 1; } +void cfree_link_script_free (CfreeCompiler* c, const CfreeLinkScript* s) + { (void)c; (void)s; } + +/* JIT lookup, view, addr_to_sym, and the symbol iterator live in + * src/link/link_jit.c. 
*/ + +CfreeJitSession* cfree_jit_session_new (CfreeJit* j) { (void)j; return 0; } +void cfree_jit_session_free(CfreeJitSession* s) { (void)s; } +int cfree_jit_session_call (CfreeJitSession* s, void* e, CfreeEntryKind k, int ac, char** av, CfreeStopInfo* o) + { (void)s; (void)e; (void)k; (void)ac; (void)av; (void)o; return 1; } +int cfree_jit_session_resume(CfreeJitSession* s, CfreeResumeMode m, CfreeStopInfo* o) + { (void)s; (void)m; (void)o; return 1; } +int cfree_jit_session_interrupt(CfreeJitSession* s) { (void)s; return 1; } +int cfree_jit_session_read_mem (CfreeJitSession* s, uint64_t a, void* d, size_t n) + { (void)s; (void)a; (void)d; (void)n; return 1; } +int cfree_jit_session_write_mem(CfreeJitSession* s, uint64_t a, const void* d, size_t n) + { (void)s; (void)a; (void)d; (void)n; return 1; } +int cfree_jit_session_set_regs(CfreeJitSession* s, const CfreeUnwindFrame* f) + { (void)s; (void)f; return 1; } +int cfree_jit_session_breakpoint_set (CfreeJitSession* s, uint64_t a, uint32_t* o) + { (void)s; (void)a; (void)o; return 1; } +int cfree_jit_session_breakpoint_clear (CfreeJitSession* s, uint32_t id) + { (void)s; (void)id; return 1; } +int cfree_jit_session_breakpoint_set_spec (CfreeJitSession* s, const CfreeBreakpointSpec* sp, uint32_t* o) + { (void)s; (void)sp; (void)o; return 1; } + +/* DWARF. 
*/ +struct CfreeDwarfFieldIter { int _; }; +struct CfreeDwarfEnumIter { int _; }; +struct CfreeDwarfVarIter { int _; }; +struct CfreeDwarfParamIter { int _; }; + +CfreeDebugInfo* cfree_dwarf_open (CfreeCompiler* c, const CfreeObjFile* f) + { (void)c; (void)f; return 0; } +void cfree_dwarf_close(CfreeDebugInfo* d) { (void)d; } +int cfree_dwarf_addr_to_line(CfreeDebugInfo* d, uint64_t pc, const char** f, uint32_t* l, uint32_t* co) + { (void)d; (void)pc; (void)f; (void)l; (void)co; return 1; } +int cfree_dwarf_line_to_addr(CfreeDebugInfo* d, const char* f, uint32_t l, uint64_t* o) + { (void)d; (void)f; (void)l; (void)o; return 1; } +int cfree_dwarf_subprogram_at(CfreeDebugInfo* d, uint64_t pc, CfreeDwarfSubprogram* o) + { (void)d; (void)pc; (void)o; return 1; } +int cfree_dwarf_unwind_step(CfreeDebugInfo* d, CfreeUnwindFrame* f) { (void)d; (void)f; return 1; } + +CfreeDwarfTypeInfo cfree_dwarf_type_info(const CfreeDwarfType* t) +{ + CfreeDwarfTypeInfo info; + (void)t; + info.kind = CFREE_DT_VOID; + info.byte_size = 0; + info.name = ""; + info.element_count = 0; + info.inner = 0; + return info; +} + +CfreeDwarfFieldIter* cfree_dwarf_field_iter_new (CfreeDebugInfo* d, const CfreeDwarfType* t) + { (void)d; (void)t; return 0; } +int cfree_dwarf_field_iter_next(CfreeDwarfFieldIter* it, CfreeDwarfField* o) + { (void)it; (void)o; return 0; } +void cfree_dwarf_field_iter_free(CfreeDwarfFieldIter* it) { (void)it; } + +CfreeDwarfEnumIter* cfree_dwarf_enum_iter_new (CfreeDebugInfo* d, const CfreeDwarfType* t) + { (void)d; (void)t; return 0; } +int cfree_dwarf_enum_iter_next(CfreeDwarfEnumIter* it, CfreeDwarfEnumVal* o) + { (void)it; (void)o; return 0; } +void cfree_dwarf_enum_iter_free(CfreeDwarfEnumIter* it) { (void)it; } + +int cfree_dwarf_var_at (CfreeDebugInfo* d, uint64_t pc, const char* n, CfreeDwarfVarLoc* o) + { (void)d; (void)pc; (void)n; (void)o; return 1; } +int cfree_dwarf_loc_read(CfreeDebugInfo* d, const CfreeDwarfVarLoc* l, const CfreeUnwindFrame* f, + 
CfreeJitSession* s, void* dst, size_t cap, size_t* ro) + { (void)d; (void)l; (void)f; (void)s; (void)dst; (void)cap; (void)ro; return 1; } + +CfreeDwarfVarIter* cfree_dwarf_vars_at_new (CfreeDebugInfo* d, uint64_t pc, uint32_t mask) + { (void)d; (void)pc; (void)mask; return 0; } +int cfree_dwarf_vars_at_next(CfreeDwarfVarIter* it, CfreeDwarfVar* o) + { (void)it; (void)o; return 0; } +void cfree_dwarf_vars_at_free(CfreeDwarfVarIter* it) { (void)it; } + +CfreeDwarfParamIter* cfree_dwarf_param_iter_new (CfreeDebugInfo* d, uint64_t pc) + { (void)d; (void)pc; return 0; } +int cfree_dwarf_param_iter_next(CfreeDwarfParamIter* it, CfreeDwarfVar* o) + { (void)it; (void)o; return 0; } +void cfree_dwarf_param_iter_free(CfreeDwarfParamIter* it) { (void)it; } diff --git a/src/api/writer_mem.c b/src/api/writer_mem.c @@ -0,0 +1,105 @@ +/* In-memory CfreeWriter. Backing is a single grow-as-you-write byte + * buffer allocated through the host CfreeHeap. seek/tell let formats + * that need to back-patch (ELF/COFF/Mach-O) place an Ehdr/Shdr pass + * after computing later offsets. */ + +#include <cfree.h> + +#include "core/core.h" +#include "core/heap.h" + +#include <string.h> + +typedef struct MemWriter { + CfreeWriter base; + Heap* heap; + u8* data; /* heap-allocated buffer; NULL means cap == 0 */ + size_t cap; + size_t len; /* high-water mark of valid bytes */ + size_t pos; /* current write/seek position; may equal or exceed len momentarily */ + int err; +} MemWriter; + +static int mw_grow(MemWriter* mw, size_t needed) +{ + size_t new_cap; + u8* p; + + if (needed <= mw->cap) return 0; + new_cap = mw->cap ? mw->cap : 64; + while (new_cap < needed) { + size_t doubled = new_cap * 2; + if (doubled <= new_cap) { mw->err = 1; return 1; } + new_cap = doubled; + } + + p = (u8*)mw->heap->realloc(mw->heap, mw->data, mw->cap, new_cap, 1); + if (!p) { mw->err = 1; return 1; } + /* Zero-fill any newly-extended range so a subsequent seek-past-len + * read sees deterministic zero bytes. 
*/ + if (new_cap > mw->cap) memset(p + mw->cap, 0, new_cap - mw->cap); + mw->data = p; + mw->cap = new_cap; + return 0; +} + +static void mw_write(CfreeWriter* w, const void* data, size_t n) +{ + MemWriter* mw = (MemWriter*)w; + size_t end; + + if (mw->err || n == 0) return; + end = mw->pos + n; + if (end < mw->pos) { mw->err = 1; return; } /* overflow */ + if (mw_grow(mw, end)) return; + memcpy(mw->data + mw->pos, data, n); + mw->pos = end; + if (mw->pos > mw->len) mw->len = mw->pos; +} + +static void mw_seek(CfreeWriter* w, uint64_t off) +{ + MemWriter* mw = (MemWriter*)w; + if (mw->err) return; + /* Allow seeking past `len`; subsequent writes extend through the + * zero-filled tail. The cap grows on first such write. */ + mw->pos = (size_t)off; +} + +static uint64_t mw_tell (CfreeWriter* w) { return ((MemWriter*)w)->pos; } +static int mw_error(CfreeWriter* w) { return ((MemWriter*)w)->err; } + +static void mw_close(CfreeWriter* w) +{ + MemWriter* mw = (MemWriter*)w; + Heap* h = mw->heap; + if (mw->data) h->free(h, mw->data, mw->cap); + h->free(h, mw, sizeof(*mw)); +} + +CfreeWriter* cfree_writer_mem(CfreeHeap* heap) +{ + MemWriter* mw; + if (!heap) return NULL; + mw = (MemWriter*)heap->alloc(heap, sizeof(*mw), _Alignof(MemWriter)); + if (!mw) return NULL; + mw->base.write = mw_write; + mw->base.seek = mw_seek; + mw->base.tell = mw_tell; + mw->base.error = mw_error; + mw->base.close = mw_close; + mw->heap = heap; + mw->data = NULL; + mw->cap = 0; + mw->len = 0; + mw->pos = 0; + mw->err = 0; + return &mw->base; +} + +const uint8_t* cfree_writer_mem_bytes(CfreeWriter* w, size_t* len_out) +{ + MemWriter* mw = (MemWriter*)w; + if (len_out) *len_out = mw ? mw->len : 0; + return mw ? mw->data : NULL; +} diff --git a/test/ar_test.c b/test/ar_test.c @@ -0,0 +1,860 @@ +/* Round-trip tests for the cfree ar reader/writer. + * + * Builds against just include/cfree.h + libcfree.a and a few libc calls + * (malloc/realloc/free, printf for diagnostics). 
cfree_ar_write itself + * makes no heap allocations, so the test does not need a CfreeHeap. + * + * Set CFREE_AR_TEST_HOST=1 to also dump the produced symbol-index + * archive to /tmp/cfree_ar_test.a and run the host's `ar t` and + * `nm --print-armap` on it as a cross-check. */ +#include <cfree.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* ===== minimal CfreeWriter over a growing buffer ===== */ + +typedef struct BufW { + CfreeWriter base; + uint8_t* data; + size_t len; + size_t cap; + int err; +} BufW; + +static void bufw_write(CfreeWriter* w, const void* data, size_t n) +{ + BufW* b = (BufW*)w; + if (b->err) return; + if (b->len + n > b->cap) { + size_t nc = b->cap ? b->cap * 2 : 256; + while (nc < b->len + n) nc *= 2; + uint8_t* p = (uint8_t*)realloc(b->data, nc); + if (!p) { b->err = 1; return; } + b->data = p; + b->cap = nc; + } + memcpy(b->data + b->len, data, n); + b->len += n; +} + +static void bufw_seek (CfreeWriter* w, uint64_t off) { (void)w; (void)off; } +static uint64_t bufw_tell (CfreeWriter* w) { return ((BufW*)w)->len; } +static int bufw_error(CfreeWriter* w) { return ((BufW*)w)->err; } +static void bufw_close(CfreeWriter* w) { (void)w; } + +static void bufw_init(BufW* b) +{ + b->base.write = bufw_write; + b->base.seek = bufw_seek; + b->base.tell = bufw_tell; + b->base.error = bufw_error; + b->base.close = bufw_close; + b->data = NULL; + b->len = 0; + b->cap = 0; + b->err = 0; +} + +static void bufw_fini(BufW* b) { free(b->data); } + +/* ===== assertion helpers ===== */ + +static int g_fail; +#define EXPECT(cond, ...) do { \ + if (!(cond)) { \ + g_fail++; \ + fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + return 0; \ + } \ +} while (0) + +static uint32_t be32(const uint8_t* p) +{ + return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) | + ((uint32_t)p[2] << 8) | (uint32_t)p[3]; +} + +/* Decode the ar_size field (10-byte ASCII decimal, space-padded). 
*/ +static uint64_t ar_size_field(const uint8_t* hdr) +{ + uint64_t v = 0; int j; + for (j = 48; j < 58; ++j) { + char c = (char)hdr[j]; + if (c < '0' || c > '9') break; + v = v * 10 + (uint64_t)(c - '0'); + } + return v; +} + +/* Decode the ar_date field (12-byte ASCII decimal, space-padded). */ +static uint64_t ar_date_field(const uint8_t* hdr) +{ + uint64_t v = 0; int j; + for (j = 16; j < 28; ++j) { + char c = (char)hdr[j]; + if (c < '0' || c > '9') break; + v = v * 10 + (uint64_t)(c - '0'); + } + return v; +} + +/* ===== tests ===== */ + +static int test_basic_roundtrip(void) +{ + CfreeBytesInput ms[2]; + BufW bw; + CfreeBytesInput in; + CfreeArIter it; + CfreeArMember m; + int rc; + + ms[0].name = "a.o"; ms[0].data = (const uint8_t*)"AAAA"; ms[0].len = 4; ms[0].lang = 0; + ms[1].name = "b.o"; ms[1].data = (const uint8_t*)"BBBBB"; ms[1].len = 5; ms[1].lang = 0; + + bufw_init(&bw); + rc = cfree_ar_write(&bw.base, ms, 2, NULL); + EXPECT(rc == 0, "cfree_ar_write returned %d", rc); + EXPECT(!bw.err, "writer error"); + EXPECT(bw.len >= 8, "archive too short"); + EXPECT(memcmp(bw.data, "!<arch>\n", 8) == 0, "magic"); + + in.name = "test"; in.data = bw.data; in.len = bw.len; in.lang = 0; + EXPECT(cfree_ar_iter_init(&it, &in), "iter_init"); + + EXPECT(cfree_ar_iter_next(&it, &m), "first member"); + EXPECT(strcmp(m.name, "a.o") == 0, "name 0 = %s", m.name); + EXPECT(m.size == 4 && memcmp(m.data, "AAAA", 4) == 0, "data 0"); + + EXPECT(cfree_ar_iter_next(&it, &m), "second member"); + EXPECT(strcmp(m.name, "b.o") == 0, "name 1 = %s", m.name); + EXPECT(m.size == 5 && memcmp(m.data, "BBBBB", 5) == 0, "data 1"); + + EXPECT(!cfree_ar_iter_next(&it, &m), "iter end"); + + bufw_fini(&bw); + return 1; +} + +static int test_long_name_table(void) +{ + /* >15 chars triggers the // long-name table. 
*/ + CfreeBytesInput ms[2]; + BufW bw; + CfreeBytesInput in; + CfreeArIter it; + CfreeArMember m; + CfreeArWriteOptions opts = {0}; + int rc; + + opts.long_names = 1; + ms[0].name = "short.o"; ms[0].data = (const uint8_t*)"x"; ms[0].len = 1; ms[0].lang = 0; + ms[1].name = "this_name_is_long_enough.o"; ms[1].data = (const uint8_t*)"yy"; ms[1].len = 2; ms[1].lang = 0; + + bufw_init(&bw); + rc = cfree_ar_write(&bw.base, ms, 2, &opts); + EXPECT(rc == 0, "write rc=%d", rc); + + in.name = "test"; in.data = bw.data; in.len = bw.len; in.lang = 0; + EXPECT(cfree_ar_iter_init(&it, &in), "iter_init"); + EXPECT(cfree_ar_iter_next(&it, &m), "first member"); + EXPECT(strcmp(m.name, "short.o") == 0, "name 0 = %s", m.name); + EXPECT(cfree_ar_iter_next(&it, &m), "second member"); + EXPECT(strcmp(m.name, "this_name_is_long_enough.o") == 0, + "long name = %s", m.name); + EXPECT(!cfree_ar_iter_next(&it, &m), "iter end"); + + bufw_fini(&bw); + return 1; +} + +static int test_symbol_index_empty(void) +{ + CfreeBytesInput ms[1]; + BufW bw; + CfreeBytesInput in; + CfreeArIter it; + CfreeArMember m; + CfreeArWriteOptions opts = {0}; + int rc; + uint32_t nsyms; + + opts.symbol_index = 1; + ms[0].name = "lonely.o"; ms[0].data = (const uint8_t*)"P"; ms[0].len = 1; ms[0].lang = 0; + + bufw_init(&bw); + rc = cfree_ar_write(&bw.base, ms, 1, &opts); + EXPECT(rc == 0, "write rc=%d", rc); + + /* First member after magic must be `/` index with count=0. */ + EXPECT(bw.len >= 8 + 60 + 4, "archive too short"); + EXPECT(bw.data[8] == '/' && bw.data[9] == ' ', "index name field"); + EXPECT(ar_size_field(bw.data + 8) == 4, "index payload size = 4"); + nsyms = be32(bw.data + 8 + 60); + EXPECT(nsyms == 0, "nsyms = %u", nsyms); + + /* Iterator should skip the `/` and yield only the user member. 
*/ + in.name = "test"; in.data = bw.data; in.len = bw.len; in.lang = 0; + EXPECT(cfree_ar_iter_init(&it, &in), "iter_init"); + EXPECT(cfree_ar_iter_next(&it, &m), "first user member"); + EXPECT(strcmp(m.name, "lonely.o") == 0, "name = %s", m.name); + EXPECT(!cfree_ar_iter_next(&it, &m), "iter end"); + + bufw_fini(&bw); + return 1; +} + +static int test_symbol_index_basic(void) +{ + /* 2 members with symbol lists; verify count, offsets, and names. */ + const char* a_syms[] = { "foo", "bar" }; + const char* b_syms[] = { "baz" }; + CfreeArMemberSymbols msyms[2]; + CfreeBytesInput ms[2]; + BufW bw; + CfreeArWriteOptions opts = {0}; + int rc; + uint32_t nsyms; + const uint8_t* p; + uint64_t index_payload; + uint32_t off0, off1, off2; + uint64_t a_hdr_off, b_hdr_off; + const char* name; + + opts.symbol_index = 1; + opts.long_names = 1; + + msyms[0].names = (const char* const*)a_syms; msyms[0].count = 2; + msyms[1].names = (const char* const*)b_syms; msyms[1].count = 1; + opts.member_symbols = msyms; + + ms[0].name = "a.o"; ms[0].data = (const uint8_t*)"AAAA"; ms[0].len = 4; ms[0].lang = 0; + ms[1].name = "b.o"; ms[1].data = (const uint8_t*)"BBBBB"; ms[1].len = 5; ms[1].lang = 0; + + bufw_init(&bw); + rc = cfree_ar_write(&bw.base, ms, 2, &opts); + EXPECT(rc == 0, "write rc=%d", rc); + + EXPECT(memcmp(bw.data, "!<arch>\n", 8) == 0, "magic"); + EXPECT(bw.data[8] == '/' && bw.data[9] == ' ', "index ar_name field"); + + index_payload = ar_size_field(bw.data + 8); + /* 4 (count) + 4*3 (offsets) + 3+1 + 3+1 + 3+1 (names "foo\0bar\0baz\0") = 28 */ + EXPECT(index_payload == 28, "index payload = %llu", (unsigned long long)index_payload); + + p = bw.data + 8 + 60; + nsyms = be32(p); p += 4; + EXPECT(nsyms == 3, "nsyms = %u", nsyms); + + off0 = be32(p); /* foo → a.o */ + off1 = be32(p + 4); /* bar → a.o */ + off2 = be32(p + 8); /* baz → b.o */ + p += 12; + + /* Compute expected header offsets: index_total = 60+28 (no pad, even). 
+ * No long-name table is emitted (basenames ≤ 15 chars). So: + * a.o header at offset 8 + 88 = 96 + * b.o header at offset 96 + 60 + 4 (+0 pad) = 160 */ + a_hdr_off = 8 + 60 + 28; + b_hdr_off = a_hdr_off + 60 + 4; + EXPECT(off0 == a_hdr_off, "off0 = %u, expected %llu", off0, (unsigned long long)a_hdr_off); + EXPECT(off1 == a_hdr_off, "off1 = %u, expected %llu", off1, (unsigned long long)a_hdr_off); + EXPECT(off2 == b_hdr_off, "off2 = %u, expected %llu", off2, (unsigned long long)b_hdr_off); + + /* Sanity: the member headers must actually live at those offsets. */ + EXPECT(memcmp(bw.data + a_hdr_off, "a.o/", 4) == 0, "a.o at offset"); + EXPECT(memcmp(bw.data + b_hdr_off, "b.o/", 4) == 0, "b.o at offset"); + + /* Names: "foo\0bar\0baz\0" */ + name = (const char*)p; + EXPECT(strcmp(name, "foo") == 0, "name 0 = %s", name); + name += strlen(name) + 1; + EXPECT(strcmp(name, "bar") == 0, "name 1 = %s", name); + name += strlen(name) + 1; + EXPECT(strcmp(name, "baz") == 0, "name 2 = %s", name); + + bufw_fini(&bw); + return 1; +} + +static int test_symbol_index_with_long_names(void) +{ + /* `/` member must come BEFORE `//` long-name table, and offsets must + * still point at correct member-header positions. 
*/ + const char* syms0[] = { "alpha" }; + const char* syms1[] = { "beta" }; + CfreeArMemberSymbols msyms[2]; + CfreeBytesInput ms[2]; + BufW bw; + CfreeBytesInput in; + CfreeArIter it; + CfreeArMember m; + CfreeArWriteOptions opts = {0}; + int rc; + uint64_t index_payload, longtab_payload; + uint64_t index_total, longtab_total; + uint64_t m0_hdr, m1_hdr; + uint32_t off0, off1; + const uint8_t* p; + + opts.symbol_index = 1; + opts.long_names = 1; + msyms[0].names = (const char* const*)syms0; msyms[0].count = 1; + msyms[1].names = (const char* const*)syms1; msyms[1].count = 1; + opts.member_symbols = msyms; + + ms[0].name = "this_name_is_long_enough.o"; /* 26 chars → // */ + ms[0].data = (const uint8_t*)"X"; ms[0].len = 1; ms[0].lang = 0; + ms[1].name = "short.o"; + ms[1].data = (const uint8_t*)"YY"; ms[1].len = 2; ms[1].lang = 0; + + bufw_init(&bw); + rc = cfree_ar_write(&bw.base, ms, 2, &opts); + EXPECT(rc == 0, "write rc=%d", rc); + + /* Layout: magic(8) | / index member | // long-name member | members. */ + EXPECT(bw.data[8] == '/' && bw.data[9] == ' ', "/ first"); + index_payload = ar_size_field(bw.data + 8); + /* 4 (count) + 4*2 (offsets) + 6 ("alpha\0") + 5 ("beta\0") = 23 → odd → pad 1 */ + EXPECT(index_payload == 23, "index_payload = %llu", (unsigned long long)index_payload); + index_total = 60 + index_payload + (index_payload & 1); + + /* Verify // header sits at 8 + index_total. 
*/ + EXPECT(bw.data[8 + index_total] == '/', "// pos byte 0"); + EXPECT(bw.data[8 + index_total + 1] == '/', "// pos byte 1"); + longtab_payload = ar_size_field(bw.data + 8 + index_total); + /* "this_name_is_long_enough.o/\n" = 28 bytes → even → no pad */ + EXPECT(longtab_payload == 28, "longtab payload = %llu", + (unsigned long long)longtab_payload); + longtab_total = 60 + longtab_payload + (longtab_payload & 1); + + m0_hdr = 8 + index_total + longtab_total; + m1_hdr = m0_hdr + 60 + 1 + 1 /* parity pad for odd len 1 */; + + p = bw.data + 8 + 60; + EXPECT(be32(p) == 2, "nsyms = %u", be32(p)); + p += 4; + off0 = be32(p); + off1 = be32(p + 4); + EXPECT(off0 == m0_hdr, "off0 = %u, expected %llu", off0, (unsigned long long)m0_hdr); + EXPECT(off1 == m1_hdr, "off1 = %u, expected %llu", off1, (unsigned long long)m1_hdr); + /* Spot-check m1 starts with "short.o/" for sanity. */ + EXPECT(memcmp(bw.data + m1_hdr, "short.o/", 8) == 0, "m1 hdr"); + + /* Iterator should walk past both /, // and yield 2 members. */ + in.name = "test"; in.data = bw.data; in.len = bw.len; in.lang = 0; + EXPECT(cfree_ar_iter_init(&it, &in), "iter_init"); + EXPECT(cfree_ar_iter_next(&it, &m), "m0"); + EXPECT(strcmp(m.name, "this_name_is_long_enough.o") == 0, "m0 name = %s", m.name); + EXPECT(cfree_ar_iter_next(&it, &m), "m1"); + EXPECT(strcmp(m.name, "short.o") == 0, "m1 name = %s", m.name); + EXPECT(!cfree_ar_iter_next(&it, &m), "iter end"); + + /* Optional host cross-check. 
*/ + if (getenv("CFREE_AR_TEST_HOST")) { + FILE* f = fopen("/tmp/cfree_ar_test.a", "wb"); + if (f) { + fwrite(bw.data, 1, bw.len, f); + fclose(f); + fprintf(stderr, "host cross-check: ar t /tmp/cfree_ar_test.a\n"); + (void)!system("ar t /tmp/cfree_ar_test.a"); + fprintf(stderr, "host cross-check: nm --print-armap /tmp/cfree_ar_test.a\n"); + (void)!system("nm --print-armap /tmp/cfree_ar_test.a 2>&1 || true"); + } + } + + bufw_fini(&bw); + return 1; +} + +static int test_iter_skips_index(void) +{ + /* Make sure the iterator never surfaces the `/` member as a user member. */ + const char* s[] = { "only_sym" }; + CfreeArMemberSymbols msyms[1]; + CfreeBytesInput ms[1]; + CfreeBytesInput in; + CfreeArIter it; + CfreeArMember m; + BufW bw; + CfreeArWriteOptions opts = {0}; + int seen = 0; + + opts.symbol_index = 1; + msyms[0].names = (const char* const*)s; msyms[0].count = 1; + opts.member_symbols = msyms; + ms[0].name = "only.o"; ms[0].data = (const uint8_t*)"Z"; ms[0].len = 1; ms[0].lang = 0; + + bufw_init(&bw); + EXPECT(cfree_ar_write(&bw.base, ms, 1, &opts) == 0, "write"); + + in.name = "test"; in.data = bw.data; in.len = bw.len; in.lang = 0; + EXPECT(cfree_ar_iter_init(&it, &in), "iter_init"); + while (cfree_ar_iter_next(&it, &m)) { + EXPECT(m.name[0] != '/' || m.name[1] != '\0', + "iter surfaced raw `/` member"); + seen++; + } + EXPECT(seen == 1, "saw %d members", seen); + + bufw_fini(&bw); + return 1; +} + +static int test_empty_archive(void) +{ + /* nmembers == 0 with NULL members should produce a magic-only archive. 
*/ + BufW bw; + CfreeBytesInput in; + CfreeArIter it; + CfreeArMember m; + int rc; + + bufw_init(&bw); + rc = cfree_ar_write(&bw.base, NULL, 0, NULL); + EXPECT(rc == 0, "write rc=%d", rc); + EXPECT(bw.len == 8, "size = %zu", bw.len); + EXPECT(memcmp(bw.data, "!<arch>\n", 8) == 0, "magic only"); + + in.name = "test"; in.data = bw.data; in.len = bw.len; in.lang = 0; + EXPECT(cfree_ar_iter_init(&it, &in), "iter_init"); + EXPECT(!cfree_ar_iter_next(&it, &m), "no members"); + + bufw_fini(&bw); + return 1; +} + +static int test_epoch_field(void) +{ + /* opts.epoch is written into ar_date for every member. */ + CfreeBytesInput ms[1]; + BufW bw; + CfreeArWriteOptions opts = {0}; + int rc; + + ms[0].name = "x.o"; ms[0].data = (const uint8_t*)"q"; ms[0].len = 1; ms[0].lang = 0; + + opts.epoch = 1234567890u; + bufw_init(&bw); + rc = cfree_ar_write(&bw.base, ms, 1, &opts); + EXPECT(rc == 0, "write rc=%d", rc); + EXPECT(ar_date_field(bw.data + 8) == 1234567890u, + "ar_date = %llu", + (unsigned long long)ar_date_field(bw.data + 8)); + bufw_fini(&bw); + + /* Default (epoch=0): single '0' followed by spaces. */ + opts.epoch = 0; + bufw_init(&bw); + rc = cfree_ar_write(&bw.base, ms, 1, &opts); + EXPECT(rc == 0, "write rc=%d", rc); + EXPECT(bw.data[8 + 16] == '0', "epoch default first byte"); + EXPECT(bw.data[8 + 17] == ' ', "epoch default second byte = 0x%02x", + bw.data[8 + 17]); + bufw_fini(&bw); + return 1; +} + +static int test_path_basename(void) +{ + /* Member name with path components is stored as basename only. 
*/ + CfreeBytesInput ms[1]; + BufW bw; + CfreeBytesInput in; + CfreeArIter it; + CfreeArMember m; + + ms[0].name = "src/sub/foo.o"; + ms[0].data = (const uint8_t*)"D"; ms[0].len = 1; ms[0].lang = 0; + + bufw_init(&bw); + EXPECT(cfree_ar_write(&bw.base, ms, 1, NULL) == 0, "write"); + + in.name = "test"; in.data = bw.data; in.len = bw.len; in.lang = 0; + EXPECT(cfree_ar_iter_init(&it, &in), "iter_init"); + EXPECT(cfree_ar_iter_next(&it, &m), "first"); + EXPECT(strcmp(m.name, "foo.o") == 0, "basename = %s", m.name); + + bufw_fini(&bw); + return 1; +} + +static int test_truncate_when_long_names_off(void) +{ + /* >15 chars without long_names: name is truncated to 15. */ + CfreeBytesInput ms[1]; + BufW bw; + CfreeBytesInput in; + CfreeArIter it; + CfreeArMember m; + + ms[0].name = "abcdefghijklmnopqrst.o"; /* 22 chars */ + ms[0].data = (const uint8_t*)"D"; ms[0].len = 1; ms[0].lang = 0; + + bufw_init(&bw); + EXPECT(cfree_ar_write(&bw.base, ms, 1, NULL) == 0, "write"); + + in.name = "test"; in.data = bw.data; in.len = bw.len; in.lang = 0; + EXPECT(cfree_ar_iter_init(&it, &in), "iter_init"); + EXPECT(cfree_ar_iter_next(&it, &m), "first"); + EXPECT(strcmp(m.name, "abcdefghijklmno") == 0, "truncated = %s", m.name); + + bufw_fini(&bw); + return 1; +} + +static int test_name_15_char_boundary(void) +{ + /* Exactly 15 chars: fits in-header even with long_names enabled. */ + CfreeBytesInput ms[1]; + BufW bw; + CfreeBytesInput in; + CfreeArIter it; + CfreeArMember m; + CfreeArWriteOptions opts = {0}; + + opts.long_names = 1; + ms[0].name = "abcdefghijklmno"; /* 15 chars */ + ms[0].data = (const uint8_t*)"X"; ms[0].len = 1; ms[0].lang = 0; + + bufw_init(&bw); + EXPECT(cfree_ar_write(&bw.base, ms, 1, &opts) == 0, "write"); + + /* No `//` long-name table: first member sits right after magic. 
*/ + EXPECT(memcmp(bw.data + 8, "abcdefghijklmno/", 16) == 0, + "name field = %.16s", (const char*)(bw.data + 8)); + + in.name = "test"; in.data = bw.data; in.len = bw.len; in.lang = 0; + EXPECT(cfree_ar_iter_init(&it, &in), "iter_init"); + EXPECT(cfree_ar_iter_next(&it, &m), "first"); + EXPECT(strcmp(m.name, "abcdefghijklmno") == 0, "name = %s", m.name); + + bufw_fini(&bw); + return 1; +} + +static int test_name_16_char_boundary(void) +{ + /* Exactly 16 chars: triggers // long-name table. With long_names set, the header that follows the 8-byte magic must begin with the two bytes "//", and the iterator must still return the full 16-character name. */ + CfreeBytesInput ms[1]; + BufW bw; + CfreeBytesInput in; + CfreeArIter it; + CfreeArMember m; + CfreeArWriteOptions opts = {0}; + + opts.long_names = 1; + ms[0].name = "abcdefghijklmnop"; /* 16 chars */ + ms[0].data = (const uint8_t*)"Y"; ms[0].len = 1; ms[0].lang = 0; + + bufw_init(&bw); + EXPECT(cfree_ar_write(&bw.base, ms, 1, &opts) == 0, "write"); + EXPECT(bw.data[8] == '/' && bw.data[9] == '/', "// header"); + + in.name = "test"; in.data = bw.data; in.len = bw.len; in.lang = 0; + EXPECT(cfree_ar_iter_init(&it, &in), "iter_init"); + EXPECT(cfree_ar_iter_next(&it, &m), "first"); + EXPECT(strcmp(m.name, "abcdefghijklmnop") == 0, "name = %s", m.name); + + bufw_fini(&bw); + return 1; +} + +static int test_empty_member_payload(void) +{ + /* len=0 (data=NULL): header only, no pad; followed by the next member. 
*/ + CfreeBytesInput ms[2]; + BufW bw; + CfreeBytesInput in; + CfreeArIter it; + CfreeArMember m; + + ms[0].name = "empty.o"; ms[0].data = NULL; ms[0].len = 0; ms[0].lang = 0; + ms[1].name = "next.o"; ms[1].data = (const uint8_t*)"N"; ms[1].len = 1; ms[1].lang = 0; + + bufw_init(&bw); + EXPECT(cfree_ar_write(&bw.base, ms, 2, NULL) == 0, "write"); + /* magic(8) + hdr(60) + 0 + hdr(60) + 1 + pad(1) = 130 */ + EXPECT(bw.len == 130, "size = %zu", bw.len); + + in.name = "test"; in.data = bw.data; in.len = bw.len; in.lang = 0; + EXPECT(cfree_ar_iter_init(&it, &in), "iter_init"); + EXPECT(cfree_ar_iter_next(&it, &m), "first"); + EXPECT(strcmp(m.name, "empty.o") == 0 && m.size == 0, "empty.o size=%zu", + m.size); + EXPECT(cfree_ar_iter_next(&it, &m), "second"); + EXPECT(strcmp(m.name, "next.o") == 0 && m.size == 1 && m.data[0] == 'N', + "next.o"); + EXPECT(!cfree_ar_iter_next(&it, &m), "iter end"); + + bufw_fini(&bw); + return 1; +} + +static int test_odd_payload_pad(void) +{ + /* Odd-length payloads add a '\n' parity pad; even lengths do not. Uses three members with payload lengths 1, 2 and 1; only the total archive size and two marker bytes are checked, nothing is read back through the iterator. */ + CfreeBytesInput ms[3]; + BufW bw; + + ms[0].name = "a.o"; ms[0].data = (const uint8_t*)"x"; ms[0].len = 1; ms[0].lang = 0; + ms[1].name = "b.o"; ms[1].data = (const uint8_t*)"yy"; ms[1].len = 2; ms[1].lang = 0; + ms[2].name = "c.o"; ms[2].data = (const uint8_t*)"z"; ms[2].len = 1; ms[2].lang = 0; + + bufw_init(&bw); + EXPECT(cfree_ar_write(&bw.base, ms, 3, NULL) == 0, "write"); + /* 8 + (60+1+1) + (60+2) + (60+1+1) = 194 */ + EXPECT(bw.len == 194, "size = %zu", bw.len); + EXPECT(bw.data[8 + 60 + 1] == '\n', "pad after a.o = 0x%02x", + bw.data[8 + 60 + 1]); + /* No pad after b.o (even): next header begins immediately. Offset 8+62+60+2 = 132 is therefore the first byte of c.o's header, i.e. the 'c' of its name. */ + EXPECT(bw.data[8 + 62 + 60 + 2] == 'c', "c.o name follows b.o without pad"); + + bufw_fini(&bw); + return 1; +} + +static int test_ar_list_output(void) +{ + /* cfree_ar_list emits one user member per line, skipping / and //. 
*/ + CfreeBytesInput ms[3]; + BufW bw, lw; + CfreeBytesInput in; + CfreeArWriteOptions opts = {0}; + const char* expected = "a.o\nlong_name_member.o\nb.o\n"; + + opts.symbol_index = 1; + opts.long_names = 1; /* NOTE(review): presumably both are enabled so the archive contains the special members the listing must omit; confirm against cfree_ar_write */ + ms[0].name = "a.o"; ms[0].data = (const uint8_t*)"A"; ms[0].len = 1; ms[0].lang = 0; + ms[1].name = "long_name_member.o"; ms[1].data = (const uint8_t*)"B"; ms[1].len = 1; ms[1].lang = 0; + ms[2].name = "b.o"; ms[2].data = (const uint8_t*)"C"; ms[2].len = 1; ms[2].lang = 0; + + bufw_init(&bw); + EXPECT(cfree_ar_write(&bw.base, ms, 3, &opts) == 0, "write"); + + in.name = "test"; in.data = bw.data; in.len = bw.len; in.lang = 0; + bufw_init(&lw); + EXPECT(cfree_ar_list(&in, &lw.base) == 0, "list"); + EXPECT(lw.len == strlen(expected), + "list len = %zu, want %zu", lw.len, strlen(expected)); + EXPECT(memcmp(lw.data, expected, lw.len) == 0, + "list = %.*s", (int)lw.len, (const char*)lw.data); + + bufw_fini(&lw); + bufw_fini(&bw); + return 1; +} + +static int test_iter_bad_magic(void) +{ + /* iter_init must reject non-ar inputs. Three cases are tried: nine bytes of wrong magic (sizeof minus the terminating NUL), a NULL pointer with length 0, and a 4-byte input. No buffer writer is needed because nothing is written. */ + static const uint8_t bad[] = "NOT-AN-AR"; + CfreeBytesInput in; + CfreeArIter it; + + in.name = "bad"; in.data = bad; in.len = sizeof(bad) - 1; in.lang = 0; + EXPECT(!cfree_ar_iter_init(&it, &in), "rejects bad magic"); + + in.data = NULL; in.len = 0; + EXPECT(!cfree_ar_iter_init(&it, &in), "rejects empty"); + + in.data = bad; in.len = 4; /* too short: fewer than the 8 bytes of archive magic */ + EXPECT(!cfree_ar_iter_init(&it, &in), "rejects short"); + + return 1; +} + +static int test_write_invalid_args(void) +{ + /* Bad argument combinations must return 1 from cfree_ar_write. 
*/ + CfreeBytesInput ms[1]; + BufW bw; + CfreeArWriteOptions opts = {0}; + const char* bad_syms[1]; + CfreeArMemberSymbols msyms[1]; + + bufw_init(&bw); + + EXPECT(cfree_ar_write(NULL, NULL, 0, NULL) == 1, "NULL writer rejected"); + EXPECT(cfree_ar_write(&bw.base, NULL, 1, NULL) == 1, + "NULL members with nmembers>0 rejected"); + + ms[0].name = NULL; + ms[0].data = (const uint8_t*)"X"; ms[0].len = 1; ms[0].lang = 0; + EXPECT(cfree_ar_write(&bw.base, ms, 1, NULL) == 1, "NULL name rejected"); + + /* NULL symbol-name with nonzero count. The member name is set to a valid string before the call, so the NULL symbol entry is the only invalid argument left. */ + bad_syms[0] = NULL; + msyms[0].names = (const char* const*)bad_syms; + msyms[0].count = 1; + opts.symbol_index = 1; + opts.member_symbols = msyms; + ms[0].name = "ok.o"; + EXPECT(cfree_ar_write(&bw.base, ms, 1, &opts) == 1, + "NULL symbol name rejected"); + + bufw_fini(&bw); + return 1; +} + +static int test_iter_skips_bsd_symdef(void) +{ + /* Hand-craft an archive containing a BSD __.SYMDEF SORTED member; the + * iterator must not surface it. (cfree_ar_write never emits BSD indexes, + * but the iterator is documented to handle them on read.) */ + BufW bw; + CfreeBytesInput in; + CfreeArIter it; + CfreeArMember m; + char hdr[60]; + size_t j; + int seen = 0; + + bufw_init(&bw); + bw.base.write(&bw.base, "!<arch>\n", 8); + + /* __.SYMDEF SORTED with a 4-byte payload. The 60-byte header is space-filled and then patched using the standard ar field offsets: name at 0, date at 16, uid at 28, gid at 34, mode at 40, size at 48, and the two terminator bytes at 58..59. The 16-character name fills the whole name field, so no '/' terminator is written. */ + for (j = 0; j < 60; ++j) hdr[j] = ' '; + { + const char* nm = "__.SYMDEF SORTED"; + for (j = 0; j < 16 && nm[j]; ++j) hdr[j] = nm[j]; + } + hdr[16] = '0'; + hdr[28] = '0'; hdr[34] = '0'; + hdr[40] = '6'; hdr[41] = '4'; hdr[42] = '4'; + hdr[48] = '4'; /* size = 4 */ + hdr[58] = '`'; hdr[59] = '\n'; + bw.base.write(&bw.base, hdr, 60); + bw.base.write(&bw.base, "ZZZZ", 4); + + /* User member u.o size=1 + parity pad. 
*/ + for (j = 0; j < 60; ++j) hdr[j] = ' '; + hdr[0] = 'u'; hdr[1] = '.'; hdr[2] = 'o'; hdr[3] = '/'; + hdr[16] = '0'; + hdr[28] = '0'; hdr[34] = '0'; + hdr[40] = '6'; hdr[41] = '4'; hdr[42] = '4'; + hdr[48] = '1'; /* size = 1 */ + hdr[58] = '`'; hdr[59] = '\n'; + bw.base.write(&bw.base, hdr, 60); + bw.base.write(&bw.base, "U\n", 2); /* one payload byte 'U' followed by the '\n' pad byte */ + + in.name = "bsd"; in.data = bw.data; in.len = bw.len; in.lang = 0; + EXPECT(cfree_ar_iter_init(&it, &in), "iter_init"); + while (cfree_ar_iter_next(&it, &m)) { + EXPECT(strcmp(m.name, "u.o") == 0, "unexpected name = %s", m.name); + seen++; + } + EXPECT(seen == 1, "saw %d members", seen); + + bufw_fini(&bw); + return 1; +} + +static int test_iter_data_aliases_archive(void) +{ + /* CfreeArMember.data must point into the archive's own bytes. Writes one 7-byte member, then checks that the range [m.data, m.data + m.size) lies inside [bw.data, bw.data + bw.len) and that the seven payload bytes match. */ + CfreeBytesInput ms[1]; + BufW bw; + CfreeBytesInput in; + CfreeArIter it; + CfreeArMember m; + + ms[0].name = "a.o"; ms[0].data = (const uint8_t*)"PAYLOAD"; + ms[0].len = 7; ms[0].lang = 0; + + bufw_init(&bw); + EXPECT(cfree_ar_write(&bw.base, ms, 1, NULL) == 0, "write"); + + in.name = "test"; in.data = bw.data; in.len = bw.len; in.lang = 0; + EXPECT(cfree_ar_iter_init(&it, &in), "iter_init"); + EXPECT(cfree_ar_iter_next(&it, &m), "first"); + EXPECT(m.data >= bw.data && m.data + m.size <= bw.data + bw.len, + "data aliases archive bytes"); + EXPECT(memcmp(m.data, "PAYLOAD", 7) == 0, "payload"); + + bufw_fini(&bw); + return 1; +} + +static int test_symbol_index_partial_members(void) +{ + /* Members with 0 symbols mid-list: cur_offset must still advance for + * every member so later offsets land on the right header. 
*/ + const char* mid_syms[] = { "midsym" }; + CfreeArMemberSymbols msyms[3]; + CfreeBytesInput ms[3]; + BufW bw; + CfreeArWriteOptions opts = {0}; + uint32_t nsyms, off; + uint64_t expected_b_hdr; + + opts.symbol_index = 1; + msyms[0].names = NULL; msyms[0].count = 0; + msyms[1].names = (const char* const*)mid_syms; msyms[1].count = 1; + msyms[2].names = NULL; msyms[2].count = 0; /* only the middle member (b.o) carries a symbol */ + opts.member_symbols = msyms; + + ms[0].name = "a.o"; ms[0].data = (const uint8_t*)"AA"; ms[0].len = 2; ms[0].lang = 0; + ms[1].name = "b.o"; ms[1].data = (const uint8_t*)"BB"; ms[1].len = 2; ms[1].lang = 0; + ms[2].name = "c.o"; ms[2].data = (const uint8_t*)"CC"; ms[2].len = 2; ms[2].lang = 0; + + bufw_init(&bw); + EXPECT(cfree_ar_write(&bw.base, ms, 3, &opts) == 0, "write"); + + /* Index payload: 4(count) + 4(offset) + 7("midsym\0") = 15 → odd → +1 pad. */ + EXPECT(ar_size_field(bw.data + 8) == 15, + "index payload = %llu", + (unsigned long long)ar_size_field(bw.data + 8)); + + nsyms = be32(bw.data + 8 + 60); + EXPECT(nsyms == 1, "nsyms = %u", nsyms); + + off = be32(bw.data + 8 + 60 + 4); /* the single offset entry, located right after the 4-byte count */ + /* magic(8) + index(60+15+1) + a.o(60+2) = 146 */ + expected_b_hdr = 8 + 60 + 15 + 1 + 60 + 2; + EXPECT(off == expected_b_hdr, + "off = %u, expected %llu", off, (unsigned long long)expected_b_hdr); + EXPECT(memcmp(bw.data + off, "b.o/", 4) == 0, "b.o at offset"); + + bufw_fini(&bw); + return 1; +} + +int main(void) +{ /* Test driver: RUN bumps total once per test and adds the test's int return value to passes. */ + int passes = 0; + int total = 0; + #define RUN(t) do { total++; passes += (t)(); } while (0) + + RUN(test_basic_roundtrip); + RUN(test_long_name_table); + RUN(test_symbol_index_empty); + RUN(test_symbol_index_basic); + RUN(test_symbol_index_with_long_names); + RUN(test_iter_skips_index); + RUN(test_empty_archive); + RUN(test_epoch_field); + RUN(test_path_basename); + RUN(test_truncate_when_long_names_off); + RUN(test_name_15_char_boundary); + RUN(test_name_16_char_boundary); + RUN(test_empty_member_payload); + RUN(test_odd_payload_pad); + RUN(test_ar_list_output); + 
RUN(test_iter_bad_magic); + RUN(test_write_invalid_args); + RUN(test_iter_skips_bsd_symdef); + RUN(test_iter_data_aliases_archive); + RUN(test_symbol_index_partial_members); + /* g_fail is defined outside this chunk; a nonzero value selects the stderr report and exit status 1, otherwise the OK line goes to stdout and 0 is returned. */ + if (g_fail) { + fprintf(stderr, "ar_test: %d failure(s) (%d/%d passed)\n", + g_fail, passes, total); + return 1; + } + printf("ar_test: OK (%d/%d)\n", passes, total); + return 0; +}