kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 55e6cae81c8e601c13504393d234c59e43135ffa
parent ae690b5af361d1c325939f360dcd054623b6fb83
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue,  2 Jun 2026 11:05:22 -0700

driver: add cmp tool (byte-by-byte file comparison)

GNU/BSD cmp-compatible: default reports the first differing byte
("FILE1 FILE2 differ: char N, line M"), -s is silent, -l lists every
differing byte in octal, -b renders the bytes cat -v style, -n caps the
compared length, and SKIP1/SKIP2 skip leading bytes. Exit codes 0/1/2
(identical/differ/trouble) match the convention. FILE2 defaults to stdin.

Lets the toolchain verify its own outputs (e.g. bootstrap reproducibility)
without shelling out to the host cmp.

Diffstat:
M Makefile | 3 +++
A driver/cmd/cmp.c | 277 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M driver/driver.h | 2 ++
M driver/main.c | 4 ++++
M include/cfree/config.h | 1 +
5 files changed, 287 insertions(+), 0 deletions(-)

diff --git a/Makefile b/Makefile
@@ -394,6 +394,9 @@ endif
 ifeq ($(CFREE_TOOL_XXD_ENABLED),1)
 DRIVER_TOOL_SRCS += driver/cmd/xxd.c
 endif
+ifeq ($(CFREE_TOOL_CMP_ENABLED),1)
+DRIVER_TOOL_SRCS += driver/cmd/cmp.c
+endif
 DRIVER_SRCS += $(sort $(DRIVER_TOOL_SRCS))
 ifneq ($(filter 1,$(CFREE_TOOL_CC_ENABLED) $(CFREE_TOOL_CHECK_ENABLED) $(CFREE_TOOL_CPP_ENABLED) $(CFREE_TOOL_AS_ENABLED) $(CFREE_TOOL_DBG_ENABLED) $(CFREE_TOOL_RUN_ENABLED)),)
 DRIVER_SRCS += driver/lib/cflags.c
diff --git a/driver/cmd/cmp.c b/driver/cmd/cmp.c
@@ -0,0 +1,277 @@
+#include <cfree/core.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "driver.h"
+#include "env.h"
+
+/* `cfree cmp` — compare two files byte by byte, GNU cmp style. Default prints
+ * the first differing byte (1-based offset + line); -l lists every difference;
+ * -s is silent. Exit codes follow GNU cmp and cfree's convention exactly:
+ * 0 identical, 1 differ, 2 trouble/usage. With FILE2 omitted or `-`, the second
+ * operand is stdin. Optional SKIP1/SKIP2 skip leading bytes of each input. */
+
+#define CMP_TOOL "cmp"
+
+typedef struct CmpOpts {
+  int silent;     /* -s / --quiet / --silent */
+  int list;       /* -l : list all differing bytes */
+  int show_bytes; /* -b : show the differing byte values */
+  uint64_t max;   /* -n N : compare at most N bytes; valid only if have_max */
+  int have_max;   /* whether -n was given (-n 0 then compares zero bytes) */
+} CmpOpts;
+
+void driver_help_cmp(void) {
+  driver_printf(
+      "%.*s",
+      CFREE_SLICE_ARG(CFREE_SLICE_LIT(
+          "cfree cmp — compare two files byte by byte\n"
+          "\n"
+          "USAGE\n"
+          " cfree cmp [OPTIONS] FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"
+          "\n"
+          "DESCRIPTION\n"
+          " Compares FILE1 and FILE2. With FILE2 omitted or `-`, reads stdin.\n"
+          " SKIP1/SKIP2 skip that many leading bytes of each file before\n"
+          " comparing (decimal, or 0x-prefixed hex).\n"
+          "\n"
+          "OPTIONS\n"
+          " -s, --quiet, --silent print nothing; status only\n"
+          " -l, --verbose list each differing byte (octal)\n"
+          " -b, --print-bytes show the differing byte values\n"
+          " -n N compare at most N bytes\n"
+          " -h, --help show this help\n"
+          "\n"
+          "EXIT CODES\n"
+          " 0 identical 1 differ 2 trouble/usage\n")));
+}
+
+/* Parse decimal/0x-hex u64: 0 on success, 1 on bad input. Overflow wraps. */
+static int cmp_parse_u64(const char* s, uint64_t* out) {
+  uint64_t v = 0;
+  int base = 10;
+  if (!s || !*s) return 1;
+  if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
+    base = 16;
+    s += 2;
+    if (!*s) return 1;
+  }
+  for (; *s; ++s) {
+    unsigned d;
+    char c = *s;
+    if (c >= '0' && c <= '9')
+      d = (unsigned)(c - '0');
+    else if (base == 16 && c >= 'a' && c <= 'f')
+      d = (unsigned)(c - 'a' + 10);
+    else if (base == 16 && c >= 'A' && c <= 'F')
+      d = (unsigned)(c - 'A' + 10);
+    else
+      return 1;
+    v = v * (uint64_t)base + d;
+  }
+  *out = v;
+  return 0;
+}
+
+/* Load a named operand, or stdin when the name is "-". Returns 0 on success
+ * with data and len set, 1 on failure. A stdin read fills stdin_buf/stdin_len
+ * (freed with driver_free); a file read fills ld (driver_release_bytes). */
+static int cmp_load(DriverEnv* env, const char* name, const uint8_t** data,
+                    size_t* len, DriverLoad* ld, uint8_t** stdin_buf,
+                    size_t* stdin_len) {
+  if (driver_streq(name, "-")) {
+    if (!driver_read_stdin(env, stdin_buf, stdin_len)) {
+      driver_errf(CMP_TOOL, "failed to read stdin");
+      return 1;
+    }
+    *data = *stdin_buf;
+    *len = *stdin_len;
+    return 0;
+  }
+  {
+    CfreeSlice in;
+    if (driver_load_bytes(&env->file_io, CMP_TOOL, name, ld, &in) != 0)
+      return 1;
+    *data = in.data;
+    *len = in.len;
+    return 0;
+  }
+}
+
+/* Render a byte the way `cat -v` / cmp -b does: M- for the high bit, ^X for
+ * control codes, ^? for DEL, otherwise the literal character. */
+static void cmp_print_catv(uint8_t c) {
+  if (c >= 128) {
+    driver_printf("M-");
+    c = (uint8_t)(c - 128);
+  }
+  if (c < 32)
+    driver_printf("^%c", (char)(c + 64));
+  else if (c == 127)
+    driver_printf("^?");
+  else
+    driver_printf("%c", (char)c);
+}
+
+int driver_cmp(int argc, char** argv) {
+  DriverEnv env;
+  CmpOpts opts;
+  const char* names[2] = {NULL, NULL};
+  uint64_t skip[2] = {0, 0};
+  int npos = 0; /* count of positional operands seen */
+  int i, rc = 2;
+
+  const uint8_t* d1 = NULL;
+  const uint8_t* d2 = NULL;
+  size_t l1 = 0, l2 = 0;
+  DriverLoad ld1 = {0}, ld2 = {0};
+  uint8_t* sb1 = NULL;
+  uint8_t* sb2 = NULL;
+  size_t sl1 = 0, sl2 = 0;
+  int loaded1 = 0, loaded2 = 0;
+
+  if (driver_argv_wants_help(argc, argv, 1)) {
+    driver_help_cmp();
+    return 0;
+  }
+
+  memset(&opts, 0, sizeof opts);
+  driver_env_init(&env);
+
+  for (i = 1; i < argc; ++i) {
+    const char* a = argv[i];
+    if (driver_streq(a, "-s") || driver_streq(a, "--quiet") ||
+        driver_streq(a, "--silent")) {
+      opts.silent = 1;
+      continue;
+    }
+    if (driver_streq(a, "-l") || driver_streq(a, "--verbose")) {
+      opts.list = 1;
+      continue;
+    }
+    if (driver_streq(a, "-b") || driver_streq(a, "--print-bytes")) {
+      opts.show_bytes = 1;
+      continue;
+    }
+    if (driver_streq(a, "-n")) {
+      if (i + 1 >= argc || cmp_parse_u64(argv[++i], &opts.max) != 0) {
+        driver_errf(CMP_TOOL, "-n requires a non-negative count");
+        goto done;
+      }
+      opts.have_max = 1;
+      continue;
+    }
+    if (driver_streq(a, "-")) {
+      /* stdin operand */
+    } else if (a[0] == '-' && a[1] != '\0') {
+      driver_errf(CMP_TOOL, "unknown option: %s", a);
+      goto done;
+    }
+    /* positional: FILE1, FILE2, SKIP1, SKIP2 */
+    if (npos < 2) {
+      names[npos] = a;
+    } else if (npos < 4) {
+      if (cmp_parse_u64(a, &skip[npos - 2]) != 0) {
+        driver_errf(CMP_TOOL, "invalid skip value: %s", a);
+        goto done;
+      }
+    } else {
+      driver_errf(CMP_TOOL, "too many operands: %s", a);
+      goto done;
+    }
+    ++npos;
+  }
+
+  if (npos < 1) {
+    driver_errf(CMP_TOOL, "missing operand (need FILE1)");
+    goto done;
+  }
+  if (!names[1]) names[1] = "-"; /* FILE2 defaults to stdin */
+  if (driver_streq(names[0], "-") && driver_streq(names[1], "-")) {
+    driver_errf(CMP_TOOL, "only one operand may be stdin (`-`)");
+    goto done;
+  }
+
+  if (cmp_load(&env, names[0], &d1, &l1, &ld1, &sb1, &sl1) != 0) {
+    rc = 2;
+    goto done;
+  }
+  loaded1 = 1;
+  if (cmp_load(&env, names[1], &d2, &l2, &ld2, &sb2, &sl2) != 0) {
+    rc = 2;
+    goto done;
+  }
+  loaded2 = 1;
+
+  /* Apply leading-byte skips. A skip past EOF yields an empty view. */
+  d1 = d1 + (skip[0] < l1 ? skip[0] : l1);
+  l1 = (skip[0] < l1) ? (l1 - skip[0]) : 0;
+  d2 = d2 + (skip[1] < l2 ? skip[1] : l2);
+  l2 = (skip[1] < l2) ? (l2 - skip[1]) : 0;
+
+  {
+    size_t cmp_len = l1 < l2 ? l1 : l2;
+    size_t k;
+    uint64_t line = 1; /* 1-based line of the current position */
+    int differ = 0;
+
+    if (opts.have_max && opts.max < (uint64_t)cmp_len)
+      cmp_len = (size_t)opts.max;
+
+    for (k = 0; k < cmp_len; ++k) {
+      if (d1[k] != d2[k]) {
+        differ = 1;
+        if (opts.silent) break;
+        if (opts.list) {
+          /* -l: "<byte> <oct1> <oct2>", 1-based offset, octal; -b unused. */
+          driver_printf("%6llu %3llo %3llo\n", (unsigned long long)(k + 1),
+                        (unsigned long long)d1[k], (unsigned long long)d2[k]);
+          continue;
+        }
+        /* Default: report the first difference and stop. POSIX/GNU/BSD all
+         * phrase this as "char N" (the 1-based byte offset). */
+        driver_printf("%s %s differ: char %llu, line %llu", names[0], names[1],
+                      (unsigned long long)(k + 1), (unsigned long long)line);
+        if (opts.show_bytes) {
+          driver_printf(" is %3llo ", (unsigned long long)d1[k]);
+          cmp_print_catv(d1[k]);
+          driver_printf(" %3llo ", (unsigned long long)d2[k]);
+          cmp_print_catv(d2[k]);
+        }
+        driver_printf("\n");
+        break;
+      }
+      if (d1[k] == '\n') ++line;
+    }
+
+    if (opts.list && differ) {
+      /* listed all diffs above */
+    }
+
+    if (!differ && l1 != l2 &&
+        (!opts.have_max || opts.max > (uint64_t)cmp_len)) {
+      /* Same up to the shorter length, not cut by -n: EOF on the shorter. */
+      const char* shorter = l1 < l2 ? names[0] : names[1];
+      if (!opts.silent) {
+        driver_errf(CMP_TOOL, "EOF on %s after byte %llu", shorter,
+                    (unsigned long long)cmp_len);
+      }
+      differ = 1;
+    }
+
+    rc = differ ? 1 : 0;
+  }
+
+done:
+  if (loaded1) {
+    if (sb1) driver_free(&env, sb1, sl1);
+    driver_release_bytes(&env.file_io, &ld1);
+  }
+  if (loaded2) {
+    if (sb2) driver_free(&env, sb2, sl2);
+    driver_release_bytes(&env.file_io, &ld2);
+  }
+  driver_env_fini(&env);
+  return rc;
+}
diff --git a/driver/driver.h b/driver/driver.h
@@ -56,6 +56,7 @@ int driver_strings(int argc, char** argv);
 int driver_cas(int argc, char** argv);
 int driver_pkg(int argc, char** argv);
 int driver_xxd(int argc, char** argv);
+int driver_cmp(int argc, char** argv);
 
 /* Per-tool help printers. Write a multi-section help text to stdout and
  * return. The tool entry-points call these when invoked with no args, -h,
@@ -81,6 +82,7 @@ void driver_help_strings(void);
 void driver_help_cas(void);
 void driver_help_pkg(void);
 void driver_help_xxd(void);
+void driver_help_cmp(void);
 
 /* Multi-call top-level help (`cfree`, `cfree -h`, `cfree --help`,
  * `cfree help`). Lists each tool with a one-line summary and explains
diff --git a/driver/main.c b/driver/main.c
@@ -100,6 +100,10 @@ static const DriverToolDesc driver_tools[] = {
     {"xxd", driver_xxd, driver_help_xxd,
      "Hex dump any file (and reverse a dump back to binary)"},
 #endif
+#if CFREE_TOOL_CMP_ENABLED
+    {"cmp", driver_cmp, driver_help_cmp,
+     "Compare two files byte by byte"},
+#endif
     {NULL, NULL, NULL, NULL},
 };
 
diff --git a/include/cfree/config.h b/include/cfree/config.h
@@ -115,5 +115,6 @@
 #define CFREE_TOOL_CAS_ENABLED 1
 #define CFREE_TOOL_PKG_ENABLED 1
 #define CFREE_TOOL_XXD_ENABLED 1
+#define CFREE_TOOL_CMP_ENABLED 1
 
 #endif /* CFREE_CONFIG_H */