commit 55e6cae81c8e601c13504393d234c59e43135ffa
parent ae690b5af361d1c325939f360dcd054623b6fb83
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 2 Jun 2026 11:05:22 -0700
driver: add cmp tool (byte-by-byte file comparison)
GNU/BSD cmp-compatible: default reports the first differing byte
("FILE1 FILE2 differ: char N, line M"), -s is silent, -l lists every
differing byte in octal, -b renders the bytes cat -v style, -n caps the
compared length, and SKIP1/SKIP2 skip leading bytes. Exit codes 0/1/2
(identical/differ/trouble) match the convention. FILE2 defaults to stdin.
Lets the toolchain verify its own outputs (e.g. bootstrap reproducibility)
without shelling out to the host cmp.
Diffstat:
5 files changed, 287 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
@@ -394,6 +394,9 @@ endif
ifeq ($(CFREE_TOOL_XXD_ENABLED),1)
DRIVER_TOOL_SRCS += driver/cmd/xxd.c
endif
+# cmp tool: driver/cmd/cmp.c joins DRIVER_TOOL_SRCS only when
+# CFREE_TOOL_CMP_ENABLED is 1.
+ifeq ($(CFREE_TOOL_CMP_ENABLED),1)
+DRIVER_TOOL_SRCS += driver/cmd/cmp.c
+endif
DRIVER_SRCS += $(sort $(DRIVER_TOOL_SRCS))
ifneq ($(filter 1,$(CFREE_TOOL_CC_ENABLED) $(CFREE_TOOL_CHECK_ENABLED) $(CFREE_TOOL_CPP_ENABLED) $(CFREE_TOOL_AS_ENABLED) $(CFREE_TOOL_DBG_ENABLED) $(CFREE_TOOL_RUN_ENABLED)),)
DRIVER_SRCS += driver/lib/cflags.c
diff --git a/driver/cmd/cmp.c b/driver/cmd/cmp.c
@@ -0,0 +1,277 @@
+#include <cfree/core.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "driver.h"
+#include "env.h"
+
+/* `cfree cmp` — compare two files byte by byte, GNU cmp style. Default prints
+ * the first differing byte (1-based offset + line); -l lists every difference;
+ * -s is silent. Exit codes follow GNU cmp and cfree's convention exactly:
+ * 0 identical, 1 differ, 2 trouble/usage. With FILE2 omitted or `-`, the second
+ * operand is stdin. Optional SKIP1/SKIP2 skip leading bytes of each input. */
+
+/* Tool-name tag this file passes to driver_errf() and driver_load_bytes(). */
+#define CMP_TOOL "cmp"
+
+/* Command-line options for `cfree cmp`, filled in by driver_cmp() while it
+ * walks argv. driver_cmp() zeroes the struct first, so every flag starts
+ * off. */
+typedef struct CmpOpts {
+ int silent; /* -s / --quiet / --silent */
+ int list; /* -l / --verbose : list all differing bytes */
+ int show_bytes; /* -b / --print-bytes : show the differing byte values */
+ uint64_t max; /* -n N : compare at most N bytes; only consulted when
+                * have_max is set, so `-n 0` compares nothing (it does
+                * NOT mean "unlimited") */
+ int have_max; /* whether -n was given */
+} CmpOpts;
+
+/* Help printer for `cfree cmp`: emits the usage, description, option and
+ * exit-code sections below in a single driver_printf() call. Takes no
+ * arguments and returns nothing. driver_cmp() calls it when
+ * driver_argv_wants_help() reports that help was requested. */
+void driver_help_cmp(void) {
+ driver_printf(
+ "%.*s",
+ CFREE_SLICE_ARG(CFREE_SLICE_LIT(
+ "cfree cmp — compare two files byte by byte\n"
+ "\n"
+ "USAGE\n"
+ " cfree cmp [OPTIONS] FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"
+ "\n"
+ "DESCRIPTION\n"
+ " Compares FILE1 and FILE2. With FILE2 omitted or `-`, reads stdin.\n"
+ " SKIP1/SKIP2 skip that many leading bytes of each file before\n"
+ " comparing (decimal, or 0x-prefixed hex).\n"
+ "\n"
+ "OPTIONS\n"
+ " -s, --quiet, --silent print nothing; status only\n"
+ " -l, --verbose list each differing byte (octal)\n"
+ " -b, --print-bytes show the differing byte values\n"
+ " -n N compare at most N bytes\n"
+ " -h, --help show this help\n"
+ "\n"
+ "EXIT CODES\n"
+ " 0 identical 1 differ 2 trouble/usage\n")));
+}
+
+/* Parse a non-negative integer written in decimal or 0x/0X-prefixed hex.
+ *
+ *   s   - NUL-terminated text. NULL, "", a bare "0x", and any character
+ *         that is not a digit of the selected base are rejected (so signs
+ *         and surrounding whitespace are errors too).
+ *   out - receives the parsed value on success; left untouched on failure.
+ *
+ * Returns 0 on success and 1 on malformed input or when the value does not
+ * fit in a uint64_t. Overflow is rejected rather than wrapped, so an
+ * oversized -n / SKIP argument cannot quietly turn into a small number. */
+static int cmp_parse_u64(const char* s, uint64_t* out) {
+  uint64_t v = 0;
+  uint64_t base = 10;
+  if (!s || !*s) return 1;
+  if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
+    base = 16;
+    s += 2;
+    if (!*s) return 1; /* "0x" with no digits after the prefix */
+  }
+  for (; *s; ++s) {
+    uint64_t d;
+    char c = *s;
+    if (c >= '0' && c <= '9')
+      d = (uint64_t)(c - '0');
+    else if (base == 16 && c >= 'a' && c <= 'f')
+      d = (uint64_t)(c - 'a' + 10);
+    else if (base == 16 && c >= 'A' && c <= 'F')
+      d = (uint64_t)(c - 'A' + 10);
+    else
+      return 1;
+    /* v * base + d must stay <= UINT64_MAX; test before multiplying so the
+     * check itself cannot wrap. */
+    if (v > (UINT64_MAX - d) / base) return 1;
+    v = v * base + d;
+  }
+  *out = v;
+  return 0;
+}
+
+/* Resolve one operand into a read-only byte view.
+ *
+ * A name of "-" means stdin: the bytes are read with driver_read_stdin()
+ * into *stdin_buf / *stdin_len (the caller releases them with driver_free).
+ * Any other name is loaded with driver_load_bytes() into *ld (the caller
+ * releases it with driver_release_bytes).
+ *
+ * On success returns 0 with *data / *len describing the bytes. On failure
+ * returns 1; a stdin failure is reported here, while a file failure prints
+ * nothing in this function. */
+static int cmp_load(DriverEnv* env, const char* name, const uint8_t** data,
+                    size_t* len, DriverLoad* ld, uint8_t** stdin_buf,
+                    size_t* stdin_len) {
+  CfreeSlice file_bytes;
+
+  if (!driver_streq(name, "-")) {
+    /* Named file. */
+    if (driver_load_bytes(&env->file_io, CMP_TOOL, name, ld, &file_bytes) != 0)
+      return 1;
+    *data = file_bytes.data;
+    *len = file_bytes.len;
+    return 0;
+  }
+
+  /* Standard input. */
+  if (!driver_read_stdin(env, stdin_buf, stdin_len)) {
+    driver_errf(CMP_TOOL, "failed to read stdin");
+    return 1;
+  }
+  *data = *stdin_buf;
+  *len = *stdin_len;
+  return 0;
+}
+
+/* Write one byte in `cat -v` notation (as cmp -b shows it): bytes with the
+ * high bit set get an "M-" prefix and are then rendered from their low seven
+ * bits; control codes 0..31 become ^@ .. ^_; DEL (127) becomes ^?; every
+ * other value is written as the character itself. */
+static void cmp_print_catv(uint8_t c) {
+  uint8_t low = c;
+
+  if (c & 0x80) {
+    driver_printf("M-");
+    low = (uint8_t)(c & 0x7f);
+  }
+
+  if (low == 127) {
+    driver_printf("^?");
+    return;
+  }
+  if (low >= 32) {
+    driver_printf("%c", (char)low);
+    return;
+  }
+  /* Control code: offset into the '@'.. '_' range. */
+  driver_printf("^%c", (char)(low + 64));
+}
+
+/* Number of columns cmp_print_catv() writes for byte c: 2 for an "M-" prefix
+ * when the high bit is set, plus 2 for a ^X / ^? escape or 1 for a literal
+ * character. Used to pad the first rendering in `-l -b` rows. */
+static int cmp_catv_width(uint8_t c) {
+  int width = 0;
+  if (c >= 128) {
+    width += 2;
+    c = (uint8_t)(c - 128);
+  }
+  width += (c < 32 || c == 127) ? 2 : 1;
+  return width;
+}
+
+/* Entry point for `cfree cmp [OPTIONS] FILE1 [FILE2 [SKIP1 [SKIP2]]]`.
+ *
+ * argv[1..] is scanned once: -s/-l/-b/-n are options, a lone "-" is the stdin
+ * operand, any other argument starting with '-' is rejected, and the rest are
+ * positionals (two names, then two skip counts). FILE2 defaults to "-"; both
+ * operands being "-" is an error.
+ *
+ * Output:
+ *   default  "FILE1 FILE2 differ: char N, line M" for the first difference
+ *            (with -b, followed by both bytes in octal and cat -v form).
+ *   -l       one "offset oct1 oct2" row per differing byte; with -b each
+ *            octal value is followed by its cat -v form.
+ *   -s       nothing.
+ * When the compared ranges are equal but one input is shorter (and -n did not
+ * cut the comparison first), "EOF on NAME after byte N" is reported through
+ * driver_errf(). In -l mode that report is made even when differences were
+ * listed, matching GNU cmp.
+ *
+ * Returns 0 if the inputs are identical over the compared range, 1 if they
+ * differ (including the EOF case), 2 on usage or load errors. A help request
+ * prints the help text and returns 0. */
+int driver_cmp(int argc, char** argv) {
+  DriverEnv env;
+  CmpOpts opts;
+  const char* names[2] = {NULL, NULL};
+  uint64_t skip[2] = {0, 0};
+  int npos = 0; /* count of positional operands seen */
+  int i, rc = 2; /* rc stays 2 on every early `goto done` */
+
+  const uint8_t* d1 = NULL;
+  const uint8_t* d2 = NULL;
+  size_t l1 = 0, l2 = 0;
+  DriverLoad ld1 = {0}, ld2 = {0};
+  uint8_t* sb1 = NULL;
+  uint8_t* sb2 = NULL;
+  size_t sl1 = 0, sl2 = 0;
+  int loaded1 = 0, loaded2 = 0;
+
+  if (driver_argv_wants_help(argc, argv, 1)) {
+    driver_help_cmp();
+    return 0;
+  }
+
+  memset(&opts, 0, sizeof opts);
+  driver_env_init(&env);
+
+  for (i = 1; i < argc; ++i) {
+    const char* a = argv[i];
+    if (driver_streq(a, "-s") || driver_streq(a, "--quiet") ||
+        driver_streq(a, "--silent")) {
+      opts.silent = 1;
+      continue;
+    }
+    if (driver_streq(a, "-l") || driver_streq(a, "--verbose")) {
+      opts.list = 1;
+      continue;
+    }
+    if (driver_streq(a, "-b") || driver_streq(a, "--print-bytes")) {
+      opts.show_bytes = 1;
+      continue;
+    }
+    if (driver_streq(a, "-n")) {
+      if (i + 1 >= argc || cmp_parse_u64(argv[++i], &opts.max) != 0) {
+        driver_errf(CMP_TOOL, "-n requires a non-negative count");
+        goto done;
+      }
+      opts.have_max = 1;
+      continue;
+    }
+    if (driver_streq(a, "-")) {
+      /* stdin operand */
+    } else if (a[0] == '-' && a[1] != '\0') {
+      driver_errf(CMP_TOOL, "unknown option: %s", a);
+      goto done;
+    }
+    /* positional: FILE1, FILE2, SKIP1, SKIP2 */
+    if (npos < 2) {
+      names[npos] = a;
+    } else if (npos < 4) {
+      if (cmp_parse_u64(a, &skip[npos - 2]) != 0) {
+        driver_errf(CMP_TOOL, "invalid skip value: %s", a);
+        goto done;
+      }
+    } else {
+      driver_errf(CMP_TOOL, "too many operands: %s", a);
+      goto done;
+    }
+    ++npos;
+  }
+
+  if (npos < 1) {
+    driver_errf(CMP_TOOL, "missing operand (need FILE1)");
+    goto done;
+  }
+  if (!names[1]) names[1] = "-"; /* FILE2 defaults to stdin */
+  if (driver_streq(names[0], "-") && driver_streq(names[1], "-")) {
+    driver_errf(CMP_TOOL, "only one operand may be stdin (`-`)");
+    goto done;
+  }
+
+  if (cmp_load(&env, names[0], &d1, &l1, &ld1, &sb1, &sl1) != 0) goto done;
+  loaded1 = 1;
+  if (cmp_load(&env, names[1], &d2, &l2, &ld2, &sb2, &sl2) != 0) goto done;
+  loaded2 = 1;
+
+  /* Apply leading-byte skips. A skip past EOF yields an empty view. The
+   * pointer is only advanced by a non-zero amount, so an empty input whose
+   * data pointer is NULL never takes part in pointer arithmetic. */
+  {
+    size_t off1 = skip[0] < (uint64_t)l1 ? (size_t)skip[0] : l1;
+    size_t off2 = skip[1] < (uint64_t)l2 ? (size_t)skip[1] : l2;
+    if (off1) d1 += off1;
+    if (off2) d2 += off2;
+    l1 -= off1;
+    l2 -= off2;
+  }
+
+  {
+    size_t cmp_len = l1 < l2 ? l1 : l2;
+    size_t k;
+    uint64_t line = 1; /* 1-based line of the current position */
+    int differ = 0;
+
+    if (opts.have_max && opts.max < (uint64_t)cmp_len)
+      cmp_len = (size_t)opts.max;
+
+    for (k = 0; k < cmp_len; ++k) {
+      if (d1[k] != d2[k]) {
+        differ = 1;
+        if (opts.silent) break;
+        if (opts.list) {
+          /* -l: "<byte> <oct1> <oct2>", 1-based, octal byte values. With
+           * -b each octal value is followed by its cat -v rendering; the
+           * first rendering is padded to 4 columns (the widest form,
+           * "M-^?") so the second octal column stays aligned. */
+          driver_printf("%6llu %3llo", (unsigned long long)(k + 1),
+                        (unsigned long long)d1[k]);
+          if (opts.show_bytes) {
+            int pad = 4 - cmp_catv_width(d1[k]);
+            driver_printf(" ");
+            cmp_print_catv(d1[k]);
+            while (pad-- > 0) driver_printf(" ");
+          }
+          driver_printf(" %3llo", (unsigned long long)d2[k]);
+          if (opts.show_bytes) {
+            driver_printf(" ");
+            cmp_print_catv(d2[k]);
+          }
+          driver_printf("\n");
+          continue;
+        }
+        /* Default: report the first difference and stop. POSIX/GNU/BSD all
+         * phrase this as "char N" (the 1-based byte offset). */
+        driver_printf("%s %s differ: char %llu, line %llu", names[0], names[1],
+                      (unsigned long long)(k + 1), (unsigned long long)line);
+        if (opts.show_bytes) {
+          driver_printf(" is %3llo ", (unsigned long long)d1[k]);
+          cmp_print_catv(d1[k]);
+          driver_printf(" %3llo ", (unsigned long long)d2[k]);
+          cmp_print_catv(d2[k]);
+        }
+        driver_printf("\n");
+        break;
+      }
+      if (d1[k] == '\n') ++line;
+    }
+
+    /* One input ended before the other, and -n did not stop the comparison
+     * first. In default and -s mode a difference above is already the whole
+     * verdict, so this only applies when none was found. In -l mode the loop
+     * ran to the end of the shorter input, so EOF is reported in addition to
+     * any listed differences. */
+    if ((!differ || opts.list) && l1 != l2 &&
+        (!opts.have_max || opts.max > (uint64_t)cmp_len)) {
+      const char* shorter = l1 < l2 ? names[0] : names[1];
+      if (!opts.silent) {
+        driver_errf(CMP_TOOL, "EOF on %s after byte %llu", shorter,
+                    (unsigned long long)cmp_len);
+      }
+      differ = 1;
+    }
+
+    rc = differ ? 1 : 0;
+  }
+
+done:
+  /* Release whatever was loaded: the stdin buffer (if that operand was
+   * stdin) and the file load record. */
+  if (loaded1) {
+    if (sb1) driver_free(&env, sb1, sl1);
+    driver_release_bytes(&env.file_io, &ld1);
+  }
+  if (loaded2) {
+    if (sb2) driver_free(&env, sb2, sl2);
+    driver_release_bytes(&env.file_io, &ld2);
+  }
+  driver_env_fini(&env);
+  return rc;
+}
diff --git a/driver/driver.h b/driver/driver.h
@@ -56,6 +56,7 @@ int driver_strings(int argc, char** argv);
int driver_cas(int argc, char** argv);
int driver_pkg(int argc, char** argv);
int driver_xxd(int argc, char** argv);
+int driver_cmp(int argc, char** argv);
/* Per-tool help printers. Write a multi-section help text to stdout and
* return. The tool entry-points call these when invoked with no args, -h,
@@ -81,6 +82,7 @@ void driver_help_strings(void);
void driver_help_cas(void);
void driver_help_pkg(void);
void driver_help_xxd(void);
+void driver_help_cmp(void);
/* Multi-call top-level help (`cfree`, `cfree -h`, `cfree --help`,
* `cfree help`). Lists each tool with a one-line summary and explains
diff --git a/driver/main.c b/driver/main.c
@@ -100,6 +100,10 @@ static const DriverToolDesc driver_tools[] = {
{"xxd", driver_xxd, driver_help_xxd,
"Hex dump any file (and reverse a dump back to binary)"},
#endif
+#if CFREE_TOOL_CMP_ENABLED
+ {"cmp", driver_cmp, driver_help_cmp,
+ "Compare two files byte by byte"},
+#endif
{NULL, NULL, NULL, NULL},
};
diff --git a/include/cfree/config.h b/include/cfree/config.h
@@ -115,5 +115,6 @@
#define CFREE_TOOL_CAS_ENABLED 1
#define CFREE_TOOL_PKG_ENABLED 1
#define CFREE_TOOL_XXD_ENABLED 1
+#define CFREE_TOOL_CMP_ENABLED 1
#endif /* CFREE_CONFIG_H */