cmp.c (8398B)
1 #include <kit/core.h> 2 #include <stddef.h> 3 #include <stdint.h> 4 #include <string.h> 5 6 #include "driver.h" 7 #include "env.h" 8 9 /* `kit cmp` — compare two files byte by byte, GNU cmp style. Default prints 10 * the first differing byte (1-based offset + line); -l lists every difference; 11 * -s is silent. Exit codes follow GNU cmp and kit's convention exactly: 12 * 0 identical, 1 differ, 2 trouble/usage. With FILE2 omitted or `-`, the second 13 * operand is stdin. Optional SKIP1/SKIP2 skip leading bytes of each input. */ 14 15 #define CMP_TOOL "cmp" 16 17 typedef struct CmpOpts { 18 int silent; /* -s / --quiet / --silent */ 19 int list; /* -l : list all differing bytes */ 20 int show_bytes; /* -b : show the differing byte values */ 21 uint64_t max; /* -n N : compare at most N bytes (0 = unlimited) */ 22 int have_max; /* whether -n was given */ 23 } CmpOpts; 24 25 void driver_help_cmp(void) { 26 driver_printf( 27 "%.*s", 28 KIT_SLICE_ARG(KIT_SLICE_LIT( 29 "kit cmp — compare two files byte by byte\n" 30 "\n" 31 "USAGE\n" 32 " kit cmp [OPTIONS] FILE1 [FILE2 [SKIP1 [SKIP2]]]\n" 33 "\n" 34 "DESCRIPTION\n" 35 " Compares FILE1 and FILE2. With FILE2 omitted or `-`, reads " 36 "stdin.\n" 37 " SKIP1/SKIP2 skip that many leading bytes of each file before\n" 38 " comparing (decimal, or 0x-prefixed hex).\n" 39 "\n" 40 "OPTIONS\n" 41 " -s, --quiet, --silent print nothing; status only\n" 42 " -l, --verbose list each differing byte (octal)\n" 43 " -b, --print-bytes show the differing byte values\n" 44 " -n N compare at most N bytes\n" 45 " -h, --help show this help\n" 46 "\n" 47 "EXIT CODES\n" 48 " 0 identical 1 differ 2 " 49 "trouble/usage\n"))); 50 } 51 52 /* Parse a decimal or 0x-hex non-negative integer. Returns 0 on success. */ 53 static int cmp_parse_u64(const char* s, uint64_t* out) { 54 uint64_t v = 0; 55 int base = 10; 56 if (!s || !*s) return 1; 57 if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { 58 base = 16; 59 s += 2; 60 if (!*s) return 1; 61 } 62 for (; *s; ++s) { 63 unsigned d; 64 char c = *s; 65 if (c >= '0' && c <= '9') 66 d = (unsigned)(c - '0'); 67 else if (base == 16 && c >= 'a' && c <= 'f') 68 d = (unsigned)(c - 'a' + 10); 69 else if (base == 16 && c >= 'A' && c <= 'F') 70 d = (unsigned)(c - 'A' + 10); 71 else 72 return 1; 73 v = v * (uint64_t)base + d; 74 } 75 *out = v; 76 return 0; 77 } 78 79 /* Load a named operand, or stdin when the name is "-". Returns 0 on success 80 * with data and len set. A stdin read fills stdin_buf/stdin_len (freed with 81 * driver_free); a file read fills ld (freed with driver_release_bytes). */ 82 static int cmp_load(DriverEnv* env, const char* name, const uint8_t** data, 83 size_t* len, DriverLoad* ld, uint8_t** stdin_buf, 84 size_t* stdin_len) { 85 if (driver_streq(name, "-")) { 86 if (!driver_read_stdin(env, stdin_buf, stdin_len)) { 87 driver_errf(CMP_TOOL, "failed to read stdin"); 88 return 1; 89 } 90 *data = *stdin_buf; 91 *len = *stdin_len; 92 return 0; 93 } 94 { 95 KitSlice in; 96 if (driver_load_bytes(&env->file_io, CMP_TOOL, name, ld, &in) != 0) 97 return 1; 98 *data = in.data; 99 *len = in.len; 100 return 0; 101 } 102 } 103 104 /* Render a byte the way `cat -v` / cmp -b does: M- for the high bit, ^X for 105 * control codes, ^? for DEL, otherwise the literal character. */ 106 static void cmp_print_catv(uint8_t c) { 107 if (c >= 128) { 108 driver_printf("M-"); 109 c = (uint8_t)(c - 128); 110 } 111 if (c < 32) 112 driver_printf("^%c", (char)(c + 64)); 113 else if (c == 127) 114 driver_printf("^?"); 115 else 116 driver_printf("%c", (char)c); 117 } 118 119 int driver_cmp(int argc, char** argv) { 120 DriverEnv env; 121 CmpOpts opts; 122 const char* names[2] = {NULL, NULL}; 123 uint64_t skip[2] = {0, 0}; 124 int npos = 0; /* count of positional operands seen */ 125 int i, rc = 2; 126 127 const uint8_t* d1 = NULL; 128 const uint8_t* d2 = NULL; 129 size_t l1 = 0, l2 = 0; 130 DriverLoad ld1 = {0}, ld2 = {0}; 131 uint8_t* sb1 = NULL; 132 uint8_t* sb2 = NULL; 133 size_t sl1 = 0, sl2 = 0; 134 int loaded1 = 0, loaded2 = 0; 135 136 if (driver_argv_wants_help(argc, argv, 1)) { 137 driver_help_cmp(); 138 return 0; 139 } 140 141 memset(&opts, 0, sizeof opts); 142 driver_env_init(&env); 143 144 for (i = 1; i < argc; ++i) { 145 const char* a = argv[i]; 146 if (driver_streq(a, "-s") || driver_streq(a, "--quiet") || 147 driver_streq(a, "--silent")) { 148 opts.silent = 1; 149 continue; 150 } 151 if (driver_streq(a, "-l") || driver_streq(a, "--verbose")) { 152 opts.list = 1; 153 continue; 154 } 155 if (driver_streq(a, "-b") || driver_streq(a, "--print-bytes")) { 156 opts.show_bytes = 1; 157 continue; 158 } 159 if (driver_streq(a, "-n")) { 160 if (i + 1 >= argc || cmp_parse_u64(argv[++i], &opts.max) != 0) { 161 driver_errf(CMP_TOOL, "-n requires a non-negative count"); 162 goto done; 163 } 164 opts.have_max = 1; 165 continue; 166 } 167 if (driver_streq(a, "-")) { 168 /* stdin operand */ 169 } else if (a[0] == '-' && a[1] != '\0') { 170 driver_errf(CMP_TOOL, "unknown option: %s", a); 171 goto done; 172 } 173 /* positional: FILE1, FILE2, SKIP1, SKIP2 */ 174 if (npos < 2) { 175 names[npos] = a; 176 } else if (npos < 4) { 177 if (cmp_parse_u64(a, &skip[npos - 2]) != 0) { 178 driver_errf(CMP_TOOL, "invalid skip value: %s", a); 179 goto done; 180 } 181 } else { 182 driver_errf(CMP_TOOL, "too many operands: %s", a); 183 goto done; 184 } 185 ++npos; 186 } 187 188 if (npos < 1) { 189 driver_errf(CMP_TOOL, "missing operand (need FILE1)"); 190 goto done; 191 } 192 if (!names[1]) names[1] = "-"; /* FILE2 defaults to stdin */ 193 if (driver_streq(names[0], "-") && driver_streq(names[1], "-")) { 194 driver_errf(CMP_TOOL, "only one operand may be stdin (`-`)"); 195 goto done; 196 } 197 198 if (cmp_load(&env, names[0], &d1, &l1, &ld1, &sb1, &sl1) != 0) { 199 rc = 2; 200 goto done; 201 } 202 loaded1 = 1; 203 if (cmp_load(&env, names[1], &d2, &l2, &ld2, &sb2, &sl2) != 0) { 204 rc = 2; 205 goto done; 206 } 207 loaded2 = 1; 208 209 /* Apply leading-byte skips. A skip past EOF yields an empty view. */ 210 d1 = d1 + (skip[0] < l1 ? skip[0] : l1); 211 l1 = (skip[0] < l1) ? (l1 - skip[0]) : 0; 212 d2 = d2 + (skip[1] < l2 ? skip[1] : l2); 213 l2 = (skip[1] < l2) ? (l2 - skip[1]) : 0; 214 215 { 216 size_t cmp_len = l1 < l2 ? l1 : l2; 217 size_t k; 218 uint64_t line = 1; /* 1-based line of the current position */ 219 int differ = 0; 220 221 if (opts.have_max && opts.max < (uint64_t)cmp_len) 222 cmp_len = (size_t)opts.max; 223 224 for (k = 0; k < cmp_len; ++k) { 225 if (d1[k] != d2[k]) { 226 differ = 1; 227 if (opts.silent) break; 228 if (opts.list) { 229 /* -l: "<byte> <oct1> <oct2>", 1-based, octal byte values. */ 230 driver_printf("%6llu %3llo %3llo\n", (unsigned long long)(k + 1), 231 (unsigned long long)d1[k], (unsigned long long)d2[k]); 232 continue; 233 } 234 /* Default: report the first difference and stop. POSIX/GNU/BSD all 235 * phrase this as "char N" (the 1-based byte offset). */ 236 driver_printf("%s %s differ: char %llu, line %llu", names[0], names[1], 237 (unsigned long long)(k + 1), (unsigned long long)line); 238 if (opts.show_bytes) { 239 driver_printf(" is %3llo ", (unsigned long long)d1[k]); 240 cmp_print_catv(d1[k]); 241 driver_printf(" %3llo ", (unsigned long long)d2[k]); 242 cmp_print_catv(d2[k]); 243 } 244 driver_printf("\n"); 245 break; 246 } 247 if (d1[k] == '\n') ++line; 248 } 249 250 if (opts.list && differ) { 251 /* listed all diffs above */ 252 } 253 254 if (!differ && l1 != l2 && 255 (!opts.have_max || opts.max > (uint64_t)cmp_len)) { 256 /* Equal up to the shorter length: EOF on the shorter file. */ 257 const char* shorter = l1 < l2 ? names[0] : names[1]; 258 if (!opts.silent) { 259 driver_errf(CMP_TOOL, "EOF on %s after byte %llu", shorter, 260 (unsigned long long)cmp_len); 261 } 262 differ = 1; 263 } 264 265 rc = differ ? 1 : 0; 266 } 267 268 done: 269 if (loaded1) { 270 if (sb1) driver_free(&env, sb1, sl1); 271 driver_release_bytes(&env.file_io, &ld1); 272 } 273 if (loaded2) { 274 if (sb2) driver_free(&env, sb2, sl2); 275 driver_release_bytes(&env.file_io, &ld2); 276 } 277 driver_env_fini(&env); 278 return rc; 279 }