kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

cmp.c (8398B)


      1 #include <kit/core.h>
      2 #include <stddef.h>
      3 #include <stdint.h>
      4 #include <string.h>
      5 
      6 #include "driver.h"
      7 #include "env.h"
      8 
      9 /* `kit cmp` — compare two files byte by byte, GNU cmp style. Default prints
     10  * the first differing byte (1-based offset + line); -l lists every difference;
     11  * -s is silent. Exit codes follow GNU cmp and kit's convention exactly:
     12  * 0 identical, 1 differ, 2 trouble/usage. With FILE2 omitted or `-`, the second
     13  * operand is stdin. Optional SKIP1/SKIP2 skip leading bytes of each input. */
     14 
     15 #define CMP_TOOL "cmp"
     16 
     17 typedef struct CmpOpts {
     18   int silent;     /* -s / --quiet / --silent */
     19   int list;       /* -l : list all differing bytes */
     20   int show_bytes; /* -b : show the differing byte values */
     21   uint64_t max;   /* -n N : compare at most N bytes (0 = unlimited) */
     22   int have_max;   /* whether -n was given */
     23 } CmpOpts;
     24 
     25 void driver_help_cmp(void) {
     26   driver_printf(
     27       "%.*s",
     28       KIT_SLICE_ARG(KIT_SLICE_LIT(
     29           "kit cmp — compare two files byte by byte\n"
     30           "\n"
     31           "USAGE\n"
     32           "  kit cmp [OPTIONS] FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"
     33           "\n"
     34           "DESCRIPTION\n"
     35           "  Compares FILE1 and FILE2. With FILE2 omitted or `-`, reads "
     36           "stdin.\n"
     37           "  SKIP1/SKIP2 skip that many leading bytes of each file before\n"
     38           "  comparing (decimal, or 0x-prefixed hex).\n"
     39           "\n"
     40           "OPTIONS\n"
     41           "  -s, --quiet, --silent   print nothing; status only\n"
     42           "  -l, --verbose           list each differing byte (octal)\n"
     43           "  -b, --print-bytes       show the differing byte values\n"
     44           "  -n N                    compare at most N bytes\n"
     45           "  -h, --help              show this help\n"
     46           "\n"
     47           "EXIT CODES\n"
     48           "  0   identical         1   differ              2   "
     49           "trouble/usage\n")));
     50 }
     51 
     52 /* Parse a decimal or 0x-hex non-negative integer. Returns 0 on success. */
     53 static int cmp_parse_u64(const char* s, uint64_t* out) {
     54   uint64_t v = 0;
     55   int base = 10;
     56   if (!s || !*s) return 1;
     57   if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
     58     base = 16;
     59     s += 2;
     60     if (!*s) return 1;
     61   }
     62   for (; *s; ++s) {
     63     unsigned d;
     64     char c = *s;
     65     if (c >= '0' && c <= '9')
     66       d = (unsigned)(c - '0');
     67     else if (base == 16 && c >= 'a' && c <= 'f')
     68       d = (unsigned)(c - 'a' + 10);
     69     else if (base == 16 && c >= 'A' && c <= 'F')
     70       d = (unsigned)(c - 'A' + 10);
     71     else
     72       return 1;
     73     v = v * (uint64_t)base + d;
     74   }
     75   *out = v;
     76   return 0;
     77 }
     78 
     79 /* Load a named operand, or stdin when the name is "-". Returns 0 on success
     80  * with data and len set. A stdin read fills stdin_buf/stdin_len (freed with
     81  * driver_free); a file read fills ld (freed with driver_release_bytes). */
     82 static int cmp_load(DriverEnv* env, const char* name, const uint8_t** data,
     83                     size_t* len, DriverLoad* ld, uint8_t** stdin_buf,
     84                     size_t* stdin_len) {
     85   if (driver_streq(name, "-")) {
     86     if (!driver_read_stdin(env, stdin_buf, stdin_len)) {
     87       driver_errf(CMP_TOOL, "failed to read stdin");
     88       return 1;
     89     }
     90     *data = *stdin_buf;
     91     *len = *stdin_len;
     92     return 0;
     93   }
     94   {
     95     KitSlice in;
     96     if (driver_load_bytes(&env->file_io, CMP_TOOL, name, ld, &in) != 0)
     97       return 1;
     98     *data = in.data;
     99     *len = in.len;
    100     return 0;
    101   }
    102 }
    103 
    104 /* Render a byte the way `cat -v` / cmp -b does: M- for the high bit, ^X for
    105  * control codes, ^? for DEL, otherwise the literal character. */
    106 static void cmp_print_catv(uint8_t c) {
    107   if (c >= 128) {
    108     driver_printf("M-");
    109     c = (uint8_t)(c - 128);
    110   }
    111   if (c < 32)
    112     driver_printf("^%c", (char)(c + 64));
    113   else if (c == 127)
    114     driver_printf("^?");
    115   else
    116     driver_printf("%c", (char)c);
    117 }
    118 
    119 int driver_cmp(int argc, char** argv) {
    120   DriverEnv env;
    121   CmpOpts opts;
    122   const char* names[2] = {NULL, NULL};
    123   uint64_t skip[2] = {0, 0};
    124   int npos = 0; /* count of positional operands seen */
    125   int i, rc = 2;
    126 
    127   const uint8_t* d1 = NULL;
    128   const uint8_t* d2 = NULL;
    129   size_t l1 = 0, l2 = 0;
    130   DriverLoad ld1 = {0}, ld2 = {0};
    131   uint8_t* sb1 = NULL;
    132   uint8_t* sb2 = NULL;
    133   size_t sl1 = 0, sl2 = 0;
    134   int loaded1 = 0, loaded2 = 0;
    135 
    136   if (driver_argv_wants_help(argc, argv, 1)) {
    137     driver_help_cmp();
    138     return 0;
    139   }
    140 
    141   memset(&opts, 0, sizeof opts);
    142   driver_env_init(&env);
    143 
    144   for (i = 1; i < argc; ++i) {
    145     const char* a = argv[i];
    146     if (driver_streq(a, "-s") || driver_streq(a, "--quiet") ||
    147         driver_streq(a, "--silent")) {
    148       opts.silent = 1;
    149       continue;
    150     }
    151     if (driver_streq(a, "-l") || driver_streq(a, "--verbose")) {
    152       opts.list = 1;
    153       continue;
    154     }
    155     if (driver_streq(a, "-b") || driver_streq(a, "--print-bytes")) {
    156       opts.show_bytes = 1;
    157       continue;
    158     }
    159     if (driver_streq(a, "-n")) {
    160       if (i + 1 >= argc || cmp_parse_u64(argv[++i], &opts.max) != 0) {
    161         driver_errf(CMP_TOOL, "-n requires a non-negative count");
    162         goto done;
    163       }
    164       opts.have_max = 1;
    165       continue;
    166     }
    167     if (driver_streq(a, "-")) {
    168       /* stdin operand */
    169     } else if (a[0] == '-' && a[1] != '\0') {
    170       driver_errf(CMP_TOOL, "unknown option: %s", a);
    171       goto done;
    172     }
    173     /* positional: FILE1, FILE2, SKIP1, SKIP2 */
    174     if (npos < 2) {
    175       names[npos] = a;
    176     } else if (npos < 4) {
    177       if (cmp_parse_u64(a, &skip[npos - 2]) != 0) {
    178         driver_errf(CMP_TOOL, "invalid skip value: %s", a);
    179         goto done;
    180       }
    181     } else {
    182       driver_errf(CMP_TOOL, "too many operands: %s", a);
    183       goto done;
    184     }
    185     ++npos;
    186   }
    187 
    188   if (npos < 1) {
    189     driver_errf(CMP_TOOL, "missing operand (need FILE1)");
    190     goto done;
    191   }
    192   if (!names[1]) names[1] = "-"; /* FILE2 defaults to stdin */
    193   if (driver_streq(names[0], "-") && driver_streq(names[1], "-")) {
    194     driver_errf(CMP_TOOL, "only one operand may be stdin (`-`)");
    195     goto done;
    196   }
    197 
    198   if (cmp_load(&env, names[0], &d1, &l1, &ld1, &sb1, &sl1) != 0) {
    199     rc = 2;
    200     goto done;
    201   }
    202   loaded1 = 1;
    203   if (cmp_load(&env, names[1], &d2, &l2, &ld2, &sb2, &sl2) != 0) {
    204     rc = 2;
    205     goto done;
    206   }
    207   loaded2 = 1;
    208 
    209   /* Apply leading-byte skips. A skip past EOF yields an empty view. */
    210   d1 = d1 + (skip[0] < l1 ? skip[0] : l1);
    211   l1 = (skip[0] < l1) ? (l1 - skip[0]) : 0;
    212   d2 = d2 + (skip[1] < l2 ? skip[1] : l2);
    213   l2 = (skip[1] < l2) ? (l2 - skip[1]) : 0;
    214 
    215   {
    216     size_t cmp_len = l1 < l2 ? l1 : l2;
    217     size_t k;
    218     uint64_t line = 1; /* 1-based line of the current position */
    219     int differ = 0;
    220 
    221     if (opts.have_max && opts.max < (uint64_t)cmp_len)
    222       cmp_len = (size_t)opts.max;
    223 
    224     for (k = 0; k < cmp_len; ++k) {
    225       if (d1[k] != d2[k]) {
    226         differ = 1;
    227         if (opts.silent) break;
    228         if (opts.list) {
    229           /* -l: "<byte> <oct1> <oct2>", 1-based, octal byte values. */
    230           driver_printf("%6llu %3llo %3llo\n", (unsigned long long)(k + 1),
    231                         (unsigned long long)d1[k], (unsigned long long)d2[k]);
    232           continue;
    233         }
    234         /* Default: report the first difference and stop. POSIX/GNU/BSD all
    235          * phrase this as "char N" (the 1-based byte offset). */
    236         driver_printf("%s %s differ: char %llu, line %llu", names[0], names[1],
    237                       (unsigned long long)(k + 1), (unsigned long long)line);
    238         if (opts.show_bytes) {
    239           driver_printf(" is %3llo ", (unsigned long long)d1[k]);
    240           cmp_print_catv(d1[k]);
    241           driver_printf(" %3llo ", (unsigned long long)d2[k]);
    242           cmp_print_catv(d2[k]);
    243         }
    244         driver_printf("\n");
    245         break;
    246       }
    247       if (d1[k] == '\n') ++line;
    248     }
    249 
    250     if (opts.list && differ) {
    251       /* listed all diffs above */
    252     }
    253 
    254     if (!differ && l1 != l2 &&
    255         (!opts.have_max || opts.max > (uint64_t)cmp_len)) {
    256       /* Equal up to the shorter length: EOF on the shorter file. */
    257       const char* shorter = l1 < l2 ? names[0] : names[1];
    258       if (!opts.silent) {
    259         driver_errf(CMP_TOOL, "EOF on %s after byte %llu", shorter,
    260                     (unsigned long long)cmp_len);
    261       }
    262       differ = 1;
    263     }
    264 
    265     rc = differ ? 1 : 0;
    266   }
    267 
    268 done:
    269   if (loaded1) {
    270     if (sb1) driver_free(&env, sb1, sl1);
    271     driver_release_bytes(&env.file_io, &ld1);
    272   }
    273   if (loaded2) {
    274     if (sb2) driver_free(&env, sb2, sl2);
    275     driver_release_bytes(&env.file_io, &ld2);
    276   }
    277   driver_env_fini(&env);
    278   return rc;
    279 }