kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

xxd.c (13289B)


      1 #include <kit/core.h>
      2 #include <stddef.h>
      3 #include <stdint.h>
      4 #include <string.h>
      5 
      6 #include "driver.h"
      7 #include "env.h"
      8 
      9 /* `kit xxd` — hex dump any file, and reverse a dump back to binary. A GNU
     10  * xxd subset: default hex+ASCII dump, plus -r (reverse), -p (plain hex),
     11  * -i (C array), -c/-g (columns/grouping), -s/-l (seek/length), -u (uppercase).
     12  * Unlike `objdump -s` it is format-agnostic: it dumps raw bytes of any input.
     13  * With no FILE, or `-`, reads stdin; output goes to stdout or -o FILE. */
     14 
/* Tool name handed to driver_errf / driver_load_bytes for diagnostics. */
#define XXD_TOOL "xxd"

/* Default number of bytes per output line for each forward mode; used when
 * -c is not given (see the cols resolution in driver_xxd). */
#define XXD_COLS_DUMP 16u
#define XXD_COLS_PLAIN 30u
#define XXD_COLS_INCLUDE 12u
     20 
     21 typedef enum XxdMode {
     22   XXD_DUMP = 0, /* default hex+ASCII */
     23   XXD_PLAIN,    /* -p: continuous hex */
     24   XXD_INCLUDE,  /* -i: C array */
     25 } XxdMode;
     26 
/* Parsed command-line options for one `kit xxd` invocation. The struct is
 * zero-filled before parsing, so every field's zero value is its default. */
typedef struct XxdOpts {
  XxdMode mode;
  int reverse;     /* -r */
  int uppercase;   /* -u */
  size_t cols;     /* -c; 0 = mode default */
  size_t group;    /* -g; 0 = default (2) */
  uint64_t seek;   /* -s */
  uint64_t limit;  /* -l; only applied when have_limit is set (so -l 0 dumps
                      nothing rather than meaning "no limit") */
  int have_limit;  /* whether -l was given */
  const char* in;  /* input path, or NULL = stdin */
  const char* out; /* output path, or NULL = stdout */
} XxdOpts;
     39 
/* Streaming output buffer over a KitWriter; flushes when full so cols can be
 * arbitrarily large. Callers zero the struct, set `w`, append through the
 * xb_* helpers, and call xb_flush once at the end. */
typedef struct Xb {
  char buf[8192]; /* pending bytes not yet handed to the writer */
  size_t n;       /* number of valid bytes in buf */
  KitWriter* w;   /* destination writer */
  int err;        /* set to 1 by xb_flush when a write fails; never cleared */
} Xb;
     48 
     49 static void xb_flush(Xb* b) {
     50   if (b->n) {
     51     if (kit_writer_write(b->w, b->buf, b->n) != KIT_OK) b->err = 1;
     52     b->n = 0;
     53   }
     54 }
     55 static void xb_c(Xb* b, char c) {
     56   if (b->n == sizeof b->buf) xb_flush(b);
     57   b->buf[b->n++] = c;
     58 }
     59 static void xb_s(Xb* b, const char* s) {
     60   while (*s) xb_c(b, *s++);
     61 }
     62 static void xb_hex2(Xb* b, uint8_t v, const char* hx) {
     63   xb_c(b, hx[v >> 4]);
     64   xb_c(b, hx[v & 0x0f]);
     65 }
     66 static void xb_hexnum(Xb* b, uint64_t v, int width, const char* hx) {
     67   char t[16];
     68   int i;
     69   if (width > 16) width = 16;
     70   for (i = width - 1; i >= 0; --i) {
     71     t[i] = hx[v & 0x0f];
     72     v >>= 4;
     73   }
     74   for (i = 0; i < width; ++i) xb_c(b, t[i]);
     75 }
     76 static void xb_dec(Xb* b, uint64_t v) {
     77   char t[24];
     78   int i = 0;
     79   if (v == 0) {
     80     xb_c(b, '0');
     81     return;
     82   }
     83   while (v) {
     84     t[i++] = (char)('0' + (int)(v % 10));
     85     v /= 10;
     86   }
     87   while (i) xb_c(b, t[--i]);
     88 }
     89 
/* Print the `kit xxd` usage text through driver_printf. The whole help
 * screen is one string literal wrapped as a slice and printed with "%.*s". */
void driver_help_xxd(void) {
  driver_printf(
      "%.*s",
      KIT_SLICE_ARG(KIT_SLICE_LIT(
          "kit xxd — hex dump a file (and reverse a dump back to binary)\n"
          "\n"
          "USAGE\n"
          "  kit xxd [OPTIONS] [INFILE [OUTFILE]]\n"
          "\n"
          "DESCRIPTION\n"
          "  Dumps the raw bytes of INFILE (or stdin) as a hex+ASCII table.\n"
          "  Works on any file, not just objects. `xxd f | xxd -r` "
          "round-trips\n"
          "  contiguous data back to the original bytes.\n"
          "\n"
          "OPTIONS\n"
          "  -r            reverse: read a hex dump, write binary\n"
          "  -p            plain hex dump (continuous, no offsets/ASCII)\n"
          "  -i            output a C `unsigned char[]` array\n"
          "  -c N          N bytes per line (default 16; -p 30; -i 12)\n"
          "  -g N          group hex into N-byte columns (default 2)\n"
          "  -s OFF        start at byte OFF (decimal or 0x-hex)\n"
          "  -l LEN        dump at most LEN bytes\n"
          "  -u            uppercase hex digits\n"
          "  -o FILE       write output to FILE instead of stdout\n"
          "  -h, --help    show this help\n"
          "\n"
          "NOTE\n"
          "  -r reconstructs contiguous data; leading offsets are read for\n"
          "  context but not used to seek/pad sparse output.\n"
          "\n"
          "EXIT CODES\n"
          "  0   success           1   I/O error           2   bad usage\n")));
}
    124 
    125 static int xxd_is_hex(int c) {
    126   return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
    127          (c >= 'A' && c <= 'F');
    128 }
    129 static int xxd_hexval(int c) {
    130   if (c >= '0' && c <= '9') return c - '0';
    131   if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    132   return c - 'A' + 10;
    133 }
    134 
    135 /* Parse a decimal or 0x-hex non-negative integer. Returns 0 on success. */
    136 static int xxd_parse_u64(const char* s, uint64_t* out) {
    137   uint64_t v = 0;
    138   int base = 10;
    139   if (!s || !*s) return 1;
    140   if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
    141     base = 16;
    142     s += 2;
    143     if (!*s) return 1;
    144   }
    145   for (; *s; ++s) {
    146     unsigned d;
    147     char c = *s;
    148     if (c >= '0' && c <= '9')
    149       d = (unsigned)(c - '0');
    150     else if (base == 16 && (c >= 'a' && c <= 'f'))
    151       d = (unsigned)(c - 'a' + 10);
    152     else if (base == 16 && (c >= 'A' && c <= 'F'))
    153       d = (unsigned)(c - 'A' + 10);
    154     else
    155       return 1;
    156     v = v * (uint64_t)base + d;
    157   }
    158   *out = v;
    159   return 0;
    160 }
    161 
    162 /* Derive a C identifier from a file path: basename, non-alnum -> '_', a leading
    163  * digit gets an '_' prefix. Writes into out (cap bytes). */
    164 static void xxd_ident(const char* path, char* out, size_t cap) {
    165   const char* base = driver_basename(path);
    166   size_t i = 0;
    167   if (cap == 0) return;
    168   if (base[0] >= '0' && base[0] <= '9' && i + 1 < cap) out[i++] = '_';
    169   for (; *base && i + 1 < cap; ++base) {
    170     char c = *base;
    171     int ok = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
    172              (c >= '0' && c <= '9');
    173     out[i++] = ok ? c : '_';
    174   }
    175   out[i] = '\0';
    176 }
    177 
    178 static void xxd_emit_dump(Xb* b, const uint8_t* data, size_t len, uint64_t base,
    179                           size_t cols, size_t group, const char* hx) {
    180   size_t off;
    181   for (off = 0; off < len; off += cols) {
    182     size_t linelen = (len - off < cols) ? (len - off) : cols;
    183     size_t j;
    184     xb_hexnum(b, base + off, 8, hx);
    185     xb_s(b, ": ");
    186     for (j = 0; j < cols; ++j) {
    187       if (j < linelen)
    188         xb_hex2(b, data[off + j], hx);
    189       else {
    190         xb_c(b, ' ');
    191         xb_c(b, ' ');
    192       }
    193       if ((j + 1) % group == 0) xb_c(b, ' ');
    194     }
    195     if (cols % group != 0) xb_c(b, ' ');
    196     xb_c(b, ' ');
    197     for (j = 0; j < linelen; ++j) {
    198       uint8_t c = data[off + j];
    199       xb_c(b, (c >= 0x20 && c <= 0x7e) ? (char)c : '.');
    200     }
    201     xb_c(b, '\n');
    202   }
    203 }
    204 
    205 static void xxd_emit_plain(Xb* b, const uint8_t* data, size_t len, size_t cols,
    206                            const char* hx) {
    207   size_t i;
    208   for (i = 0; i < len; ++i) {
    209     xb_hex2(b, data[i], hx);
    210     if ((i + 1) % cols == 0) xb_c(b, '\n');
    211   }
    212   if (len == 0 || len % cols != 0) xb_c(b, '\n');
    213 }
    214 
    215 static void xxd_emit_include(Xb* b, const uint8_t* data, size_t len,
    216                              const char* ident, size_t cols, const char* hx) {
    217   size_t i;
    218   xb_s(b, "unsigned char ");
    219   if (ident && ident[0]) xb_s(b, ident);
    220   xb_s(b, "[] = {\n");
    221   for (i = 0; i < len; ++i) {
    222     if (i % cols == 0) xb_s(b, "  ");
    223     xb_s(b, "0x");
    224     xb_hex2(b, data[i], hx);
    225     if (i + 1 < len) xb_c(b, ',');
    226     if ((i + 1) % cols == 0 || i + 1 == len)
    227       xb_c(b, '\n');
    228     else
    229       xb_c(b, ' ');
    230   }
    231   xb_s(b, "};\n");
    232   xb_s(b, "unsigned int ");
    233   if (ident && ident[0]) xb_s(b, ident);
    234   xb_s(b, "_len = ");
    235   xb_dec(b, len);
    236   xb_s(b, ";\n");
    237 }
    238 
    239 /* Reverse a hex dump back to raw bytes. plain=1 treats the whole input as
    240  * continuous hex; otherwise each line is `[offset:] hex...  ascii` and parsing
    241  * stops at the double-space gutter. */
    242 static void xxd_reverse(Xb* b, const uint8_t* data, size_t len, int plain) {
    243   size_t i = 0;
    244   while (i < len) {
    245     size_t eol = i;
    246     size_t s, p;
    247     int hi = -1;
    248     while (eol < len && data[eol] != '\n') ++eol;
    249     s = i;
    250     if (!plain) {
    251       size_t c;
    252       for (c = i; c < eol; ++c) {
    253         if (data[c] == ':') {
    254           s = c + 1;
    255           break;
    256         }
    257       }
    258     }
    259     for (p = s; p < eol; ++p) {
    260       unsigned char ch = data[p];
    261       if (ch == ' ' || ch == '\t') {
    262         if (!plain && p + 1 < eol &&
    263             (data[p + 1] == ' ' || data[p + 1] == '\t'))
    264           break; /* gutter before the ASCII column */
    265         continue;
    266       }
    267       if (xxd_is_hex(ch)) {
    268         if (hi < 0) {
    269           hi = xxd_hexval(ch);
    270         } else {
    271           xb_c(b, (char)((hi << 4) | xxd_hexval(ch)));
    272           hi = -1;
    273         }
    274         continue;
    275       }
    276       break; /* non-hex, non-space: rest of line is ASCII/junk */
    277     }
    278     i = (eol < len) ? eol + 1 : eol;
    279   }
    280 }
    281 
    282 /* Extract the value for a short option that may be attached (-c16) or separate
    283  * (-c 16). On success advances *i past a consumed separate arg and returns the
    284  * value string; returns NULL on a missing argument. */
    285 static const char* xxd_optval(const char* a, int argc, char** argv, int* i) {
    286   if (a[2] != '\0') return a + 2;
    287   if (*i + 1 >= argc) return NULL;
    288   return argv[++(*i)];
    289 }
    290 
    291 int driver_xxd(int argc, char** argv) {
    292   DriverEnv env;
    293   KitContext ctx;
    294   XxdOpts o;
    295   Xb b;
    296   KitWriter* w = NULL;
    297   const uint8_t* data = NULL;
    298   size_t len = 0;
    299   DriverLoad ld = {0};
    300   uint8_t* sbuf = NULL;
    301   size_t sbuf_len = 0;
    302   int loaded = 0, npos = 0, rc = 2, owned_writer = 0;
    303   int i;
    304   size_t cols, group;
    305   const char* hx;
    306   char ident[256];
    307 
    308   if (driver_argv_wants_help(argc, argv, 1)) {
    309     driver_help_xxd();
    310     return 0;
    311   }
    312 
    313   memset(&o, 0, sizeof o);
    314   driver_env_init(&env);
    315   ctx = driver_env_to_context(&env);
    316 
    317   for (i = 1; i < argc; ++i) {
    318     const char* a = argv[i];
    319     if (driver_streq(a, "-r")) {
    320       o.reverse = 1;
    321       continue;
    322     }
    323     if (driver_streq(a, "-p")) {
    324       o.mode = XXD_PLAIN;
    325       continue;
    326     }
    327     if (driver_streq(a, "-i")) {
    328       o.mode = XXD_INCLUDE;
    329       continue;
    330     }
    331     if (driver_streq(a, "-u")) {
    332       o.uppercase = 1;
    333       continue;
    334     }
    335     if (a[0] == '-' && a[1] == 'c') {
    336       const char* v = xxd_optval(a, argc, argv, &i);
    337       uint64_t n;
    338       if (!v || xxd_parse_u64(v, &n) != 0 || n == 0) {
    339         driver_errf(XXD_TOOL, "-c requires a positive integer");
    340         goto done;
    341       }
    342       o.cols = (size_t)n;
    343       continue;
    344     }
    345     if (a[0] == '-' && a[1] == 'g') {
    346       const char* v = xxd_optval(a, argc, argv, &i);
    347       uint64_t n;
    348       if (!v || xxd_parse_u64(v, &n) != 0 || n == 0) {
    349         driver_errf(XXD_TOOL, "-g requires a positive integer");
    350         goto done;
    351       }
    352       o.group = (size_t)n;
    353       continue;
    354     }
    355     if (a[0] == '-' && a[1] == 's') {
    356       const char* v = xxd_optval(a, argc, argv, &i);
    357       if (!v || xxd_parse_u64(v, &o.seek) != 0) {
    358         driver_errf(XXD_TOOL, "-s requires a byte offset");
    359         goto done;
    360       }
    361       continue;
    362     }
    363     if (a[0] == '-' && a[1] == 'l') {
    364       const char* v = xxd_optval(a, argc, argv, &i);
    365       if (!v || xxd_parse_u64(v, &o.limit) != 0) {
    366         driver_errf(XXD_TOOL, "-l requires a length");
    367         goto done;
    368       }
    369       o.have_limit = 1;
    370       continue;
    371     }
    372     if (driver_streq(a, "-o")) {
    373       if (i + 1 >= argc) {
    374         driver_errf(XXD_TOOL, "-o requires a path");
    375         goto done;
    376       }
    377       o.out = argv[++i];
    378       continue;
    379     }
    380     if (driver_streq(a, "-")) {
    381       if (npos == 0) o.in = NULL; /* stdin */
    382       ++npos;
    383       continue;
    384     }
    385     if (a[0] == '-' && a[1] != '\0') {
    386       driver_errf(XXD_TOOL, "unknown option: %s", a);
    387       goto done;
    388     }
    389     if (npos == 0)
    390       o.in = a;
    391     else if (npos == 1 && !o.out)
    392       o.out = a;
    393     else {
    394       driver_errf(XXD_TOOL, "too many operands: %s", a);
    395       goto done;
    396     }
    397     ++npos;
    398   }
    399 
    400   /* Resolve mode-dependent defaults. */
    401   cols = o.cols ? o.cols
    402                 : (o.mode == XXD_PLAIN     ? XXD_COLS_PLAIN
    403                    : o.mode == XXD_INCLUDE ? XXD_COLS_INCLUDE
    404                                            : XXD_COLS_DUMP);
    405   group = o.group ? o.group : 2u;
    406   hx = o.uppercase ? "0123456789ABCDEF" : "0123456789abcdef";
    407 
    408   /* Load input. */
    409   if (o.in) {
    410     KitSlice in;
    411     if (driver_load_bytes(&env.file_io, XXD_TOOL, o.in, &ld, &in) != 0) {
    412       rc = 1;
    413       goto done;
    414     }
    415     loaded = 1;
    416     data = in.data;
    417     len = in.len;
    418   } else {
    419     if (!driver_read_stdin(&env, &sbuf, &sbuf_len)) {
    420       driver_errf(XXD_TOOL, "failed to read stdin");
    421       rc = 1;
    422       goto done;
    423     }
    424     data = sbuf;
    425     len = sbuf_len;
    426   }
    427 
    428   /* Apply seek/length (forward modes only; reverse consumes the whole text). */
    429   if (!o.reverse) {
    430     if (o.seek < len) {
    431       data += o.seek;
    432       len -= (size_t)o.seek;
    433     } else {
    434       data += len;
    435       len = 0;
    436     }
    437     if (o.have_limit && o.limit < (uint64_t)len) len = (size_t)o.limit;
    438   }
    439 
    440   /* Open output. */
    441   if (o.out) {
    442     if (ctx.file_io->open_writer(ctx.file_io->user, o.out, &w) != KIT_OK) {
    443       driver_errf(XXD_TOOL, "failed to open output: %s", o.out);
    444       rc = 1;
    445       goto done;
    446     }
    447     owned_writer = 1;
    448   } else {
    449     w = driver_stdout_writer(&env);
    450     owned_writer = 1;
    451   }
    452 
    453   memset(&b, 0, sizeof b);
    454   b.w = w;
    455 
    456   if (o.reverse) {
    457     xxd_reverse(&b, data, len, o.mode == XXD_PLAIN);
    458   } else if (o.mode == XXD_PLAIN) {
    459     xxd_emit_plain(&b, data, len, cols, hx);
    460   } else if (o.mode == XXD_INCLUDE) {
    461     ident[0] = '\0';
    462     if (o.in) xxd_ident(o.in, ident, sizeof ident);
    463     xxd_emit_include(&b, data, len, ident, cols, hx);
    464   } else {
    465     xxd_emit_dump(&b, data, len, o.seek, cols, group, hx);
    466   }
    467   xb_flush(&b);
    468   rc = b.err ? 1 : 0;
    469 
    470 done:
    471   if (owned_writer && w) kit_writer_close(w);
    472   if (sbuf) driver_free(&env, sbuf, sbuf_len);
    473   if (loaded) driver_release_bytes(&env.file_io, &ld);
    474   driver_env_fini(&env);
    475   return rc;
    476 }