xxd.c (13289B)
1 #include <kit/core.h> 2 #include <stddef.h> 3 #include <stdint.h> 4 #include <string.h> 5 6 #include "driver.h" 7 #include "env.h" 8 9 /* `kit xxd` — hex dump any file, and reverse a dump back to binary. A GNU 10 * xxd subset: default hex+ASCII dump, plus -r (reverse), -p (plain hex), 11 * -i (C array), -c/-g (columns/grouping), -s/-l (seek/length), -u (uppercase). 12 * Unlike `objdump -s` it is format-agnostic: it dumps raw bytes of any input. 13 * With no FILE, or `-`, reads stdin; output goes to stdout or -o FILE. */ 14 15 #define XXD_TOOL "xxd" 16 17 #define XXD_COLS_DUMP 16u 18 #define XXD_COLS_PLAIN 30u 19 #define XXD_COLS_INCLUDE 12u 20 21 typedef enum XxdMode { 22 XXD_DUMP = 0, /* default hex+ASCII */ 23 XXD_PLAIN, /* -p: continuous hex */ 24 XXD_INCLUDE, /* -i: C array */ 25 } XxdMode; 26 27 typedef struct XxdOpts { 28 XxdMode mode; 29 int reverse; /* -r */ 30 int uppercase; /* -u */ 31 size_t cols; /* -c; 0 = mode default */ 32 size_t group; /* -g; 0 = default (2) */ 33 uint64_t seek; /* -s */ 34 uint64_t limit; /* -l; 0 = no limit */ 35 int have_limit; /* whether -l was given */ 36 const char* in; /* input path, or NULL = stdin */ 37 const char* out; /* output path, or NULL = stdout */ 38 } XxdOpts; 39 40 /* Streaming output buffer over a KitWriter; flushes when full so cols can be 41 * arbitrarily large. */ 42 typedef struct Xb { 43 char buf[8192]; 44 size_t n; 45 KitWriter* w; 46 int err; 47 } Xb; 48 49 static void xb_flush(Xb* b) { 50 if (b->n) { 51 if (kit_writer_write(b->w, b->buf, b->n) != KIT_OK) b->err = 1; 52 b->n = 0; 53 } 54 } 55 static void xb_c(Xb* b, char c) { 56 if (b->n == sizeof b->buf) xb_flush(b); 57 b->buf[b->n++] = c; 58 } 59 static void xb_s(Xb* b, const char* s) { 60 while (*s) xb_c(b, *s++); 61 } 62 static void xb_hex2(Xb* b, uint8_t v, const char* hx) { 63 xb_c(b, hx[v >> 4]); 64 xb_c(b, hx[v & 0x0f]); 65 } 66 static void xb_hexnum(Xb* b, uint64_t v, int width, const char* hx) { 67 char t[16]; 68 int i; 69 if (width > 16) width = 16; 70 for (i = width - 1; i >= 0; --i) { 71 t[i] = hx[v & 0x0f]; 72 v >>= 4; 73 } 74 for (i = 0; i < width; ++i) xb_c(b, t[i]); 75 } 76 static void xb_dec(Xb* b, uint64_t v) { 77 char t[24]; 78 int i = 0; 79 if (v == 0) { 80 xb_c(b, '0'); 81 return; 82 } 83 while (v) { 84 t[i++] = (char)('0' + (int)(v % 10)); 85 v /= 10; 86 } 87 while (i) xb_c(b, t[--i]); 88 } 89 90 void driver_help_xxd(void) { 91 driver_printf( 92 "%.*s", 93 KIT_SLICE_ARG(KIT_SLICE_LIT( 94 "kit xxd — hex dump a file (and reverse a dump back to binary)\n" 95 "\n" 96 "USAGE\n" 97 " kit xxd [OPTIONS] [INFILE [OUTFILE]]\n" 98 "\n" 99 "DESCRIPTION\n" 100 " Dumps the raw bytes of INFILE (or stdin) as a hex+ASCII table.\n" 101 " Works on any file, not just objects. `xxd f | xxd -r` " 102 "round-trips\n" 103 " contiguous data back to the original bytes.\n" 104 "\n" 105 "OPTIONS\n" 106 " -r reverse: read a hex dump, write binary\n" 107 " -p plain hex dump (continuous, no offsets/ASCII)\n" 108 " -i output a C `unsigned char[]` array\n" 109 " -c N N bytes per line (default 16; -p 30; -i 12)\n" 110 " -g N group hex into N-byte columns (default 2)\n" 111 " -s OFF start at byte OFF (decimal or 0x-hex)\n" 112 " -l LEN dump at most LEN bytes\n" 113 " -u uppercase hex digits\n" 114 " -o FILE write output to FILE instead of stdout\n" 115 " -h, --help show this help\n" 116 "\n" 117 "NOTE\n" 118 " -r reconstructs contiguous data; leading offsets are read for\n" 119 " context but not used to seek/pad sparse output.\n" 120 "\n" 121 "EXIT CODES\n" 122 " 0 success 1 I/O error 2 bad usage\n"))); 123 } 124 125 static int xxd_is_hex(int c) { 126 return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || 127 (c >= 'A' && c <= 'F'); 128 } 129 static int xxd_hexval(int c) { 130 if (c >= '0' && c <= '9') return c - '0'; 131 if (c >= 'a' && c <= 'f') return c - 'a' + 10; 132 return c - 'A' + 10; 133 } 134 135 /* Parse a decimal or 0x-hex non-negative integer. Returns 0 on success. */ 136 static int xxd_parse_u64(const char* s, uint64_t* out) { 137 uint64_t v = 0; 138 int base = 10; 139 if (!s || !*s) return 1; 140 if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { 141 base = 16; 142 s += 2; 143 if (!*s) return 1; 144 } 145 for (; *s; ++s) { 146 unsigned d; 147 char c = *s; 148 if (c >= '0' && c <= '9') 149 d = (unsigned)(c - '0'); 150 else if (base == 16 && (c >= 'a' && c <= 'f')) 151 d = (unsigned)(c - 'a' + 10); 152 else if (base == 16 && (c >= 'A' && c <= 'F')) 153 d = (unsigned)(c - 'A' + 10); 154 else 155 return 1; 156 v = v * (uint64_t)base + d; 157 } 158 *out = v; 159 return 0; 160 } 161 162 /* Derive a C identifier from a file path: basename, non-alnum -> '_', a leading 163 * digit gets an '_' prefix. Writes into out (cap bytes). */ 164 static void xxd_ident(const char* path, char* out, size_t cap) { 165 const char* base = driver_basename(path); 166 size_t i = 0; 167 if (cap == 0) return; 168 if (base[0] >= '0' && base[0] <= '9' && i + 1 < cap) out[i++] = '_'; 169 for (; *base && i + 1 < cap; ++base) { 170 char c = *base; 171 int ok = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || 172 (c >= '0' && c <= '9'); 173 out[i++] = ok ? c : '_'; 174 } 175 out[i] = '\0'; 176 } 177 178 static void xxd_emit_dump(Xb* b, const uint8_t* data, size_t len, uint64_t base, 179 size_t cols, size_t group, const char* hx) { 180 size_t off; 181 for (off = 0; off < len; off += cols) { 182 size_t linelen = (len - off < cols) ? (len - off) : cols; 183 size_t j; 184 xb_hexnum(b, base + off, 8, hx); 185 xb_s(b, ": "); 186 for (j = 0; j < cols; ++j) { 187 if (j < linelen) 188 xb_hex2(b, data[off + j], hx); 189 else { 190 xb_c(b, ' '); 191 xb_c(b, ' '); 192 } 193 if ((j + 1) % group == 0) xb_c(b, ' '); 194 } 195 if (cols % group != 0) xb_c(b, ' '); 196 xb_c(b, ' '); 197 for (j = 0; j < linelen; ++j) { 198 uint8_t c = data[off + j]; 199 xb_c(b, (c >= 0x20 && c <= 0x7e) ? (char)c : '.'); 200 } 201 xb_c(b, '\n'); 202 } 203 } 204 205 static void xxd_emit_plain(Xb* b, const uint8_t* data, size_t len, size_t cols, 206 const char* hx) { 207 size_t i; 208 for (i = 0; i < len; ++i) { 209 xb_hex2(b, data[i], hx); 210 if ((i + 1) % cols == 0) xb_c(b, '\n'); 211 } 212 if (len == 0 || len % cols != 0) xb_c(b, '\n'); 213 } 214 215 static void xxd_emit_include(Xb* b, const uint8_t* data, size_t len, 216 const char* ident, size_t cols, const char* hx) { 217 size_t i; 218 xb_s(b, "unsigned char "); 219 if (ident && ident[0]) xb_s(b, ident); 220 xb_s(b, "[] = {\n"); 221 for (i = 0; i < len; ++i) { 222 if (i % cols == 0) xb_s(b, " "); 223 xb_s(b, "0x"); 224 xb_hex2(b, data[i], hx); 225 if (i + 1 < len) xb_c(b, ','); 226 if ((i + 1) % cols == 0 || i + 1 == len) 227 xb_c(b, '\n'); 228 else 229 xb_c(b, ' '); 230 } 231 xb_s(b, "};\n"); 232 xb_s(b, "unsigned int "); 233 if (ident && ident[0]) xb_s(b, ident); 234 xb_s(b, "_len = "); 235 xb_dec(b, len); 236 xb_s(b, ";\n"); 237 } 238 239 /* Reverse a hex dump back to raw bytes. plain=1 treats the whole input as 240 * continuous hex; otherwise each line is `[offset:] hex... ascii` and parsing 241 * stops at the double-space gutter. */ 242 static void xxd_reverse(Xb* b, const uint8_t* data, size_t len, int plain) { 243 size_t i = 0; 244 while (i < len) { 245 size_t eol = i; 246 size_t s, p; 247 int hi = -1; 248 while (eol < len && data[eol] != '\n') ++eol; 249 s = i; 250 if (!plain) { 251 size_t c; 252 for (c = i; c < eol; ++c) { 253 if (data[c] == ':') { 254 s = c + 1; 255 break; 256 } 257 } 258 } 259 for (p = s; p < eol; ++p) { 260 unsigned char ch = data[p]; 261 if (ch == ' ' || ch == '\t') { 262 if (!plain && p + 1 < eol && 263 (data[p + 1] == ' ' || data[p + 1] == '\t')) 264 break; /* gutter before the ASCII column */ 265 continue; 266 } 267 if (xxd_is_hex(ch)) { 268 if (hi < 0) { 269 hi = xxd_hexval(ch); 270 } else { 271 xb_c(b, (char)((hi << 4) | xxd_hexval(ch))); 272 hi = -1; 273 } 274 continue; 275 } 276 break; /* non-hex, non-space: rest of line is ASCII/junk */ 277 } 278 i = (eol < len) ? eol + 1 : eol; 279 } 280 } 281 282 /* Extract the value for a short option that may be attached (-c16) or separate 283 * (-c 16). On success advances *i past a consumed separate arg and returns the 284 * value string; returns NULL on a missing argument. */ 285 static const char* xxd_optval(const char* a, int argc, char** argv, int* i) { 286 if (a[2] != '\0') return a + 2; 287 if (*i + 1 >= argc) return NULL; 288 return argv[++(*i)]; 289 } 290 291 int driver_xxd(int argc, char** argv) { 292 DriverEnv env; 293 KitContext ctx; 294 XxdOpts o; 295 Xb b; 296 KitWriter* w = NULL; 297 const uint8_t* data = NULL; 298 size_t len = 0; 299 DriverLoad ld = {0}; 300 uint8_t* sbuf = NULL; 301 size_t sbuf_len = 0; 302 int loaded = 0, npos = 0, rc = 2, owned_writer = 0; 303 int i; 304 size_t cols, group; 305 const char* hx; 306 char ident[256]; 307 308 if (driver_argv_wants_help(argc, argv, 1)) { 309 driver_help_xxd(); 310 return 0; 311 } 312 313 memset(&o, 0, sizeof o); 314 driver_env_init(&env); 315 ctx = driver_env_to_context(&env); 316 317 for (i = 1; i < argc; ++i) { 318 const char* a = argv[i]; 319 if (driver_streq(a, "-r")) { 320 o.reverse = 1; 321 continue; 322 } 323 if (driver_streq(a, "-p")) { 324 o.mode = XXD_PLAIN; 325 continue; 326 } 327 if (driver_streq(a, "-i")) { 328 o.mode = XXD_INCLUDE; 329 continue; 330 } 331 if (driver_streq(a, "-u")) { 332 o.uppercase = 1; 333 continue; 334 } 335 if (a[0] == '-' && a[1] == 'c') { 336 const char* v = xxd_optval(a, argc, argv, &i); 337 uint64_t n; 338 if (!v || xxd_parse_u64(v, &n) != 0 || n == 0) { 339 driver_errf(XXD_TOOL, "-c requires a positive integer"); 340 goto done; 341 } 342 o.cols = (size_t)n; 343 continue; 344 } 345 if (a[0] == '-' && a[1] == 'g') { 346 const char* v = xxd_optval(a, argc, argv, &i); 347 uint64_t n; 348 if (!v || xxd_parse_u64(v, &n) != 0 || n == 0) { 349 driver_errf(XXD_TOOL, "-g requires a positive integer"); 350 goto done; 351 } 352 o.group = (size_t)n; 353 continue; 354 } 355 if (a[0] == '-' && a[1] == 's') { 356 const char* v = xxd_optval(a, argc, argv, &i); 357 if (!v || xxd_parse_u64(v, &o.seek) != 0) { 358 driver_errf(XXD_TOOL, "-s requires a byte offset"); 359 goto done; 360 } 361 continue; 362 } 363 if (a[0] == '-' && a[1] == 'l') { 364 const char* v = xxd_optval(a, argc, argv, &i); 365 if (!v || xxd_parse_u64(v, &o.limit) != 0) { 366 driver_errf(XXD_TOOL, "-l requires a length"); 367 goto done; 368 } 369 o.have_limit = 1; 370 continue; 371 } 372 if (driver_streq(a, "-o")) { 373 if (i + 1 >= argc) { 374 driver_errf(XXD_TOOL, "-o requires a path"); 375 goto done; 376 } 377 o.out = argv[++i]; 378 continue; 379 } 380 if (driver_streq(a, "-")) { 381 if (npos == 0) o.in = NULL; /* stdin */ 382 ++npos; 383 continue; 384 } 385 if (a[0] == '-' && a[1] != '\0') { 386 driver_errf(XXD_TOOL, "unknown option: %s", a); 387 goto done; 388 } 389 if (npos == 0) 390 o.in = a; 391 else if (npos == 1 && !o.out) 392 o.out = a; 393 else { 394 driver_errf(XXD_TOOL, "too many operands: %s", a); 395 goto done; 396 } 397 ++npos; 398 } 399 400 /* Resolve mode-dependent defaults. */ 401 cols = o.cols ? o.cols 402 : (o.mode == XXD_PLAIN ? XXD_COLS_PLAIN 403 : o.mode == XXD_INCLUDE ? XXD_COLS_INCLUDE 404 : XXD_COLS_DUMP); 405 group = o.group ? o.group : 2u; 406 hx = o.uppercase ? "0123456789ABCDEF" : "0123456789abcdef"; 407 408 /* Load input. */ 409 if (o.in) { 410 KitSlice in; 411 if (driver_load_bytes(&env.file_io, XXD_TOOL, o.in, &ld, &in) != 0) { 412 rc = 1; 413 goto done; 414 } 415 loaded = 1; 416 data = in.data; 417 len = in.len; 418 } else { 419 if (!driver_read_stdin(&env, &sbuf, &sbuf_len)) { 420 driver_errf(XXD_TOOL, "failed to read stdin"); 421 rc = 1; 422 goto done; 423 } 424 data = sbuf; 425 len = sbuf_len; 426 } 427 428 /* Apply seek/length (forward modes only; reverse consumes the whole text). */ 429 if (!o.reverse) { 430 if (o.seek < len) { 431 data += o.seek; 432 len -= (size_t)o.seek; 433 } else { 434 data += len; 435 len = 0; 436 } 437 if (o.have_limit && o.limit < (uint64_t)len) len = (size_t)o.limit; 438 } 439 440 /* Open output. */ 441 if (o.out) { 442 if (ctx.file_io->open_writer(ctx.file_io->user, o.out, &w) != KIT_OK) { 443 driver_errf(XXD_TOOL, "failed to open output: %s", o.out); 444 rc = 1; 445 goto done; 446 } 447 owned_writer = 1; 448 } else { 449 w = driver_stdout_writer(&env); 450 owned_writer = 1; 451 } 452 453 memset(&b, 0, sizeof b); 454 b.w = w; 455 456 if (o.reverse) { 457 xxd_reverse(&b, data, len, o.mode == XXD_PLAIN); 458 } else if (o.mode == XXD_PLAIN) { 459 xxd_emit_plain(&b, data, len, cols, hx); 460 } else if (o.mode == XXD_INCLUDE) { 461 ident[0] = '\0'; 462 if (o.in) xxd_ident(o.in, ident, sizeof ident); 463 xxd_emit_include(&b, data, len, ident, cols, hx); 464 } else { 465 xxd_emit_dump(&b, data, len, o.seek, cols, group, hx); 466 } 467 xb_flush(&b); 468 rc = b.err ? 1 : 0; 469 470 done: 471 if (owned_writer && w) kit_writer_close(w); 472 if (sbuf) driver_free(&env, sbuf, sbuf_len); 473 if (loaded) driver_release_bytes(&env.file_io, &ld); 474 driver_env_fini(&env); 475 return rc; 476 }