kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

strings.c (7816B)


      1 #include <kit/core.h>
      2 #include <stddef.h>
      3 #include <stdint.h>
      4 #include <stdio.h>
      5 #include <string.h>
      6 
      7 #include "driver.h"
      8 #include "env.h"
      9 
     10 /* `kit strings` — print sequences of printable characters found in a
     11  * file. Unlike most tools here it is format-agnostic: it scans the raw
     12  * bytes of any input (objects, archives, or arbitrary data), matching
     13  * GNU strings' default whole-file behaviour. All display logic lives
     14  * here; the driver env supplies the path/stdin byte loaders. */
     15 
/* Tool name handed to driver_errf and driver_load_bytes by this file. */
#define STRINGS_TOOL "strings"

/* Minimum run length used when no -n / --bytes / -N option overrides it. */
#define STRINGS_DEFAULT_MIN 4
     19 
     20 typedef enum StringsRadix {
     21   STRINGS_RAD_NONE, /* default: no offset prefix */
     22   STRINGS_RAD_OCT,  /* -t o */
     23   STRINGS_RAD_DEC,  /* -t d */
     24   STRINGS_RAD_HEX,  /* -t x */
     25 } StringsRadix;
     26 
/* Settings for one `kit strings` invocation, filled in by driver_strings
 * from the command line and read by strings_scan / strings_emit. */
typedef struct StringsOpts {
  /* Shortest printable run that is reported. Set by -n N / --bytes=N / -N;
   * defaults to STRINGS_DEFAULT_MIN. */
  size_t min_len;
  /* Base of the offset prefix, chosen with -t {o,d,x}; STRINGS_RAD_NONE
   * prints no offset. */
  StringsRadix radix;
  /* Non-zero when -f / --print-file-name was given: each string is then
   * prefixed with "<path>: " whenever a path is available. */
  int print_filename;
} StringsOpts;
     32 
     33 /* A byte is "printable" for the purposes of a run if it is a graphic
     34  * ASCII character (0x20..0x7e) or a horizontal tab, matching GNU
     35  * strings' default (non-locale) classification. */
     36 static int strings_is_printable(uint8_t c) {
     37   return (c >= 0x20 && c <= 0x7e) || c == '\t';
     38 }
     39 
     40 static void strings_emit(const char* path, const StringsOpts* opts,
     41                          size_t offset, const uint8_t* s, size_t len) {
     42   if (opts->print_filename && path) driver_printf("%s: ", path);
     43   switch (opts->radix) {
     44     case STRINGS_RAD_OCT:
     45       driver_printf("%7llo ", (unsigned long long)offset);
     46       break;
     47     case STRINGS_RAD_DEC:
     48       driver_printf("%7llu ", (unsigned long long)offset);
     49       break;
     50     case STRINGS_RAD_HEX:
     51       driver_printf("%7llx ", (unsigned long long)offset);
     52       break;
     53     default:
     54       break;
     55   }
     56   driver_printf("%.*s\n", (int)len, (const char*)s);
     57 }
     58 
     59 /* Scan `data` for printable runs of at least opts->min_len bytes. */
     60 static void strings_scan(const char* path, const StringsOpts* opts,
     61                          const uint8_t* data, size_t size) {
     62   size_t i = 0;
     63   size_t run_start = 0;
     64   size_t run_len = 0;
     65   for (i = 0; i < size; ++i) {
     66     if (strings_is_printable(data[i])) {
     67       if (run_len == 0) run_start = i;
     68       ++run_len;
     69       continue;
     70     }
     71     if (run_len >= opts->min_len) {
     72       strings_emit(path, opts, run_start, data + run_start, run_len);
     73     }
     74     run_len = 0;
     75   }
     76   if (run_len >= opts->min_len) {
     77     strings_emit(path, opts, run_start, data + run_start, run_len);
     78   }
     79 }
     80 
/* Print the usage text for `kit strings` through driver_printf. The text
 * documents the options parsed by driver_strings and the exit codes it
 * returns (0 success, 1 I/O error, 2 bad usage). */
void driver_help_strings(void) {
  driver_printf(
      "%.*s",
      KIT_SLICE_ARG(KIT_SLICE_LIT(
          "kit strings — print printable character sequences in a file\n"
          "\n"
          "USAGE\n"
          "  kit strings [OPTIONS] [FILE...]\n"
          "\n"
          "DESCRIPTION\n"
          "  Scans the raw bytes of each input and prints runs of at least\n"
          "  N printable characters (default 4). Works on any file, not\n"
          "  just object files. With no FILE, or with `-`, reads stdin.\n"
          "\n"
          "OPTIONS\n"
          "  -n N, --bytes=N    minimum run length (default 4)\n"
          "  -N                 shorthand for -n N (e.g. -8)\n"
          "  -t {o,d,x}         print the byte offset of each string in\n"
          "                     octal / decimal / hexadecimal\n"
          "  -f, --print-file-name\n"
          "                     print the input file name before each string\n"
          "  -h, --help         show this help\n"
          "\n"
          "EXIT CODES\n"
          "  0   success           1   I/O error           2   bad usage\n")));
}
    107 
    108 /* Parse the argument of -n / --bytes=. Returns 0 and stores the value on
    109  * success, or non-zero on a malformed/zero value. */
    110 static int strings_parse_min(const char* s, size_t* out) {
    111   size_t v = 0;
    112   if (!s || !*s) return 1;
    113   for (; *s; ++s) {
    114     if (*s < '0' || *s > '9') return 1;
    115     v = v * 10u + (size_t)(*s - '0');
    116   }
    117   if (v == 0) return 1;
    118   *out = v;
    119   return 0;
    120 }
    121 
/* Entry point for `kit strings`.
 *
 * argv[1..argc-1] is walked twice. The first pass validates every option
 * and fills `opts`, so options apply to all inputs regardless of where they
 * appear on the command line. The second pass visits the inputs in argv
 * order and scans each one.
 *
 * Returns 0 on success (including after printing help), 1 when stdin cannot
 * be read or driver_load_bytes fails, and 2 for a malformed or unknown
 * option. Processing stops at the first input that fails. */
int driver_strings(int argc, char** argv) {
  DriverEnv env;
  StringsOpts opts;
  int i, rc = 1, any_input = 0;

  /* Help is answered before driver_env_init, so this early return has
   * nothing to tear down. */
  if (driver_argv_wants_help(argc, argv, 1)) {
    driver_help_strings();
    return 0;
  }

  memset(&opts, 0, sizeof opts);
  opts.min_len = STRINGS_DEFAULT_MIN;
  opts.radix = STRINGS_RAD_NONE;
  driver_env_init(&env);

  /* First pass: options. Every failure below sets rc = 2 and jumps to
   * `done` so that the environment is still finalised. */
  for (i = 1; i < argc; ++i) {
    const char* a = argv[i];
    /* -n N / --bytes N: the value is the next argv entry, consumed by ++i. */
    if (driver_streq(a, "-n") || driver_streq(a, "--bytes")) {
      if (i + 1 >= argc || strings_parse_min(argv[++i], &opts.min_len) != 0) {
        driver_errf(STRINGS_TOOL, "option %s requires a positive integer", a);
        rc = 2;
        goto done;
      }
      continue;
    }
    /* --bytes=N: the value follows the 8-character "--bytes=" prefix. */
    if (strncmp(a, "--bytes=", 8) == 0) {
      if (strings_parse_min(a + 8, &opts.min_len) != 0) {
        driver_errf(STRINGS_TOOL, "invalid --bytes value: %s", a + 8);
        rc = 2;
        goto done;
      }
      continue;
    }
    /* -t R: the radix letter is the next argv entry. */
    if (driver_streq(a, "-t")) {
      const char* v;
      if (i + 1 >= argc) {
        driver_errf(STRINGS_TOOL, "option -t requires o, d, or x");
        rc = 2;
        goto done;
      }
      v = argv[++i];
      if (driver_streq(v, "o")) {
        opts.radix = STRINGS_RAD_OCT;
      } else if (driver_streq(v, "d")) {
        opts.radix = STRINGS_RAD_DEC;
      } else if (driver_streq(v, "x")) {
        opts.radix = STRINGS_RAD_HEX;
      } else {
        driver_errf(STRINGS_TOOL, "invalid -t radix: %s (want o, d, or x)", v);
        rc = 2;
        goto done;
      }
      continue;
    }
    if (driver_streq(a, "-f") || driver_streq(a, "--print-file-name")) {
      opts.print_filename = 1;
      continue;
    }
    /* `-<digits>` is GNU shorthand for `-n <digits>`. Only the first
     * character after the dash is checked here; strings_parse_min rejects
     * anything that is not all digits. */
    if (a[0] == '-' && a[1] >= '0' && a[1] <= '9') {
      if (strings_parse_min(a + 1, &opts.min_len) != 0) {
        driver_errf(STRINGS_TOOL, "invalid length: %s", a);
        rc = 2;
        goto done;
      }
      continue;
    }
    if (driver_streq(a, "-")) {
      any_input = 1; /* stdin, handled in second pass */
      continue;
    }
    /* Any other dash-prefixed argument is an unrecognised option. */
    if (a[0] == '-' && a[1] != '\0') {
      driver_errf(STRINGS_TOOL, "unknown option: %s", a);
      rc = 2;
      goto done;
    }
    /* Everything else is a file operand; it is opened in the second pass. */
    any_input = 1;
  }

  /* No file operands: scan stdin. The path is NULL, so strings_emit prints
   * no file-name prefix even when -f was given. */
  if (!any_input) {
    uint8_t* buf = NULL;
    size_t n = 0;
    if (!driver_read_stdin(&env, &buf, &n)) {
      driver_errf(STRINGS_TOOL, "failed to read stdin");
      rc = 1;
      goto done;
    }
    strings_scan(NULL, &opts, buf, n);
    driver_free(&env, buf, n);
    rc = 0;
    goto done;
  }

  /* Second pass: inputs, in argv order. */
  for (i = 1; i < argc; ++i) {
    const char* a = argv[i];
    /* Skip the options consumed above (and their values). These tests must
     * mirror the option forms accepted by the first pass; otherwise an
     * option or its value would be opened as a file. */
    if (driver_streq(a, "-n") || driver_streq(a, "--bytes") ||
        driver_streq(a, "-t")) {
      ++i;
      continue;
    }
    if (strncmp(a, "--bytes=", 8) == 0 || driver_streq(a, "-f") ||
        driver_streq(a, "--print-file-name") ||
        (a[0] == '-' && a[1] >= '0' && a[1] <= '9')) {
      continue;
    }
    /* Explicit `-` operand: scan stdin at this position in the sequence,
     * labelled "{standard input}" when file names are being printed. */
    if (driver_streq(a, "-")) {
      uint8_t* buf = NULL;
      size_t n = 0;
      if (!driver_read_stdin(&env, &buf, &n)) {
        driver_errf(STRINGS_TOOL, "failed to read stdin");
        rc = 1;
        goto done;
      }
      strings_scan(opts.print_filename ? "{standard input}" : NULL, &opts, buf,
                   n);
      driver_free(&env, buf, n);
      continue;
    }
    /* Regular file operand: load, scan, release. */
    {
      DriverLoad ld = {0};
      KitSlice input;
      /* No message is printed here on failure. NOTE(review): presumably
       * driver_load_bytes reports the error itself using STRINGS_TOOL and
       * the path; confirm against its definition. */
      if (driver_load_bytes(&env.file_io, STRINGS_TOOL, a, &ld, &input) != 0) {
        rc = 1;
        goto done;
      }
      strings_scan(a, &opts, input.data, input.len);
      driver_release_bytes(&env.file_io, &ld);
    }
  }

  rc = 0;
done:
  /* Single exit path for everything after driver_env_init. */
  driver_env_fini(&env);
  return rc;
}