strings.c (7816B)
1 #include <kit/core.h> 2 #include <stddef.h> 3 #include <stdint.h> 4 #include <stdio.h> 5 #include <string.h> 6 7 #include "driver.h" 8 #include "env.h" 9 10 /* `kit strings` — print sequences of printable characters found in a 11 * file. Unlike most tools here it is format-agnostic: it scans the raw 12 * bytes of any input (objects, archives, or arbitrary data), matching 13 * GNU strings' default whole-file behaviour. All display logic lives 14 * here; the driver env supplies the path/stdin byte loaders. */ 15 16 #define STRINGS_TOOL "strings" 17 18 #define STRINGS_DEFAULT_MIN 4 19 20 typedef enum StringsRadix { 21 STRINGS_RAD_NONE, /* default: no offset prefix */ 22 STRINGS_RAD_OCT, /* -t o */ 23 STRINGS_RAD_DEC, /* -t d */ 24 STRINGS_RAD_HEX, /* -t x */ 25 } StringsRadix; 26 27 typedef struct StringsOpts { 28 size_t min_len; /* -n N / --bytes=N / -N ; default STRINGS_DEFAULT_MIN */ 29 StringsRadix radix; /* -t {o,d,x} */ 30 int print_filename; /* -f / --print-file-name */ 31 } StringsOpts; 32 33 /* A byte is "printable" for the purposes of a run if it is a graphic 34 * ASCII character (0x20..0x7e) or a horizontal tab, matching GNU 35 * strings' default (non-locale) classification. */ 36 static int strings_is_printable(uint8_t c) { 37 return (c >= 0x20 && c <= 0x7e) || c == '\t'; 38 } 39 40 static void strings_emit(const char* path, const StringsOpts* opts, 41 size_t offset, const uint8_t* s, size_t len) { 42 if (opts->print_filename && path) driver_printf("%s: ", path); 43 switch (opts->radix) { 44 case STRINGS_RAD_OCT: 45 driver_printf("%7llo ", (unsigned long long)offset); 46 break; 47 case STRINGS_RAD_DEC: 48 driver_printf("%7llu ", (unsigned long long)offset); 49 break; 50 case STRINGS_RAD_HEX: 51 driver_printf("%7llx ", (unsigned long long)offset); 52 break; 53 default: 54 break; 55 } 56 driver_printf("%.*s\n", (int)len, (const char*)s); 57 } 58 59 /* Scan `data` for printable runs of at least opts->min_len bytes. 
*/
/* Walks the buffer once, tracking the current run of printable bytes.
 * A run ends at the first non-printable byte or at the end of the
 * buffer; it is handed to strings_emit (with its starting offset) only
 * if it is at least opts->min_len bytes long. `path` is passed through
 * to strings_emit unchanged. */
static void strings_scan(const char* path, const StringsOpts* opts,
                         const uint8_t* data, size_t size) {
  size_t start = 0; /* offset of the first byte of the current run */
  size_t count = 0; /* bytes accumulated in the current run */
  size_t pos;

  for (pos = 0; pos < size; ++pos) {
    if (!strings_is_printable(data[pos])) {
      /* Run boundary: flush what we have if it is long enough. */
      if (count >= opts->min_len) {
        strings_emit(path, opts, start, data + start, count);
      }
      count = 0;
    } else {
      if (count == 0) start = pos;
      count++;
    }
  }

  /* A run that reaches the end of the buffer has no terminating byte. */
  if (count >= opts->min_len) {
    strings_emit(path, opts, start, data + start, count);
  }
}

/* Print the usage text for `kit strings` through driver_printf. */
void driver_help_strings(void) {
  driver_printf(
      "%.*s",
      KIT_SLICE_ARG(KIT_SLICE_LIT(
          "kit strings — print printable character sequences in a file\n"
          "\n"
          "USAGE\n"
          " kit strings [OPTIONS] [FILE...]\n"
          "\n"
          "DESCRIPTION\n"
          " Scans the raw bytes of each input and prints runs of at least\n"
          " N printable characters (default 4). Works on any file, not\n"
          " just object files. With no FILE, or with `-`, reads stdin.\n"
          "\n"
          "OPTIONS\n"
          " -n N, --bytes=N minimum run length (default 4)\n"
          " -N shorthand for -n N (e.g. -8)\n"
          " -t {o,d,x} print the byte offset of each string in\n"
          " octal / decimal / hexadecimal\n"
          " -f, --print-file-name\n"
          " print the input file name before each string\n"
          " -h, --help show this help\n"
          "\n"
          "EXIT CODES\n"
          " 0 success 1 I/O error 2 bad usage\n")));
}

/* Parse the argument of -n / --bytes=. Returns 0 and stores the value on
 * success, or non-zero on a malformed/zero value.
*/ 110 static int strings_parse_min(const char* s, size_t* out) { 111 size_t v = 0; 112 if (!s || !*s) return 1; 113 for (; *s; ++s) { 114 if (*s < '0' || *s > '9') return 1; 115 v = v * 10u + (size_t)(*s - '0'); 116 } 117 if (v == 0) return 1; 118 *out = v; 119 return 0; 120 } 121 122 int driver_strings(int argc, char** argv) { 123 DriverEnv env; 124 StringsOpts opts; 125 int i, rc = 1, any_input = 0; 126 127 if (driver_argv_wants_help(argc, argv, 1)) { 128 driver_help_strings(); 129 return 0; 130 } 131 132 memset(&opts, 0, sizeof opts); 133 opts.min_len = STRINGS_DEFAULT_MIN; 134 opts.radix = STRINGS_RAD_NONE; 135 driver_env_init(&env); 136 137 /* First pass: options. */ 138 for (i = 1; i < argc; ++i) { 139 const char* a = argv[i]; 140 if (driver_streq(a, "-n") || driver_streq(a, "--bytes")) { 141 if (i + 1 >= argc || strings_parse_min(argv[++i], &opts.min_len) != 0) { 142 driver_errf(STRINGS_TOOL, "option %s requires a positive integer", a); 143 rc = 2; 144 goto done; 145 } 146 continue; 147 } 148 if (strncmp(a, "--bytes=", 8) == 0) { 149 if (strings_parse_min(a + 8, &opts.min_len) != 0) { 150 driver_errf(STRINGS_TOOL, "invalid --bytes value: %s", a + 8); 151 rc = 2; 152 goto done; 153 } 154 continue; 155 } 156 if (driver_streq(a, "-t")) { 157 const char* v; 158 if (i + 1 >= argc) { 159 driver_errf(STRINGS_TOOL, "option -t requires o, d, or x"); 160 rc = 2; 161 goto done; 162 } 163 v = argv[++i]; 164 if (driver_streq(v, "o")) { 165 opts.radix = STRINGS_RAD_OCT; 166 } else if (driver_streq(v, "d")) { 167 opts.radix = STRINGS_RAD_DEC; 168 } else if (driver_streq(v, "x")) { 169 opts.radix = STRINGS_RAD_HEX; 170 } else { 171 driver_errf(STRINGS_TOOL, "invalid -t radix: %s (want o, d, or x)", v); 172 rc = 2; 173 goto done; 174 } 175 continue; 176 } 177 if (driver_streq(a, "-f") || driver_streq(a, "--print-file-name")) { 178 opts.print_filename = 1; 179 continue; 180 } 181 /* `-<digits>` is GNU shorthand for `-n <digits>`. 
*/ 182 if (a[0] == '-' && a[1] >= '0' && a[1] <= '9') { 183 if (strings_parse_min(a + 1, &opts.min_len) != 0) { 184 driver_errf(STRINGS_TOOL, "invalid length: %s", a); 185 rc = 2; 186 goto done; 187 } 188 continue; 189 } 190 if (driver_streq(a, "-")) { 191 any_input = 1; /* stdin, handled in second pass */ 192 continue; 193 } 194 if (a[0] == '-' && a[1] != '\0') { 195 driver_errf(STRINGS_TOOL, "unknown option: %s", a); 196 rc = 2; 197 goto done; 198 } 199 any_input = 1; 200 } 201 202 /* No file operands: scan stdin. */ 203 if (!any_input) { 204 uint8_t* buf = NULL; 205 size_t n = 0; 206 if (!driver_read_stdin(&env, &buf, &n)) { 207 driver_errf(STRINGS_TOOL, "failed to read stdin"); 208 rc = 1; 209 goto done; 210 } 211 strings_scan(NULL, &opts, buf, n); 212 driver_free(&env, buf, n); 213 rc = 0; 214 goto done; 215 } 216 217 /* Second pass: inputs, in argv order. */ 218 for (i = 1; i < argc; ++i) { 219 const char* a = argv[i]; 220 /* Skip the options consumed above (and their values). */ 221 if (driver_streq(a, "-n") || driver_streq(a, "--bytes") || 222 driver_streq(a, "-t")) { 223 ++i; 224 continue; 225 } 226 if (strncmp(a, "--bytes=", 8) == 0 || driver_streq(a, "-f") || 227 driver_streq(a, "--print-file-name") || 228 (a[0] == '-' && a[1] >= '0' && a[1] <= '9')) { 229 continue; 230 } 231 if (driver_streq(a, "-")) { 232 uint8_t* buf = NULL; 233 size_t n = 0; 234 if (!driver_read_stdin(&env, &buf, &n)) { 235 driver_errf(STRINGS_TOOL, "failed to read stdin"); 236 rc = 1; 237 goto done; 238 } 239 strings_scan(opts.print_filename ? "{standard input}" : NULL, &opts, buf, 240 n); 241 driver_free(&env, buf, n); 242 continue; 243 } 244 { 245 DriverLoad ld = {0}; 246 KitSlice input; 247 if (driver_load_bytes(&env.file_io, STRINGS_TOOL, a, &ld, &input) != 0) { 248 rc = 1; 249 goto done; 250 } 251 strings_scan(a, &opts, input.data, input.len); 252 driver_release_bytes(&env.file_io, &ld); 253 } 254 } 255 256 rc = 0; 257 done: 258 driver_env_fini(&env); 259 return rc; 260 }