compress.c (10266B)
1 #include <kit/compress.h> 2 #include <kit/core.h> 3 #include <stddef.h> 4 #include <stdint.h> 5 #include <string.h> 6 7 #include "driver.h" 8 #include "env.h" 9 10 /* `kit compress` — compress or decompress a stream with one of two standard, 11 * interoperable containers: gzip (`.gz`, default) or the LZ4 frame format 12 * (`.lz4`). Reads a FILE operand or stdin, writes to `-o OUT` or stdout. On 13 * decompress the format is auto-detected from the input's magic bytes unless 14 * `-z` forces it. Drives the public kit/compress.h API. 15 * 16 * The same core backs the standard-named aliases, each pinning a container and 17 * a default direction so it drops in for the matching command: 18 * gzip -> compress, gzip gunzip -> decompress, gzip 19 * lz4 -> compress, LZ4 frame lz4c -> compress, LZ4 frame (legacy name) 20 * Under an alias -z is rejected (the container is fixed), the common 21 * gzip/lz4 flags (-c/-k/-f, levels, ...) are accepted, and -d still flips the 22 * direction. kit always streams to stdout/`-o` and never rewrites the input in 23 * place, so the in-place file flags are deliberately not emulated. */ 24 25 #define COMPRESS_TOOL "compress" 26 27 /* Invocation personality. The generic `compress` leaves format and direction 28 * to flags; the aliases pin a container and a starting direction. */ 29 typedef struct CompressPersona { 30 const char* name; /* tool name for diagnostics: compress/gzip/... */ 31 KitCompressFormat format; /* default (and, when locked, the only) container */ 32 int locked_format; /* alias pins the container: -z is rejected, no 33 * decompress auto-detection */ 34 int default_decompress; /* alias starts in decompress mode (gunzip) */ 35 int compat; /* accept gzip/lz4-style compatibility flags */ 36 } CompressPersona; 37 38 static const CompressPersona CZ_GENERIC = {"compress", KIT_COMPRESS_GZIP, 0, 0, 39 0}; 40 static const CompressPersona CZ_GZIP = {"gzip", KIT_COMPRESS_GZIP, 1, 0, 1}; 41 static const CompressPersona CZ_GUNZIP = {"gunzip", KIT_COMPRESS_GZIP, 1, 1, 1}; 42 static const CompressPersona CZ_LZ4 = {"lz4", KIT_COMPRESS_LZ4_FRAME, 1, 0, 1}; 43 static const CompressPersona CZ_LZ4C = {"lz4c", KIT_COMPRESS_LZ4_FRAME, 1, 0, 44 1}; 45 46 typedef struct CompressOpts { 47 int decompress; /* -d */ 48 int have_format; /* whether -z/--format (or a pinned container) was given */ 49 KitCompressFormat format; 50 int seen_input; /* a FILE or `-` operand was seen */ 51 const char* in; /* input path, or NULL = stdin */ 52 const char* out; /* output path, or NULL = stdout */ 53 } CompressOpts; 54 55 static int compress_parse_format(const char* s, KitCompressFormat* out) { 56 if (driver_streq(s, "gzip") || driver_streq(s, "gz")) { 57 *out = KIT_COMPRESS_GZIP; 58 return 0; 59 } 60 if (driver_streq(s, "lz4")) { 61 *out = KIT_COMPRESS_LZ4_FRAME; 62 return 0; 63 } 64 return 1; 65 } 66 67 /* gzip/lz4-style flags accepted (as no-ops) under an alias so the tool works 68 * as a drop-in. kit always streams stdin/-o to stdout and never mutates the 69 * input in place, so -c/--stdout, -k/--keep and -f/--force have nothing to do; 70 * the codecs expose no level, so -1..-9 / --fast[=N] / --best are ignored too. 71 * Direction (-d) and the long decompress spellings are handled by the caller. 72 * Returns 1 if `a` is one of these accepted no-ops. */ 73 static int compress_compat_noop(const char* a) { 74 if (driver_streq(a, "-c") || driver_streq(a, "--stdout") || 75 driver_streq(a, "--to-stdout")) 76 return 1; 77 if (driver_streq(a, "-k") || driver_streq(a, "--keep")) return 1; 78 if (driver_streq(a, "-f") || driver_streq(a, "--force")) return 1; 79 if (driver_streq(a, "-n") || driver_streq(a, "--no-name")) return 1; 80 if (driver_streq(a, "-q") || driver_streq(a, "--quiet")) return 1; 81 if (driver_streq(a, "--best") || driver_streq(a, "--fast")) return 1; 82 if (driver_strneq(a, "--fast=", 7)) return 1; 83 /* -N / -NN compression level (gzip 1-9, lz4 up to 12): a dash then digits. */ 84 if (a[0] == '-' && a[1] >= '1' && a[1] <= '9') { 85 size_t k = 2; 86 while (a[k] >= '0' && a[k] <= '9') ++k; 87 if (a[k] == '\0') return 1; 88 } 89 return 0; 90 } 91 92 void driver_help_compress(void) { 93 driver_printf( 94 "%.*s", 95 KIT_SLICE_ARG(KIT_SLICE_LIT( 96 "kit compress — compress or decompress with gzip or LZ4 frame\n" 97 "\n" 98 "USAGE\n" 99 " kit compress [-z FMT] [-o OUT] [FILE]\n" 100 " kit compress -d [-z FMT] [-o OUT] [FILE]\n" 101 "\n" 102 "DESCRIPTION\n" 103 " Reads FILE (or stdin with no FILE, or `-`) and writes the result\n" 104 " to OUT (or stdout). Containers are standard: a `.gz` any gzip\n" 105 " reads, or a `.lz4` frame the lz4 CLI reads.\n" 106 "\n" 107 "OPTIONS\n" 108 " -d decompress (default: compress)\n" 109 " -z, --format FMT gzip (default) | lz4. When decompressing, the\n" 110 " format is auto-detected from the input's " 111 "magic\n" 112 " bytes unless -z is given.\n" 113 " -o OUT write output to OUT (default: stdout)\n" 114 " -h, --help show this help\n" 115 "\n" 116 "ALIASES\n" 117 " Invoked as gzip / lz4 (or lz4c) compresses to that container; as\n" 118 " gunzip decompresses gzip. -d flips direction; -z is rejected " 119 "(the\n" 120 " container is fixed). Common gzip/lz4 flags (-c/--stdout, -k, -f,\n" 121 " -1..-9, --fast, --best) are accepted; output still goes to " 122 "stdout\n" 123 " or -o, never rewriting the input in place.\n" 124 "\n" 125 "EXIT CODES\n" 126 " 0 success 1 I/O or codec error 2 bad usage\n"))); 127 } 128 129 static int compress_main(int argc, char** argv, 130 const CompressPersona* persona) { 131 DriverEnv env; 132 KitContext ctx; 133 CompressOpts o; 134 KitWriter* w = NULL; 135 const uint8_t* data = NULL; 136 size_t len = 0; 137 DriverLoad ld = {0}; 138 uint8_t* sbuf = NULL; 139 size_t sbuf_len = 0; 140 int loaded = 0, owned_writer = 0, i, rc = 2; 141 KitCompressFormat fmt; 142 143 if (driver_argv_wants_help(argc, argv, 1)) { 144 driver_help_compress(); 145 return 0; 146 } 147 148 memset(&o, 0, sizeof o); 149 o.format = persona->format; 150 /* A pinned container both fixes the format and disables decompress 151 * auto-detection (gunzip only reads gzip, lz4 only reads LZ4). */ 152 o.have_format = persona->locked_format; 153 o.decompress = persona->default_decompress; 154 driver_env_init(&env); 155 ctx = driver_env_to_context(&env); 156 157 for (i = 1; i < argc; ++i) { 158 const char* a = argv[i]; 159 if (driver_streq(a, "-d") || 160 (persona->compat && (driver_streq(a, "--decompress") || 161 driver_streq(a, "--uncompress")))) { 162 o.decompress = 1; 163 continue; 164 } 165 if (driver_streq(a, "-z") || driver_streq(a, "--format")) { 166 if (persona->locked_format) { 167 driver_errf(persona->name, "-z is not accepted; %s always uses %s", 168 persona->name, 169 persona->format == KIT_COMPRESS_GZIP ? "gzip" : "lz4"); 170 goto done; 171 } 172 if (i + 1 >= argc || compress_parse_format(argv[++i], &o.format) != 0) { 173 driver_errf(persona->name, "-z requires gzip or lz4"); 174 goto done; 175 } 176 o.have_format = 1; 177 continue; 178 } 179 if (driver_streq(a, "-o")) { 180 if (i + 1 >= argc) { 181 driver_errf(persona->name, "-o requires a file path"); 182 goto done; 183 } 184 o.out = argv[++i]; 185 continue; 186 } 187 if (driver_streq(a, "-")) { /* explicit stdin */ 188 if (o.seen_input) { 189 driver_errf(persona->name, "only one input may be given"); 190 goto done; 191 } 192 o.seen_input = 1; 193 continue; 194 } 195 if (persona->compat && compress_compat_noop(a)) continue; 196 if (a[0] == '-' && a[1] != '\0') { 197 driver_errf(persona->name, "unknown option: %s", a); 198 goto done; 199 } 200 if (o.seen_input) { 201 driver_errf(persona->name, "only one input may be given"); 202 goto done; 203 } 204 o.seen_input = 1; 205 o.in = a; 206 } 207 208 /* Load the whole input. */ 209 if (o.in) { 210 KitSlice input; 211 if (driver_load_bytes(&env.file_io, persona->name, o.in, &ld, &input) != 212 0) { 213 rc = 1; 214 goto done; 215 } 216 loaded = 1; 217 data = input.data; 218 len = input.len; 219 } else { 220 if (!driver_read_stdin(&env, &sbuf, &sbuf_len)) { 221 driver_errf(persona->name, "failed to read stdin"); 222 rc = 1; 223 goto done; 224 } 225 data = sbuf; 226 len = sbuf_len; 227 } 228 229 /* Resolve the format. Compress uses the requested/default codec; decompress 230 * auto-detects from magic bytes unless -z forced one. */ 231 fmt = o.format; 232 if (o.decompress && !o.have_format) { 233 if (kit_compress_detect(data, len, &fmt) != KIT_OK) { 234 driver_errf(persona->name, "cannot detect input format; use -z gzip|lz4"); 235 rc = 1; 236 goto done; 237 } 238 } 239 240 /* Open the output. */ 241 if (o.out) { 242 if (ctx.file_io->open_writer(ctx.file_io->user, o.out, &w) != KIT_OK) { 243 driver_errf(persona->name, "failed to open output: %s", o.out); 244 rc = 1; 245 goto done; 246 } 247 owned_writer = 1; 248 } else { 249 w = driver_stdout_writer(&env); 250 owned_writer = 1; 251 } 252 253 /* Transform. The public API reports the specific reason through ctx->diag 254 * (the driver's stderr sink), so we only set the exit status here. */ 255 if (o.decompress) { 256 rc = kit_decompress(&ctx, fmt, data, len, w) == KIT_OK ? 0 : 1; 257 } else { 258 rc = kit_compress(&ctx, fmt, data, len, w) == KIT_OK ? 0 : 1; 259 } 260 261 done: 262 if (owned_writer && w) kit_writer_close(w); 263 if (sbuf) driver_free(&env, sbuf, sbuf_len); 264 if (loaded) driver_release_bytes(&env.file_io, &ld); 265 driver_env_fini(&env); 266 return rc; 267 } 268 269 int driver_compress(int argc, char** argv) { 270 return compress_main(argc, argv, &CZ_GENERIC); 271 } 272 273 int driver_gzip(int argc, char** argv) { 274 return compress_main(argc, argv, &CZ_GZIP); 275 } 276 277 int driver_gunzip(int argc, char** argv) { 278 return compress_main(argc, argv, &CZ_GUNZIP); 279 } 280 281 int driver_lz4(int argc, char** argv) { 282 return compress_main(argc, argv, &CZ_LZ4); 283 } 284 285 int driver_lz4c(int argc, char** argv) { 286 return compress_main(argc, argv, &CZ_LZ4C); 287 }