kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

compress.c (10266B)


      1 #include <kit/compress.h>
      2 #include <kit/core.h>
      3 #include <stddef.h>
      4 #include <stdint.h>
      5 #include <string.h>
      6 
      7 #include "driver.h"
      8 #include "env.h"
      9 
     10 /* `kit compress` — compress or decompress a stream with one of two standard,
     11  * interoperable containers: gzip (`.gz`, default) or the LZ4 frame format
     12  * (`.lz4`). Reads a FILE operand or stdin, writes to `-o OUT` or stdout. On
     13  * decompress the format is auto-detected from the input's magic bytes unless
     14  * `-z` forces it. Drives the public kit/compress.h API.
     15  *
     16  * The same core backs the standard-named aliases, each pinning a container and
     17  * a default direction so it drops in for the matching command:
     18  *   gzip  -> compress, gzip       gunzip -> decompress, gzip
     19  *   lz4   -> compress, LZ4 frame  lz4c   -> compress, LZ4 frame (legacy name)
     20  * Under an alias -z is rejected (the container is fixed), the common
     21  * gzip/lz4 flags (-c/-k/-f, levels, ...) are accepted, and -d still flips the
     22  * direction. kit always streams to stdout/`-o` and never rewrites the input in
     23  * place, so the in-place file flags are deliberately not emulated. */
     24 
     25 #define COMPRESS_TOOL "compress"
     26 
     27 /* Invocation personality. The generic `compress` leaves format and direction
     28  * to flags; the aliases pin a container and a starting direction. */
     29 typedef struct CompressPersona {
     30   const char* name;         /* tool name for diagnostics: compress/gzip/... */
     31   KitCompressFormat format; /* default (and, when locked, the only) container */
     32   int locked_format;        /* alias pins the container: -z is rejected, no
     33                              * decompress auto-detection */
     34   int default_decompress;   /* alias starts in decompress mode (gunzip) */
     35   int compat;               /* accept gzip/lz4-style compatibility flags */
     36 } CompressPersona;
     37 
     38 static const CompressPersona CZ_GENERIC = {"compress", KIT_COMPRESS_GZIP, 0, 0,
     39                                            0};
     40 static const CompressPersona CZ_GZIP = {"gzip", KIT_COMPRESS_GZIP, 1, 0, 1};
     41 static const CompressPersona CZ_GUNZIP = {"gunzip", KIT_COMPRESS_GZIP, 1, 1, 1};
     42 static const CompressPersona CZ_LZ4 = {"lz4", KIT_COMPRESS_LZ4_FRAME, 1, 0, 1};
     43 static const CompressPersona CZ_LZ4C = {"lz4c", KIT_COMPRESS_LZ4_FRAME, 1, 0,
     44                                         1};
     45 
     46 typedef struct CompressOpts {
     47   int decompress;  /* -d */
     48   int have_format; /* whether -z/--format (or a pinned container) was given */
     49   KitCompressFormat format;
     50   int seen_input;  /* a FILE or `-` operand was seen */
     51   const char* in;  /* input path, or NULL = stdin */
     52   const char* out; /* output path, or NULL = stdout */
     53 } CompressOpts;
     54 
     55 static int compress_parse_format(const char* s, KitCompressFormat* out) {
     56   if (driver_streq(s, "gzip") || driver_streq(s, "gz")) {
     57     *out = KIT_COMPRESS_GZIP;
     58     return 0;
     59   }
     60   if (driver_streq(s, "lz4")) {
     61     *out = KIT_COMPRESS_LZ4_FRAME;
     62     return 0;
     63   }
     64   return 1;
     65 }
     66 
     67 /* gzip/lz4-style flags accepted (as no-ops) under an alias so the tool works
     68  * as a drop-in. kit always streams stdin/-o to stdout and never mutates the
     69  * input in place, so -c/--stdout, -k/--keep and -f/--force have nothing to do;
     70  * the codecs expose no level, so -1..-9 / --fast[=N] / --best are ignored too.
     71  * Direction (-d) and the long decompress spellings are handled by the caller.
     72  * Returns 1 if `a` is one of these accepted no-ops. */
     73 static int compress_compat_noop(const char* a) {
     74   if (driver_streq(a, "-c") || driver_streq(a, "--stdout") ||
     75       driver_streq(a, "--to-stdout"))
     76     return 1;
     77   if (driver_streq(a, "-k") || driver_streq(a, "--keep")) return 1;
     78   if (driver_streq(a, "-f") || driver_streq(a, "--force")) return 1;
     79   if (driver_streq(a, "-n") || driver_streq(a, "--no-name")) return 1;
     80   if (driver_streq(a, "-q") || driver_streq(a, "--quiet")) return 1;
     81   if (driver_streq(a, "--best") || driver_streq(a, "--fast")) return 1;
     82   if (driver_strneq(a, "--fast=", 7)) return 1;
     83   /* -N / -NN compression level (gzip 1-9, lz4 up to 12): a dash then digits. */
     84   if (a[0] == '-' && a[1] >= '1' && a[1] <= '9') {
     85     size_t k = 2;
     86     while (a[k] >= '0' && a[k] <= '9') ++k;
     87     if (a[k] == '\0') return 1;
     88   }
     89   return 0;
     90 }
     91 
     92 void driver_help_compress(void) {
     93   driver_printf(
     94       "%.*s",
     95       KIT_SLICE_ARG(KIT_SLICE_LIT(
     96           "kit compress — compress or decompress with gzip or LZ4 frame\n"
     97           "\n"
     98           "USAGE\n"
     99           "  kit compress    [-z FMT] [-o OUT] [FILE]\n"
    100           "  kit compress -d [-z FMT] [-o OUT] [FILE]\n"
    101           "\n"
    102           "DESCRIPTION\n"
    103           "  Reads FILE (or stdin with no FILE, or `-`) and writes the result\n"
    104           "  to OUT (or stdout). Containers are standard: a `.gz` any gzip\n"
    105           "  reads, or a `.lz4` frame the lz4 CLI reads.\n"
    106           "\n"
    107           "OPTIONS\n"
    108           "  -d                 decompress (default: compress)\n"
    109           "  -z, --format FMT   gzip (default) | lz4. When decompressing, the\n"
    110           "                     format is auto-detected from the input's "
    111           "magic\n"
    112           "                     bytes unless -z is given.\n"
    113           "  -o OUT             write output to OUT (default: stdout)\n"
    114           "  -h, --help         show this help\n"
    115           "\n"
    116           "ALIASES\n"
    117           "  Invoked as gzip / lz4 (or lz4c) compresses to that container; as\n"
    118           "  gunzip decompresses gzip. -d flips direction; -z is rejected "
    119           "(the\n"
    120           "  container is fixed). Common gzip/lz4 flags (-c/--stdout, -k, -f,\n"
    121           "  -1..-9, --fast, --best) are accepted; output still goes to "
    122           "stdout\n"
    123           "  or -o, never rewriting the input in place.\n"
    124           "\n"
    125           "EXIT CODES\n"
    126           "  0   success           1   I/O or codec error   2   bad usage\n")));
    127 }
    128 
    129 static int compress_main(int argc, char** argv,
    130                          const CompressPersona* persona) {
    131   DriverEnv env;
    132   KitContext ctx;
    133   CompressOpts o;
    134   KitWriter* w = NULL;
    135   const uint8_t* data = NULL;
    136   size_t len = 0;
    137   DriverLoad ld = {0};
    138   uint8_t* sbuf = NULL;
    139   size_t sbuf_len = 0;
    140   int loaded = 0, owned_writer = 0, i, rc = 2;
    141   KitCompressFormat fmt;
    142 
    143   if (driver_argv_wants_help(argc, argv, 1)) {
    144     driver_help_compress();
    145     return 0;
    146   }
    147 
    148   memset(&o, 0, sizeof o);
    149   o.format = persona->format;
    150   /* A pinned container both fixes the format and disables decompress
    151    * auto-detection (gunzip only reads gzip, lz4 only reads LZ4). */
    152   o.have_format = persona->locked_format;
    153   o.decompress = persona->default_decompress;
    154   driver_env_init(&env);
    155   ctx = driver_env_to_context(&env);
    156 
    157   for (i = 1; i < argc; ++i) {
    158     const char* a = argv[i];
    159     if (driver_streq(a, "-d") ||
    160         (persona->compat && (driver_streq(a, "--decompress") ||
    161                              driver_streq(a, "--uncompress")))) {
    162       o.decompress = 1;
    163       continue;
    164     }
    165     if (driver_streq(a, "-z") || driver_streq(a, "--format")) {
    166       if (persona->locked_format) {
    167         driver_errf(persona->name, "-z is not accepted; %s always uses %s",
    168                     persona->name,
    169                     persona->format == KIT_COMPRESS_GZIP ? "gzip" : "lz4");
    170         goto done;
    171       }
    172       if (i + 1 >= argc || compress_parse_format(argv[++i], &o.format) != 0) {
    173         driver_errf(persona->name, "-z requires gzip or lz4");
    174         goto done;
    175       }
    176       o.have_format = 1;
    177       continue;
    178     }
    179     if (driver_streq(a, "-o")) {
    180       if (i + 1 >= argc) {
    181         driver_errf(persona->name, "-o requires a file path");
    182         goto done;
    183       }
    184       o.out = argv[++i];
    185       continue;
    186     }
    187     if (driver_streq(a, "-")) { /* explicit stdin */
    188       if (o.seen_input) {
    189         driver_errf(persona->name, "only one input may be given");
    190         goto done;
    191       }
    192       o.seen_input = 1;
    193       continue;
    194     }
    195     if (persona->compat && compress_compat_noop(a)) continue;
    196     if (a[0] == '-' && a[1] != '\0') {
    197       driver_errf(persona->name, "unknown option: %s", a);
    198       goto done;
    199     }
    200     if (o.seen_input) {
    201       driver_errf(persona->name, "only one input may be given");
    202       goto done;
    203     }
    204     o.seen_input = 1;
    205     o.in = a;
    206   }
    207 
    208   /* Load the whole input. */
    209   if (o.in) {
    210     KitSlice input;
    211     if (driver_load_bytes(&env.file_io, persona->name, o.in, &ld, &input) !=
    212         0) {
    213       rc = 1;
    214       goto done;
    215     }
    216     loaded = 1;
    217     data = input.data;
    218     len = input.len;
    219   } else {
    220     if (!driver_read_stdin(&env, &sbuf, &sbuf_len)) {
    221       driver_errf(persona->name, "failed to read stdin");
    222       rc = 1;
    223       goto done;
    224     }
    225     data = sbuf;
    226     len = sbuf_len;
    227   }
    228 
    229   /* Resolve the format. Compress uses the requested/default codec; decompress
    230    * auto-detects from magic bytes unless -z forced one. */
    231   fmt = o.format;
    232   if (o.decompress && !o.have_format) {
    233     if (kit_compress_detect(data, len, &fmt) != KIT_OK) {
    234       driver_errf(persona->name, "cannot detect input format; use -z gzip|lz4");
    235       rc = 1;
    236       goto done;
    237     }
    238   }
    239 
    240   /* Open the output. */
    241   if (o.out) {
    242     if (ctx.file_io->open_writer(ctx.file_io->user, o.out, &w) != KIT_OK) {
    243       driver_errf(persona->name, "failed to open output: %s", o.out);
    244       rc = 1;
    245       goto done;
    246     }
    247     owned_writer = 1;
    248   } else {
    249     w = driver_stdout_writer(&env);
    250     owned_writer = 1;
    251   }
    252 
    253   /* Transform. The public API reports the specific reason through ctx->diag
    254    * (the driver's stderr sink), so we only set the exit status here. */
    255   if (o.decompress) {
    256     rc = kit_decompress(&ctx, fmt, data, len, w) == KIT_OK ? 0 : 1;
    257   } else {
    258     rc = kit_compress(&ctx, fmt, data, len, w) == KIT_OK ? 0 : 1;
    259   }
    260 
    261 done:
    262   if (owned_writer && w) kit_writer_close(w);
    263   if (sbuf) driver_free(&env, sbuf, sbuf_len);
    264   if (loaded) driver_release_bytes(&env.file_io, &ld);
    265   driver_env_fini(&env);
    266   return rc;
    267 }
    268 
    269 int driver_compress(int argc, char** argv) {
    270   return compress_main(argc, argv, &CZ_GENERIC);
    271 }
    272 
    273 int driver_gzip(int argc, char** argv) {
    274   return compress_main(argc, argv, &CZ_GZIP);
    275 }
    276 
    277 int driver_gunzip(int argc, char** argv) {
    278   return compress_main(argc, argv, &CZ_GUNZIP);
    279 }
    280 
    281 int driver_lz4(int argc, char** argv) {
    282   return compress_main(argc, argv, &CZ_LZ4);
    283 }
    284 
    285 int driver_lz4c(int argc, char** argv) {
    286   return compress_main(argc, argv, &CZ_LZ4C);
    287 }