kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

deflate.c (82681B)


      1 #include "deflate.h"
      2 
      3 #include <string.h>
      4 
      5 #include "core/crc32.h"
      6 
      7 /*
      8  * Private raw DEFLATE/INFLATE codec.
      9  *
     10  * Extracted from miniz (public domain), via xOS. Provides the core DEFLATE
     11  * algorithm without malloc, file I/O, or ZIP support.
     12  *
     13  * Features:
     14  * - No dynamic allocation (all state in user-provided structures)
     15  * - No file I/O
     16  * - Supports raw deflate and zlib-wrapped streams
     17  *
     18  * Structure sizes:
     19  * - xdeflate_compressor: ~285 KB (or ~140 KB with XDEFLATE_LESS_MEMORY=1)
     20  * - xdeflate_decompressor: ~11 KB
     21  */
     22 
     23 /* ============================================================================
     24  * Configuration
     25  * ============================================================================
     26  */
     27 
     28 /* Set to 1 to reduce compressor memory usage (slightly slower) */
     29 #ifndef XDEFLATE_LESS_MEMORY
     30 #define XDEFLATE_LESS_MEMORY 1
     31 #endif
     32 
     33 /* Use 64-bit bit buffer on 64-bit platforms */
     34 #if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || \
     35     defined(__LP64__)
     36 #define XDEFLATE_HAS_64BIT_REGISTERS 1
     37 #else
     38 #define XDEFLATE_HAS_64BIT_REGISTERS 0
     39 #endif
     40 
     41 /* ============================================================================
     42  * Compression API
     43  * ============================================================================
     44  */
     45 
     46 /* Compression flags (OR together) */
     47 enum {
     48   XDEFLATE_HUFFMAN_ONLY = 0,         /* No LZ77, Huffman only */
     49   XDEFLATE_DEFAULT_MAX_PROBES = 128, /* Default hash probe depth */
     50   XDEFLATE_MAX_PROBES_MASK = 0xFFF,  /* Max probes in low 12 bits */
     51 
     52   XDEFLATE_WRITE_ZLIB_HEADER = 0x01000, /* Output zlib header + adler32 */
     53   XDEFLATE_COMPUTE_ADLER32 = 0x02000, /* Compute adler32 even without header */
     54   XDEFLATE_GREEDY_PARSING = 0x04000,  /* Fast greedy vs lazy parsing */
     55   XDEFLATE_NONDETERMINISTIC =
     56       0x08000,                       /* Faster init, non-deterministic output */
     57   XDEFLATE_RLE_MATCHES = 0x10000,    /* Only distance-1 matches */
     58   XDEFLATE_FILTER_MATCHES = 0x20000, /* Skip matches <= 5 chars */
     59   XDEFLATE_FORCE_STATIC_BLOCKS = 0x40000,
     60   XDEFLATE_FORCE_RAW_BLOCKS = 0x80000
     61 };
     62 
     63 /* Compression status */
     64 typedef enum {
     65   XDEFLATE_STATUS_BAD_PARAM = -2,
     66   XDEFLATE_STATUS_PUT_BUF_FAILED = -1,
     67   XDEFLATE_STATUS_OKAY = 0,
     68   XDEFLATE_STATUS_DONE = 1
     69 } xdeflate_status;
     70 
     71 /* Flush modes */
     72 typedef enum {
     73   XDEFLATE_NO_FLUSH = 0,
     74   XDEFLATE_SYNC_FLUSH = 2,
     75   XDEFLATE_FULL_FLUSH = 3,
     76   XDEFLATE_FINISH = 4
     77 } xdeflate_flush;
     78 
     79 /* Internal constants */
     80 enum {
     81   XDEFLATE_MAX_HUFF_TABLES = 3,
     82   XDEFLATE_MAX_HUFF_SYMBOLS_0 = 288,
     83   XDEFLATE_MAX_HUFF_SYMBOLS_1 = 32,
     84   XDEFLATE_MAX_HUFF_SYMBOLS_2 = 19,
     85   XDEFLATE_MAX_HUFF_SYMBOLS = 288,
     86   XDEFLATE_LZ_DICT_SIZE = 32768,
     87   XDEFLATE_LZ_DICT_SIZE_MASK = XDEFLATE_LZ_DICT_SIZE - 1,
     88   XDEFLATE_MIN_MATCH_LEN = 3,
     89   XDEFLATE_MAX_MATCH_LEN = 258
     90 };
     91 
     92 #if XDEFLATE_LESS_MEMORY
     93 enum {
     94   XDEFLATE_LZ_CODE_BUF_SIZE = 24 * 1024,
     95   XDEFLATE_OUT_BUF_SIZE = (XDEFLATE_LZ_CODE_BUF_SIZE * 13) / 10,
     96   XDEFLATE_LZ_HASH_BITS = 12,
     97   XDEFLATE_LEVEL1_HASH_SIZE_MASK = 4095,
     98   XDEFLATE_LZ_HASH_SHIFT = (XDEFLATE_LZ_HASH_BITS + 2) / 3,
     99   XDEFLATE_LZ_HASH_SIZE = 1 << XDEFLATE_LZ_HASH_BITS
    100 };
    101 #else
    102 enum {
    103   XDEFLATE_LZ_CODE_BUF_SIZE = 64 * 1024,
    104   XDEFLATE_OUT_BUF_SIZE = (XDEFLATE_LZ_CODE_BUF_SIZE * 13) / 10,
    105   XDEFLATE_LZ_HASH_BITS = 15,
    106   XDEFLATE_LEVEL1_HASH_SIZE_MASK = 4095,
    107   XDEFLATE_LZ_HASH_SHIFT = (XDEFLATE_LZ_HASH_BITS + 2) / 3,
    108   XDEFLATE_LZ_HASH_SIZE = 1 << XDEFLATE_LZ_HASH_BITS
    109 };
    110 #endif
    111 
    112 /* Output callback type */
    113 typedef int (*xdeflate_put_buf_func)(const void* buf, int len, void* user);
    114 
    115 /* Compressor state (~285 KB or ~140 KB with XDEFLATE_LESS_MEMORY) */
    116 typedef struct {
    117   xdeflate_put_buf_func m_pPut_buf_func;
    118   void* m_pPut_buf_user;
    119   uint32_t m_flags, m_max_probes[2];
    120   int m_greedy_parsing;
    121   uint32_t m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size;
    122   uint8_t *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end;
    123   uint32_t m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos;
    124   uint32_t m_bits_in, m_bit_buffer;
    125   uint32_t m_saved_match_dist, m_saved_match_len, m_saved_lit;
    126   uint32_t m_output_flush_ofs, m_output_flush_remaining;
    127   uint32_t m_finished, m_block_index, m_wants_to_finish;
    128   xdeflate_status m_prev_return_status;
    129   const void* m_pIn_buf;
    130   void* m_pOut_buf;
    131   size_t *m_pIn_buf_size, *m_pOut_buf_size;
    132   xdeflate_flush m_flush;
    133   const uint8_t* m_pSrc;
    134   size_t m_src_buf_left, m_out_buf_ofs;
    135   uint8_t m_dict[XDEFLATE_LZ_DICT_SIZE + XDEFLATE_MAX_MATCH_LEN - 1];
    136   uint16_t m_huff_count[XDEFLATE_MAX_HUFF_TABLES][XDEFLATE_MAX_HUFF_SYMBOLS];
    137   uint16_t m_huff_codes[XDEFLATE_MAX_HUFF_TABLES][XDEFLATE_MAX_HUFF_SYMBOLS];
    138   uint8_t m_huff_code_sizes[XDEFLATE_MAX_HUFF_TABLES]
    139                            [XDEFLATE_MAX_HUFF_SYMBOLS];
    140   uint8_t m_lz_code_buf[XDEFLATE_LZ_CODE_BUF_SIZE];
    141   uint16_t m_next[XDEFLATE_LZ_DICT_SIZE];
    142   uint16_t m_hash[XDEFLATE_LZ_HASH_SIZE];
    143   uint8_t m_output_buf[XDEFLATE_OUT_BUF_SIZE];
    144 } xdeflate_compressor;
    145 
    146 /*
    147  * xdeflate_init - Initialize compressor
    148  *
    149  * @d:             Compressor state (caller-provided)
    150  * @put_buf_func:  Output callback (NULL to use xdeflate_compress with output
    151  * buffer)
    152  * @put_buf_user:  User data for callback
    153  * @flags:         Compression flags (XDEFLATE_* ORed together)
    154  *
    155  * Returns XDEFLATE_STATUS_OKAY on success.
    156  * No corresponding deinit needed - no allocations made.
    157  */
    158 xdeflate_status xdeflate_init(xdeflate_compressor* d,
    159                               xdeflate_put_buf_func put_buf_func,
    160                               void* put_buf_user, int flags);
    161 
    162 /*
    163  * xdeflate_compress - Compress data
    164  *
    165  * @d:            Compressor state
    166  * @in_buf:       Input data
    167  * @in_buf_size:  Input size (updated to bytes consumed)
    168  * @out_buf:      Output buffer
    169  * @out_buf_size: Output capacity (updated to bytes written)
    170  * @flush:        Flush mode
    171  *
    172  * Returns status code.
    173  */
    174 xdeflate_status xdeflate_compress(xdeflate_compressor* d, const void* in_buf,
    175                                   size_t* in_buf_size, void* out_buf,
    176                                   size_t* out_buf_size, xdeflate_flush flush);
    177 
    178 /*
    179  * xdeflate_get_adler32 - Get current adler32 checksum
    180  */
    181 uint32_t xdeflate_get_adler32(xdeflate_compressor* d);
    182 
    183 /*
    184  * xdeflate_create_flags - Create compression flags from zlib-style parameters
    185  *
    186  * @level:       0-10 (0=store, 1=fastest, 10=best)
    187  * @window_bits: -15 (raw deflate) or 15 (zlib header)
    188  * @strategy:    0=default, 1=filtered, 2=huffman, 3=rle, 4=fixed
    189  */
    190 uint32_t xdeflate_create_flags(int level, int window_bits, int strategy);
    191 
    192 /* ============================================================================
    193  * Decompression API
    194  * ============================================================================
    195  */
    196 
    197 /* Decompression flags */
    198 enum {
    199   XINFLATE_FLAG_PARSE_ZLIB_HEADER = 1, /* Input has zlib header */
    200   XINFLATE_FLAG_HAS_MORE_INPUT = 2,    /* More input available */
    201   XINFLATE_FLAG_USING_NON_WRAPPING_OUTPUT_BUF =
    202       4,                            /* Output buffer >= uncompressed size */
    203   XINFLATE_FLAG_COMPUTE_ADLER32 = 8 /* Compute adler32 checksum */
    204 };
    205 
    206 /* Decompression status */
    207 typedef enum {
    208   XINFLATE_STATUS_FAILED_CANNOT_MAKE_PROGRESS = -4,
    209   XINFLATE_STATUS_BAD_PARAM = -3,
    210   XINFLATE_STATUS_ADLER32_MISMATCH = -2,
    211   XINFLATE_STATUS_FAILED = -1,
    212   XINFLATE_STATUS_DONE = 0,
    213   XINFLATE_STATUS_NEEDS_MORE_INPUT = 1,
    214   XINFLATE_STATUS_HAS_MORE_OUTPUT = 2
    215 } xinflate_status;
    216 
    217 /* Internal constants */
    218 enum {
    219   XINFLATE_MAX_HUFF_TABLES = 3,
    220   XINFLATE_MAX_HUFF_SYMBOLS_0 = 288,
    221   XINFLATE_MAX_HUFF_SYMBOLS_1 = 32,
    222   XINFLATE_MAX_HUFF_SYMBOLS_2 = 19,
    223   XINFLATE_FAST_LOOKUP_BITS = 10,
    224   XINFLATE_FAST_LOOKUP_SIZE = 1 << XINFLATE_FAST_LOOKUP_BITS,
    225   XINFLATE_LZ_DICT_SIZE = 32768
    226 };
    227 
    228 #if XDEFLATE_HAS_64BIT_REGISTERS
    229 typedef uint64_t xinflate_bit_buf_t;
    230 #define XINFLATE_BITBUF_SIZE 64
    231 #else
    232 typedef uint32_t xinflate_bit_buf_t;
    233 #define XINFLATE_BITBUF_SIZE 32
    234 #endif
    235 
    236 /* Decompressor state (~11 KB) */
    237 typedef struct {
    238   uint32_t m_state, m_num_bits, m_zhdr0, m_zhdr1;
    239   uint32_t m_z_adler32, m_final, m_type, m_check_adler32;
    240   uint32_t m_dist, m_counter, m_num_extra;
    241   uint32_t m_table_sizes[XINFLATE_MAX_HUFF_TABLES];
    242   xinflate_bit_buf_t m_bit_buf;
    243   size_t m_dist_from_out_buf_start;
    244   int16_t m_look_up[XINFLATE_MAX_HUFF_TABLES][XINFLATE_FAST_LOOKUP_SIZE];
    245   int16_t m_tree_0[XINFLATE_MAX_HUFF_SYMBOLS_0 * 2];
    246   int16_t m_tree_1[XINFLATE_MAX_HUFF_SYMBOLS_1 * 2];
    247   int16_t m_tree_2[XINFLATE_MAX_HUFF_SYMBOLS_2 * 2];
    248   uint8_t m_code_size_0[XINFLATE_MAX_HUFF_SYMBOLS_0];
    249   uint8_t m_code_size_1[XINFLATE_MAX_HUFF_SYMBOLS_1];
    250   uint8_t m_code_size_2[XINFLATE_MAX_HUFF_SYMBOLS_2];
    251   uint8_t m_raw_header[4];
    252   uint8_t m_len_codes[XINFLATE_MAX_HUFF_SYMBOLS_0 +
    253                       XINFLATE_MAX_HUFF_SYMBOLS_1 + 137];
    254 } xinflate_decompressor;
    255 
    256 /* Initialize decompressor */
    257 #define xinflate_init(r) \
    258   do {                   \
    259     (r)->m_state = 0;    \
    260   } while (0)
    261 
    262 /* Get adler32 after decompression */
    263 #define xinflate_get_adler32(r) ((r)->m_check_adler32)
    264 
    265 /*
    266  * xinflate_decompress - Decompress data
    267  *
    268  * @r:             Decompressor state
    269  * @in_buf_next:   Input buffer pointer
    270  * @in_buf_size:   Input size (updated to bytes consumed)
    271  * @out_buf_start: Start of output buffer (for dictionary lookback)
    272  * @out_buf_next:  Current output position
    273  * @out_buf_size:  Output capacity (updated to bytes written)
    274  * @flags:         Decompression flags
    275  *
    276  * Returns status code.
    277  */
    278 xinflate_status xinflate_decompress(xinflate_decompressor* r,
    279                                     const uint8_t* in_buf_next,
    280                                     size_t* in_buf_size, uint8_t* out_buf_start,
    281                                     uint8_t* out_buf_next, size_t* out_buf_size,
    282                                     uint32_t flags);
    283 
    284 /* ============================================================================
    285  * Convenience Functions
    286  * ============================================================================
    287  */
    288 
    289 #define XINFLATE_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1))
    290 
    291 /*
    292  * xinflate_decompress_mem_to_mem - Decompress memory to memory
    293  *
    294  * @out_buf:      Output buffer
    295  * @out_buf_len:  Output buffer size
    296  * @src_buf:      Compressed input
    297  * @src_buf_len:  Input size
    298  * @flags:        Decompression flags
    299  *
    300  * Returns bytes written, or XINFLATE_DECOMPRESS_MEM_TO_MEM_FAILED on error.
    301  */
    302 size_t xinflate_decompress_mem_to_mem(void* out_buf, size_t out_buf_len,
    303                                       const void* src_buf, size_t src_buf_len,
    304                                       int flags);
    305 
    306 /* ============================================================================
    307  * Internal Macros
    308  * ============================================================================
    309  */
    310 
    311 #define XDEFLATE_MIN(a, b) (((a) < (b)) ? (a) : (b))
    312 #define XDEFLATE_MAX(a, b) (((a) > (b)) ? (a) : (b))
    313 
    314 #define XDEFLATE_CLEAR_ARR(arr) memset(arr, 0, sizeof(arr))
    315 #define XDEFLATE_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj))
    316 
    317 #define XDEFLATE_ASSERT(x) ((void)(x))
    318 
    319 #define XDEFLATE_MACRO_END while (0)
    320 
    321 #if defined(_MSC_VER)
    322 #define XDEFLATE_FORCEINLINE __forceinline
    323 #elif defined(__GNUC__) || defined(__clang__)
    324 #define XDEFLATE_FORCEINLINE static inline __attribute__((always_inline))
    325 #else
    326 #define XDEFLATE_FORCEINLINE static inline
    327 #endif
    328 
    329 /* Detect platform capabilities */
    330 #if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
    331     defined(__x86_64__)
    332 #define XDEFLATE_X86_OR_X64 1
    333 #else
    334 #define XDEFLATE_X86_OR_X64 0
    335 #endif
    336 
    337 #if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
    338 #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
    339 #define XDEFLATE_LITTLE_ENDIAN 1
    340 #else
    341 #define XDEFLATE_LITTLE_ENDIAN 0
    342 #endif
    343 #elif XDEFLATE_X86_OR_X64
    344 #define XDEFLATE_LITTLE_ENDIAN 1
    345 #else
    346 #define XDEFLATE_LITTLE_ENDIAN 0
    347 #endif
    348 
    349 /* Allow unaligned loads on x86/x64 */
    350 #if XDEFLATE_X86_OR_X64
    351 #define XDEFLATE_USE_UNALIGNED_LOADS 1
    352 #else
    353 #define XDEFLATE_USE_UNALIGNED_LOADS 0
    354 #endif
    355 
    356 /* Use memcpy for unaligned access (safer, usually optimized away) */
    357 #define XDEFLATE_UNALIGNED_USE_MEMCPY 1
    358 
    359 /* Read/checksum helpers */
    360 #if !XDEFLATE_HAS_64BIT_REGISTERS
    361 static uint16_t xdeflate_read_le16(const uint8_t* p) {
    362   return (uint16_t)(p[0] | ((uint16_t)p[1] << 8));
    363 }
    364 #endif
    365 
    366 static uint32_t xdeflate_read_le32(const uint8_t* p) {
    367   return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) |
    368          ((uint32_t)p[3] << 24);
    369 }
    370 
    371 static uint32_t xdeflate_adler32_update(uint32_t adler, const uint8_t* data,
    372                                         size_t len) {
    373   uint32_t s1 = adler & 0xffffu;
    374   uint32_t s2 = adler >> 16;
    375   while (len) {
    376     size_t block = len < 5552u ? len : 5552u;
    377     size_t i;
    378     for (i = 0; i < block; ++i) {
    379       s1 += data[i];
    380       s2 += s1;
    381     }
    382     s1 %= 65521u;
    383     s2 %= 65521u;
    384     data += block;
    385     len -= block;
    386   }
    387   return (s2 << 16) | s1;
    388 }
    389 
    390 /* ============================================================================
    391  * Compression Implementation
    392  * ============================================================================
    393  */
    394 
    395 /* Lookup tables for length/distance encoding */
    396 static const uint16_t s_len_sym[256] = {
    397     257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268,
    398     268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272,
    399     272, 272, 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274,
    400     274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276,
    401     276, 276, 276, 276, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277,
    402     277, 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
    403     278, 278, 278, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, 279,
    404     279, 279, 279, 279, 279, 279, 279, 280, 280, 280, 280, 280, 280, 280, 280,
    405     280, 280, 280, 280, 280, 280, 280, 280, 281, 281, 281, 281, 281, 281, 281,
    406     281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281,
    407     281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 282, 282, 282, 282, 282,
    408     282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282,
    409     282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 283, 283, 283,
    410     283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283,
    411     283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 284,
    412     284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284,
    413     284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284,
    414     285};
    415 
    416 static const uint8_t s_len_extra[256] = {
    417     0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
    418     2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    419     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    420     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    421     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    422     4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    423     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    424     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    425     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    426     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    427     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0};
    428 
    429 static const uint8_t s_small_dist_sym[512] = {
    430     0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,
    431     8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10,
    432     10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    433     11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    434     12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    435     12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    436     13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14,
    437     14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    438     14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    439     14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    440     14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    441     15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    442     15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    443     15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    444     16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    445     16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    446     16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    447     16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    448     16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    449     16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    450     16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    451     17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    452     17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    453     17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    454     17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    455     17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    456     17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17};
    457 
    458 static const uint8_t s_small_dist_extra[512] = {
    459     0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    460     3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    461     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    462     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    463     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    464     5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    465     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    466     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    467     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    468     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    469     6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    470     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    471     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    472     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    473     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    474     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    475     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    476     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    477     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    478     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    479     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7};
    480 
    481 static const uint8_t s_large_dist_sym[128] = {
    482     0,  0,  18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24,
    483     24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26,
    484     26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27,
    485     27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    486     28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    487     28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
    488     29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29};
    489 
    490 static const uint8_t s_large_dist_extra[128] = {
    491     0,  0,  8,  8,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
    492     11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12,
    493     12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    494     12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    495     13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    496     13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    497     13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13};
    498 
    499 static const uint32_t s_bitmasks[17] = {
    500     0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF,
    501     0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF};
    502 
    503 static const uint32_t s_num_probes[11] = {0,   1,   6,   32,  16,  32,
    504                                           128, 256, 512, 768, 1500};
    505 
    506 /* Symbol frequency for radix sort */
    507 typedef struct {
    508   uint16_t m_key, m_sym_index;
    509 } sym_freq;
    510 
    511 static sym_freq* radix_sort_syms(uint32_t num_syms, sym_freq* pSyms0,
    512                                  sym_freq* pSyms1) {
    513   uint32_t total_passes = 2, pass_shift, pass, i, hist[256 * 2];
    514   sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1;
    515   XDEFLATE_CLEAR_ARR(hist);
    516   for (i = 0; i < num_syms; i++) {
    517     uint32_t freq = pSyms0[i].m_key;
    518     hist[freq & 0xFF]++;
    519     hist[256 + ((freq >> 8) & 0xFF)]++;
    520   }
    521   while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256]))
    522     total_passes--;
    523   for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) {
    524     const uint32_t* pHist = &hist[pass << 8];
    525     uint32_t offsets[256], cur_ofs = 0;
    526     for (i = 0; i < 256; i++) {
    527       offsets[i] = cur_ofs;
    528       cur_ofs += pHist[i];
    529     }
    530     for (i = 0; i < num_syms; i++)
    531       pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] =
    532           pCur_syms[i];
    533     {
    534       sym_freq* t = pCur_syms;
    535       pCur_syms = pNew_syms;
    536       pNew_syms = t;
    537     }
    538   }
    539   return pCur_syms;
    540 }
    541 
    542 static void calculate_minimum_redundancy(sym_freq* A, int n) {
    543   int root, leaf, next, avbl, used, dpth;
    544   if (n == 0)
    545     return;
    546   else if (n == 1) {
    547     A[0].m_key = 1;
    548     return;
    549   }
    550   A[0].m_key += A[1].m_key;
    551   root = 0;
    552   leaf = 2;
    553   for (next = 1; next < n - 1; next++) {
    554     if (leaf >= n || A[root].m_key < A[leaf].m_key) {
    555       A[next].m_key = A[root].m_key;
    556       A[root++].m_key = (uint16_t)next;
    557     } else
    558       A[next].m_key = A[leaf++].m_key;
    559     if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key)) {
    560       A[next].m_key = (uint16_t)(A[next].m_key + A[root].m_key);
    561       A[root++].m_key = (uint16_t)next;
    562     } else
    563       A[next].m_key = (uint16_t)(A[next].m_key + A[leaf++].m_key);
    564   }
    565   A[n - 2].m_key = 0;
    566   for (next = n - 3; next >= 0; next--)
    567     A[next].m_key = A[A[next].m_key].m_key + 1;
    568   avbl = 1;
    569   used = dpth = 0;
    570   root = n - 2;
    571   next = n - 1;
    572   while (avbl > 0) {
    573     while (root >= 0 && (int)A[root].m_key == dpth) {
    574       used++;
    575       root--;
    576     }
    577     while (avbl > used) {
    578       A[next--].m_key = (uint16_t)(dpth);
    579       avbl--;
    580     }
    581     avbl = 2 * used;
    582     dpth++;
    583     used = 0;
    584   }
    585 }
    586 
    587 #define MAX_SUPPORTED_HUFF_CODESIZE 32
    588 
    589 static void huffman_enforce_max_code_size(int* pNum_codes, int code_list_len,
    590                                           int max_code_size) {
    591   int i;
    592   uint32_t total = 0;
    593   if (code_list_len <= 1) return;
    594   for (i = max_code_size + 1; i <= MAX_SUPPORTED_HUFF_CODESIZE; i++)
    595     pNum_codes[max_code_size] += pNum_codes[i];
    596   for (i = max_code_size; i > 0; i--)
    597     total += (((uint32_t)pNum_codes[i]) << (max_code_size - i));
    598   while (total != (1UL << max_code_size)) {
    599     pNum_codes[max_code_size]--;
    600     for (i = max_code_size - 1; i > 0; i--)
    601       if (pNum_codes[i]) {
    602         pNum_codes[i]--;
    603         pNum_codes[i + 1] += 2;
    604         break;
    605       }
    606     total--;
    607   }
    608 }
    609 
    610 static void optimize_huffman_table(xdeflate_compressor* d, int table_num,
    611                                    int table_len, int code_size_limit,
    612                                    int static_table) {
    613   int i, j, l, num_codes[1 + MAX_SUPPORTED_HUFF_CODESIZE];
    614   uint32_t next_code[MAX_SUPPORTED_HUFF_CODESIZE + 1];
    615   XDEFLATE_CLEAR_ARR(num_codes);
    616   if (static_table) {
    617     for (i = 0; i < table_len; i++)
    618       num_codes[d->m_huff_code_sizes[table_num][i]]++;
    619   } else {
    620     sym_freq syms0[XDEFLATE_MAX_HUFF_SYMBOLS], syms1[XDEFLATE_MAX_HUFF_SYMBOLS],
    621         *pSyms;
    622     int num_used_syms = 0;
    623     const uint16_t* pSym_count = &d->m_huff_count[table_num][0];
    624     for (i = 0; i < table_len; i++)
    625       if (pSym_count[i]) {
    626         syms0[num_used_syms].m_key = (uint16_t)pSym_count[i];
    627         syms0[num_used_syms++].m_sym_index = (uint16_t)i;
    628       }
    629 
    630     pSyms = radix_sort_syms(num_used_syms, syms0, syms1);
    631     calculate_minimum_redundancy(pSyms, num_used_syms);
    632 
    633     for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++;
    634 
    635     huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit);
    636 
    637     XDEFLATE_CLEAR_ARR(d->m_huff_code_sizes[table_num]);
    638     XDEFLATE_CLEAR_ARR(d->m_huff_codes[table_num]);
    639     for (i = 1, j = num_used_syms; i <= code_size_limit; i++)
    640       for (l = num_codes[i]; l > 0; l--)
    641         d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (uint8_t)(i);
    642   }
    643 
    644   next_code[1] = 0;
    645   for (j = 0, i = 2; i <= code_size_limit; i++)
    646     next_code[i] = j = ((j + num_codes[i - 1]) << 1);
    647 
    648   for (i = 0; i < table_len; i++) {
    649     uint32_t rev_code = 0, code, code_size;
    650     if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue;
    651     code = next_code[code_size]++;
    652     for (l = code_size; l > 0; l--, code >>= 1)
    653       rev_code = (rev_code << 1) | (code & 1);
    654     d->m_huff_codes[table_num][i] = (uint16_t)rev_code;
    655   }
    656 }
    657 
    658 #define PUT_BITS(b, l)                                    \
    659   do {                                                    \
    660     uint32_t bits = b;                                    \
    661     uint32_t len = l;                                     \
    662     XDEFLATE_ASSERT(bits <= ((1U << len) - 1U));          \
    663     d->m_bit_buffer |= (bits << d->m_bits_in);            \
    664     d->m_bits_in += len;                                  \
    665     while (d->m_bits_in >= 8) {                           \
    666       if (d->m_pOutput_buf < d->m_pOutput_buf_end)        \
    667         *d->m_pOutput_buf++ = (uint8_t)(d->m_bit_buffer); \
    668       d->m_bit_buffer >>= 8;                              \
    669       d->m_bits_in -= 8;                                  \
    670     }                                                     \
    671   }                                                       \
    672   XDEFLATE_MACRO_END
    673 
    674 #define RLE_PREV_CODE_SIZE()                                                   \
    675   {                                                                            \
    676     if (rle_repeat_count) {                                                    \
    677       if (rle_repeat_count < 3) {                                              \
    678         d->m_huff_count[2][prev_code_size] =                                   \
    679             (uint16_t)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \
    680         while (rle_repeat_count--)                                             \
    681           packed_code_sizes[num_packed_code_sizes++] = prev_code_size;         \
    682       } else {                                                                 \
    683         d->m_huff_count[2][16] = (uint16_t)(d->m_huff_count[2][16] + 1);       \
    684         packed_code_sizes[num_packed_code_sizes++] = 16;                       \
    685         packed_code_sizes[num_packed_code_sizes++] =                           \
    686             (uint8_t)(rle_repeat_count - 3);                                   \
    687       }                                                                        \
    688       rle_repeat_count = 0;                                                    \
    689     }                                                                          \
    690   }
    691 
    692 #define RLE_ZERO_CODE_SIZE()                                                  \
    693   {                                                                           \
    694     if (rle_z_count) {                                                        \
    695       if (rle_z_count < 3) {                                                  \
    696         d->m_huff_count[2][0] =                                               \
    697             (uint16_t)(d->m_huff_count[2][0] + rle_z_count);                  \
    698         while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \
    699       } else if (rle_z_count <= 10) {                                         \
    700         d->m_huff_count[2][17] = (uint16_t)(d->m_huff_count[2][17] + 1);      \
    701         packed_code_sizes[num_packed_code_sizes++] = 17;                      \
    702         packed_code_sizes[num_packed_code_sizes++] =                          \
    703             (uint8_t)(rle_z_count - 3);                                       \
    704       } else {                                                                \
    705         d->m_huff_count[2][18] = (uint16_t)(d->m_huff_count[2][18] + 1);      \
    706         packed_code_sizes[num_packed_code_sizes++] = 18;                      \
    707         packed_code_sizes[num_packed_code_sizes++] =                          \
    708             (uint8_t)(rle_z_count - 11);                                      \
    709       }                                                                       \
    710       rle_z_count = 0;                                                        \
    711     }                                                                         \
    712   }
    713 
    714 static const uint8_t s_packed_code_size_syms_swizzle[] = {
    715     16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
    716 
    717 static void start_dynamic_block(xdeflate_compressor* d) {
    718   int num_lit_codes, num_dist_codes, num_bit_lengths;
    719   uint32_t i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count,
    720       rle_repeat_count, packed_code_sizes_index;
    721   uint8_t code_sizes_to_pack[XDEFLATE_MAX_HUFF_SYMBOLS_0 +
    722                              XDEFLATE_MAX_HUFF_SYMBOLS_1];
    723   uint8_t packed_code_sizes[XDEFLATE_MAX_HUFF_SYMBOLS_0 +
    724                             XDEFLATE_MAX_HUFF_SYMBOLS_1];
    725   uint8_t prev_code_size = 0xFF;
    726 
    727   d->m_huff_count[0][256] = 1;
    728 
    729   optimize_huffman_table(d, 0, XDEFLATE_MAX_HUFF_SYMBOLS_0, 15, 0);
    730   optimize_huffman_table(d, 1, XDEFLATE_MAX_HUFF_SYMBOLS_1, 15, 0);
    731 
    732   for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--)
    733     if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break;
    734   for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--)
    735     if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break;
    736 
    737   memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes);
    738   memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0],
    739          num_dist_codes);
    740   total_code_sizes_to_pack = num_lit_codes + num_dist_codes;
    741   num_packed_code_sizes = 0;
    742   rle_z_count = 0;
    743   rle_repeat_count = 0;
    744 
    745   memset(&d->m_huff_count[2][0], 0,
    746          sizeof(d->m_huff_count[2][0]) * XDEFLATE_MAX_HUFF_SYMBOLS_2);
    747   for (i = 0; i < total_code_sizes_to_pack; i++) {
    748     uint8_t code_size = code_sizes_to_pack[i];
    749     if (!code_size) {
    750       RLE_PREV_CODE_SIZE();
    751       if (++rle_z_count == 138) {
    752         RLE_ZERO_CODE_SIZE();
    753       }
    754     } else {
    755       RLE_ZERO_CODE_SIZE();
    756       if (code_size != prev_code_size) {
    757         RLE_PREV_CODE_SIZE();
    758         d->m_huff_count[2][code_size] =
    759             (uint16_t)(d->m_huff_count[2][code_size] + 1);
    760         packed_code_sizes[num_packed_code_sizes++] = code_size;
    761       } else if (++rle_repeat_count == 6) {
    762         RLE_PREV_CODE_SIZE();
    763       }
    764     }
    765     prev_code_size = code_size;
    766   }
    767   if (rle_repeat_count) {
    768     RLE_PREV_CODE_SIZE();
    769   } else {
    770     RLE_ZERO_CODE_SIZE();
    771   }
    772 
    773   optimize_huffman_table(d, 2, XDEFLATE_MAX_HUFF_SYMBOLS_2, 7, 0);
    774 
    775   PUT_BITS(2, 2);
    776 
    777   PUT_BITS(num_lit_codes - 257, 5);
    778   PUT_BITS(num_dist_codes - 1, 5);
    779 
    780   for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--)
    781     if (d->m_huff_code_sizes[2]
    782                             [s_packed_code_size_syms_swizzle[num_bit_lengths]])
    783       break;
    784   num_bit_lengths = XDEFLATE_MAX(4, (num_bit_lengths + 1));
    785   PUT_BITS(num_bit_lengths - 4, 4);
    786   for (i = 0; (int)i < num_bit_lengths; i++)
    787     PUT_BITS(d->m_huff_code_sizes[2][s_packed_code_size_syms_swizzle[i]], 3);
    788 
    789   for (packed_code_sizes_index = 0;
    790        packed_code_sizes_index < num_packed_code_sizes;) {
    791     uint32_t code = packed_code_sizes[packed_code_sizes_index++];
    792     XDEFLATE_ASSERT(code < XDEFLATE_MAX_HUFF_SYMBOLS_2);
    793     PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]);
    794     if (code >= 16)
    795       PUT_BITS(packed_code_sizes[packed_code_sizes_index++],
    796                "\02\03\07"[code - 16]);
    797   }
    798 }
    799 
    800 static void start_static_block(xdeflate_compressor* d) {
    801   uint32_t i;
    802   uint8_t* p = &d->m_huff_code_sizes[0][0];
    803 
    804   for (i = 0; i <= 143; ++i) *p++ = 8;
    805   for (; i <= 255; ++i) *p++ = 9;
    806   for (; i <= 279; ++i) *p++ = 7;
    807   for (; i <= 287; ++i) *p++ = 8;
    808 
    809   memset(d->m_huff_code_sizes[1], 5, 32);
    810 
    811   optimize_huffman_table(d, 0, 288, 15, 1);
    812   optimize_huffman_table(d, 1, 32, 15, 1);
    813 
    814   PUT_BITS(1, 2);
    815 }
    816 
    817 static int compress_lz_codes(xdeflate_compressor* d) {
    818   uint32_t flags;
    819   uint8_t* pLZ_codes;
    820 
    821   flags = 1;
    822   for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf;
    823        flags >>= 1) {
    824     if (flags == 1) flags = *pLZ_codes++ | 0x100;
    825     if (flags & 1) {
    826       uint32_t sym, num_extra_bits;
    827       uint32_t match_len = pLZ_codes[0],
    828                match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8));
    829       pLZ_codes += 3;
    830 
    831       XDEFLATE_ASSERT(d->m_huff_code_sizes[0][s_len_sym[match_len]]);
    832       PUT_BITS(d->m_huff_codes[0][s_len_sym[match_len]],
    833                d->m_huff_code_sizes[0][s_len_sym[match_len]]);
    834       PUT_BITS(match_len & s_bitmasks[s_len_extra[match_len]],
    835                s_len_extra[match_len]);
    836 
    837       if (match_dist < 512) {
    838         sym = s_small_dist_sym[match_dist];
    839         num_extra_bits = s_small_dist_extra[match_dist];
    840       } else {
    841         sym = s_large_dist_sym[match_dist >> 8];
    842         num_extra_bits = s_large_dist_extra[match_dist >> 8];
    843       }
    844       XDEFLATE_ASSERT(d->m_huff_code_sizes[1][sym]);
    845       PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]);
    846       PUT_BITS(match_dist & s_bitmasks[num_extra_bits], num_extra_bits);
    847     } else {
    848       uint32_t lit = *pLZ_codes++;
    849       XDEFLATE_ASSERT(d->m_huff_code_sizes[0][lit]);
    850       PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
    851     }
    852   }
    853 
    854   PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]);
    855 
    856   return (d->m_pOutput_buf < d->m_pOutput_buf_end);
    857 }
    858 
    859 static int compress_block(xdeflate_compressor* d, int static_block) {
    860   if (static_block)
    861     start_static_block(d);
    862   else
    863     start_dynamic_block(d);
    864   return compress_lz_codes(d);
    865 }
    866 
    867 static int flush_block(xdeflate_compressor* d, int flush) {
    868   uint32_t saved_bit_buf, saved_bits_in;
    869   uint8_t* pSaved_output_buf;
    870   int comp_block_succeeded = 0;
    871   int n, use_raw_block =
    872              ((d->m_flags & XDEFLATE_FORCE_RAW_BLOCKS) != 0) &&
    873              (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size;
    874   uint8_t* pOutput_buf_start =
    875       ((d->m_pPut_buf_func == NULL) &&
    876        ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= XDEFLATE_OUT_BUF_SIZE))
    877           ? ((uint8_t*)d->m_pOut_buf + d->m_out_buf_ofs)
    878           : d->m_output_buf;
    879 
    880   d->m_pOutput_buf = pOutput_buf_start;
    881   d->m_pOutput_buf_end = d->m_pOutput_buf + XDEFLATE_OUT_BUF_SIZE - 16;
    882 
    883   XDEFLATE_ASSERT(!d->m_output_flush_remaining);
    884   d->m_output_flush_ofs = 0;
    885   d->m_output_flush_remaining = 0;
    886 
    887   *d->m_pLZ_flags = (uint8_t)(*d->m_pLZ_flags >> d->m_num_flags_left);
    888   d->m_pLZ_code_buf -= (d->m_num_flags_left == 8);
    889 
    890   if ((d->m_flags & XDEFLATE_WRITE_ZLIB_HEADER) && (!d->m_block_index)) {
    891     const uint8_t cmf = 0x78;
    892     uint8_t flg, flevel = 3;
    893     uint32_t header, i, mz_un = sizeof(s_num_probes) / sizeof(uint32_t);
    894 
    895     for (i = 0; i < mz_un; i++)
    896       if (s_num_probes[i] == (d->m_flags & 0xFFF)) break;
    897 
    898     if (i < 2)
    899       flevel = 0;
    900     else if (i < 6)
    901       flevel = 1;
    902     else if (i == 6)
    903       flevel = 2;
    904 
    905     header = cmf << 8 | (flevel << 6);
    906     header += 31 - (header % 31);
    907     flg = header & 0xFF;
    908 
    909     PUT_BITS(cmf, 8);
    910     PUT_BITS(flg, 8);
    911   }
    912 
    913   PUT_BITS(flush == XDEFLATE_FINISH, 1);
    914 
    915   pSaved_output_buf = d->m_pOutput_buf;
    916   saved_bit_buf = d->m_bit_buffer;
    917   saved_bits_in = d->m_bits_in;
    918 
    919   if (!use_raw_block)
    920     comp_block_succeeded =
    921         compress_block(d, (d->m_flags & XDEFLATE_FORCE_STATIC_BLOCKS) ||
    922                               (d->m_total_lz_bytes < 48));
    923 
    924   if (((use_raw_block) ||
    925        ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >=
    926                                   d->m_total_lz_bytes))) &&
    927       ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size)) {
    928     uint32_t i;
    929     d->m_pOutput_buf = pSaved_output_buf;
    930     d->m_bit_buffer = saved_bit_buf;
    931     d->m_bits_in = saved_bits_in;
    932     PUT_BITS(0, 2);
    933     if (d->m_bits_in) {
    934       PUT_BITS(0, 8 - d->m_bits_in);
    935     }
    936     for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) {
    937       PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16);
    938     }
    939     for (i = 0; i < d->m_total_lz_bytes; ++i) {
    940       PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) &
    941                          XDEFLATE_LZ_DICT_SIZE_MASK],
    942                8);
    943     }
    944   } else if (!comp_block_succeeded) {
    945     d->m_pOutput_buf = pSaved_output_buf;
    946     d->m_bit_buffer = saved_bit_buf;
    947     d->m_bits_in = saved_bits_in;
    948     compress_block(d, 1);
    949   }
    950 
    951   if (flush) {
    952     if (flush == XDEFLATE_FINISH) {
    953       if (d->m_bits_in) {
    954         PUT_BITS(0, 8 - d->m_bits_in);
    955       }
    956       if (d->m_flags & XDEFLATE_WRITE_ZLIB_HEADER) {
    957         uint32_t i, a = d->m_adler32;
    958         for (i = 0; i < 4; i++) {
    959           PUT_BITS((a >> 24) & 0xFF, 8);
    960           a <<= 8;
    961         }
    962       }
    963     } else {
    964       uint32_t i, z = 0;
    965       PUT_BITS(0, 3);
    966       if (d->m_bits_in) {
    967         PUT_BITS(0, 8 - d->m_bits_in);
    968       }
    969       for (i = 2; i; --i, z ^= 0xFFFF) {
    970         PUT_BITS(z & 0xFFFF, 16);
    971       }
    972     }
    973   }
    974 
    975   XDEFLATE_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end);
    976 
    977   memset(&d->m_huff_count[0][0], 0,
    978          sizeof(d->m_huff_count[0][0]) * XDEFLATE_MAX_HUFF_SYMBOLS_0);
    979   memset(&d->m_huff_count[1][0], 0,
    980          sizeof(d->m_huff_count[1][0]) * XDEFLATE_MAX_HUFF_SYMBOLS_1);
    981 
    982   d->m_pLZ_code_buf = d->m_lz_code_buf + 1;
    983   d->m_pLZ_flags = d->m_lz_code_buf;
    984   d->m_num_flags_left = 8;
    985   d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes;
    986   d->m_total_lz_bytes = 0;
    987   d->m_block_index++;
    988 
    989   if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) {
    990     if (d->m_pPut_buf_func) {
    991       *d->m_pIn_buf_size = d->m_pSrc - (const uint8_t*)d->m_pIn_buf;
    992       if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user))
    993         return (d->m_prev_return_status = XDEFLATE_STATUS_PUT_BUF_FAILED);
    994     } else if (pOutput_buf_start == d->m_output_buf) {
    995       int bytes_to_copy = (int)XDEFLATE_MIN(
    996           (size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs));
    997       memcpy((uint8_t*)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf,
    998              bytes_to_copy);
    999       d->m_out_buf_ofs += bytes_to_copy;
   1000       if ((n -= bytes_to_copy) != 0) {
   1001         d->m_output_flush_ofs = bytes_to_copy;
   1002         d->m_output_flush_remaining = n;
   1003       }
   1004     } else {
   1005       d->m_out_buf_ofs += n;
   1006     }
   1007   }
   1008 
   1009   return d->m_output_flush_remaining;
   1010 }
   1011 
   1012 XDEFLATE_FORCEINLINE void find_match(xdeflate_compressor* d,
   1013                                      uint32_t lookahead_pos, uint32_t max_dist,
   1014                                      uint32_t max_match_len,
   1015                                      uint32_t* pMatch_dist,
   1016                                      uint32_t* pMatch_len) {
   1017   uint32_t dist, pos = lookahead_pos & XDEFLATE_LZ_DICT_SIZE_MASK,
   1018                  match_len = *pMatch_len, probe_pos = pos, next_probe_pos,
   1019                  probe_len;
   1020   uint32_t num_probes_left = d->m_max_probes[match_len >= 32];
   1021   const uint8_t *s = d->m_dict + pos, *p, *q;
   1022   uint8_t c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1];
   1023   XDEFLATE_ASSERT(max_match_len <= XDEFLATE_MAX_MATCH_LEN);
   1024   if (max_match_len <= match_len) return;
   1025   for (;;) {
   1026     for (;;) {
   1027       if (--num_probes_left == 0) return;
   1028       next_probe_pos = d->m_next[probe_pos];
   1029       if ((!next_probe_pos) ||
   1030           ((dist = (uint16_t)(lookahead_pos - next_probe_pos)) > max_dist))
   1031         return;
   1032       probe_pos = next_probe_pos & XDEFLATE_LZ_DICT_SIZE_MASK;
   1033       if ((d->m_dict[probe_pos + match_len] == c0) &&
   1034           (d->m_dict[probe_pos + match_len - 1] == c1))
   1035         break;
   1036     }
   1037     if (!dist) break;
   1038     p = s;
   1039     q = d->m_dict + probe_pos;
   1040     for (probe_len = 0; probe_len < max_match_len; probe_len++)
   1041       if (*p++ != *q++) break;
   1042     if (probe_len > match_len) {
   1043       *pMatch_dist = dist;
   1044       if ((*pMatch_len = match_len = probe_len) == max_match_len) return;
   1045       c0 = d->m_dict[pos + match_len];
   1046       c1 = d->m_dict[pos + match_len - 1];
   1047     }
   1048   }
   1049 }
   1050 
   1051 XDEFLATE_FORCEINLINE void record_literal(xdeflate_compressor* d, uint8_t lit) {
   1052   d->m_total_lz_bytes++;
   1053   *d->m_pLZ_code_buf++ = lit;
   1054   *d->m_pLZ_flags = (uint8_t)(*d->m_pLZ_flags >> 1);
   1055   if (--d->m_num_flags_left == 0) {
   1056     d->m_num_flags_left = 8;
   1057     d->m_pLZ_flags = d->m_pLZ_code_buf++;
   1058   }
   1059   d->m_huff_count[0][lit]++;
   1060 }
   1061 
   1062 XDEFLATE_FORCEINLINE void record_match(xdeflate_compressor* d,
   1063                                        uint32_t match_len,
   1064                                        uint32_t match_dist) {
   1065   uint32_t s0, s1;
   1066 
   1067   XDEFLATE_ASSERT((match_len >= XDEFLATE_MIN_MATCH_LEN) && (match_dist >= 1) &&
   1068                   (match_dist <= XDEFLATE_LZ_DICT_SIZE));
   1069 
   1070   d->m_total_lz_bytes += match_len;
   1071 
   1072   d->m_pLZ_code_buf[0] = (uint8_t)(match_len - XDEFLATE_MIN_MATCH_LEN);
   1073 
   1074   match_dist -= 1;
   1075   d->m_pLZ_code_buf[1] = (uint8_t)(match_dist & 0xFF);
   1076   d->m_pLZ_code_buf[2] = (uint8_t)(match_dist >> 8);
   1077   d->m_pLZ_code_buf += 3;
   1078 
   1079   *d->m_pLZ_flags = (uint8_t)((*d->m_pLZ_flags >> 1) | 0x80);
   1080   if (--d->m_num_flags_left == 0) {
   1081     d->m_num_flags_left = 8;
   1082     d->m_pLZ_flags = d->m_pLZ_code_buf++;
   1083   }
   1084 
   1085   s0 = s_small_dist_sym[match_dist & 511];
   1086   s1 = s_large_dist_sym[(match_dist >> 8) & 127];
   1087   d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++;
   1088   d->m_huff_count[0][s_len_sym[match_len - XDEFLATE_MIN_MATCH_LEN]]++;
   1089 }
   1090 
   1091 static int compress_normal(xdeflate_compressor* d) {
   1092   const uint8_t* pSrc = d->m_pSrc;
   1093   size_t src_buf_left = d->m_src_buf_left;
   1094   xdeflate_flush flush = d->m_flush;
   1095 
   1096   while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) {
   1097     uint32_t len_to_move, cur_match_dist, cur_match_len, cur_pos;
   1098     if ((d->m_lookahead_size + d->m_dict_size) >=
   1099         (XDEFLATE_MIN_MATCH_LEN - 1)) {
   1100       uint32_t dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) &
   1101                          XDEFLATE_LZ_DICT_SIZE_MASK;
   1102       uint32_t ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2;
   1103       uint32_t hash = (d->m_dict[ins_pos & XDEFLATE_LZ_DICT_SIZE_MASK]
   1104                        << XDEFLATE_LZ_HASH_SHIFT) ^
   1105                       d->m_dict[(ins_pos + 1) & XDEFLATE_LZ_DICT_SIZE_MASK];
   1106       uint32_t num_bytes_to_process = (uint32_t)XDEFLATE_MIN(
   1107           src_buf_left, XDEFLATE_MAX_MATCH_LEN - d->m_lookahead_size);
   1108       const uint8_t* pSrc_end = pSrc ? pSrc + num_bytes_to_process : NULL;
   1109       src_buf_left -= num_bytes_to_process;
   1110       d->m_lookahead_size += num_bytes_to_process;
   1111       while (pSrc != pSrc_end) {
   1112         uint8_t c = *pSrc++;
   1113         d->m_dict[dst_pos] = c;
   1114         if (dst_pos < (XDEFLATE_MAX_MATCH_LEN - 1))
   1115           d->m_dict[XDEFLATE_LZ_DICT_SIZE + dst_pos] = c;
   1116         hash = ((hash << XDEFLATE_LZ_HASH_SHIFT) ^ c) &
   1117                (XDEFLATE_LZ_HASH_SIZE - 1);
   1118         d->m_next[ins_pos & XDEFLATE_LZ_DICT_SIZE_MASK] = d->m_hash[hash];
   1119         d->m_hash[hash] = (uint16_t)(ins_pos);
   1120         dst_pos = (dst_pos + 1) & XDEFLATE_LZ_DICT_SIZE_MASK;
   1121         ins_pos++;
   1122       }
   1123     } else {
   1124       while ((src_buf_left) && (d->m_lookahead_size < XDEFLATE_MAX_MATCH_LEN)) {
   1125         uint8_t c = *pSrc++;
   1126         uint32_t dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) &
   1127                            XDEFLATE_LZ_DICT_SIZE_MASK;
   1128         src_buf_left--;
   1129         d->m_dict[dst_pos] = c;
   1130         if (dst_pos < (XDEFLATE_MAX_MATCH_LEN - 1))
   1131           d->m_dict[XDEFLATE_LZ_DICT_SIZE + dst_pos] = c;
   1132         if ((++d->m_lookahead_size + d->m_dict_size) >=
   1133             XDEFLATE_MIN_MATCH_LEN) {
   1134           uint32_t ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2;
   1135           uint32_t hash =
   1136               ((d->m_dict[ins_pos & XDEFLATE_LZ_DICT_SIZE_MASK]
   1137                 << (XDEFLATE_LZ_HASH_SHIFT * 2)) ^
   1138                (d->m_dict[(ins_pos + 1) & XDEFLATE_LZ_DICT_SIZE_MASK]
   1139                 << XDEFLATE_LZ_HASH_SHIFT) ^
   1140                c) &
   1141               (XDEFLATE_LZ_HASH_SIZE - 1);
   1142           d->m_next[ins_pos & XDEFLATE_LZ_DICT_SIZE_MASK] = d->m_hash[hash];
   1143           d->m_hash[hash] = (uint16_t)(ins_pos);
   1144         }
   1145       }
   1146     }
   1147     d->m_dict_size = XDEFLATE_MIN(XDEFLATE_LZ_DICT_SIZE - d->m_lookahead_size,
   1148                                   d->m_dict_size);
   1149     if ((!flush) && (d->m_lookahead_size < XDEFLATE_MAX_MATCH_LEN)) break;
   1150 
   1151     len_to_move = 1;
   1152     cur_match_dist = 0;
   1153     cur_match_len = d->m_saved_match_len ? d->m_saved_match_len
   1154                                          : (XDEFLATE_MIN_MATCH_LEN - 1);
   1155     cur_pos = d->m_lookahead_pos & XDEFLATE_LZ_DICT_SIZE_MASK;
   1156     if (d->m_flags & (XDEFLATE_RLE_MATCHES | XDEFLATE_FORCE_RAW_BLOCKS)) {
   1157       if ((d->m_dict_size) && (!(d->m_flags & XDEFLATE_FORCE_RAW_BLOCKS))) {
   1158         uint8_t c = d->m_dict[(cur_pos - 1) & XDEFLATE_LZ_DICT_SIZE_MASK];
   1159         cur_match_len = 0;
   1160         while (cur_match_len < d->m_lookahead_size) {
   1161           if (d->m_dict[cur_pos + cur_match_len] != c) break;
   1162           cur_match_len++;
   1163         }
   1164         if (cur_match_len < XDEFLATE_MIN_MATCH_LEN)
   1165           cur_match_len = 0;
   1166         else
   1167           cur_match_dist = 1;
   1168       }
   1169     } else {
   1170       find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size,
   1171                  &cur_match_dist, &cur_match_len);
   1172     }
   1173     if (((cur_match_len == XDEFLATE_MIN_MATCH_LEN) &&
   1174          (cur_match_dist >= 8U * 1024U)) ||
   1175         (cur_pos == cur_match_dist) ||
   1176         ((d->m_flags & XDEFLATE_FILTER_MATCHES) && (cur_match_len <= 5))) {
   1177       cur_match_dist = cur_match_len = 0;
   1178     }
   1179     if (d->m_saved_match_len) {
   1180       if (cur_match_len > d->m_saved_match_len) {
   1181         record_literal(d, (uint8_t)d->m_saved_lit);
   1182         if (cur_match_len >= 128) {
   1183           record_match(d, cur_match_len, cur_match_dist);
   1184           d->m_saved_match_len = 0;
   1185           len_to_move = cur_match_len;
   1186         } else {
   1187           d->m_saved_lit = d->m_dict[cur_pos];
   1188           d->m_saved_match_dist = cur_match_dist;
   1189           d->m_saved_match_len = cur_match_len;
   1190         }
   1191       } else {
   1192         record_match(d, d->m_saved_match_len, d->m_saved_match_dist);
   1193         len_to_move = d->m_saved_match_len - 1;
   1194         d->m_saved_match_len = 0;
   1195       }
   1196     } else if (!cur_match_dist)
   1197       record_literal(d,
   1198                      d->m_dict[XDEFLATE_MIN(cur_pos, sizeof(d->m_dict) - 1)]);
   1199     else if ((d->m_greedy_parsing) || (d->m_flags & XDEFLATE_RLE_MATCHES) ||
   1200              (cur_match_len >= 128)) {
   1201       record_match(d, cur_match_len, cur_match_dist);
   1202       len_to_move = cur_match_len;
   1203     } else {
   1204       d->m_saved_lit = d->m_dict[XDEFLATE_MIN(cur_pos, sizeof(d->m_dict) - 1)];
   1205       d->m_saved_match_dist = cur_match_dist;
   1206       d->m_saved_match_len = cur_match_len;
   1207     }
   1208     d->m_lookahead_pos += len_to_move;
   1209     XDEFLATE_ASSERT(d->m_lookahead_size >= len_to_move);
   1210     d->m_lookahead_size -= len_to_move;
   1211     d->m_dict_size = XDEFLATE_MIN(d->m_dict_size + len_to_move,
   1212                                   (uint32_t)XDEFLATE_LZ_DICT_SIZE);
   1213     if ((d->m_pLZ_code_buf >
   1214          &d->m_lz_code_buf[XDEFLATE_LZ_CODE_BUF_SIZE - 8]) ||
   1215         ((d->m_total_lz_bytes > 31 * 1024) &&
   1216          (((((uint32_t)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >=
   1217            d->m_total_lz_bytes) ||
   1218           (d->m_flags & XDEFLATE_FORCE_RAW_BLOCKS)))) {
   1219       int n;
   1220       d->m_pSrc = pSrc;
   1221       d->m_src_buf_left = src_buf_left;
   1222       if ((n = flush_block(d, 0)) != 0) return (n < 0) ? 0 : 1;
   1223     }
   1224   }
   1225 
   1226   d->m_pSrc = pSrc;
   1227   d->m_src_buf_left = src_buf_left;
   1228   return 1;
   1229 }
   1230 
   1231 static xdeflate_status flush_output_buffer(xdeflate_compressor* d) {
   1232   if (d->m_pIn_buf_size) {
   1233     *d->m_pIn_buf_size = d->m_pSrc - (const uint8_t*)d->m_pIn_buf;
   1234   }
   1235 
   1236   if (d->m_pOut_buf_size) {
   1237     size_t n = XDEFLATE_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs,
   1238                             d->m_output_flush_remaining);
   1239     memcpy((uint8_t*)d->m_pOut_buf + d->m_out_buf_ofs,
   1240            d->m_output_buf + d->m_output_flush_ofs, n);
   1241     d->m_output_flush_ofs += (uint32_t)n;
   1242     d->m_output_flush_remaining -= (uint32_t)n;
   1243     d->m_out_buf_ofs += n;
   1244 
   1245     *d->m_pOut_buf_size = d->m_out_buf_ofs;
   1246   }
   1247 
   1248   return (d->m_finished && !d->m_output_flush_remaining) ? XDEFLATE_STATUS_DONE
   1249                                                          : XDEFLATE_STATUS_OKAY;
   1250 }
   1251 
   1252 /* ============================================================================
   1253  * Public Compression API
   1254  * ============================================================================
   1255  */
   1256 
   1257 xdeflate_status xdeflate_init(xdeflate_compressor* d,
   1258                               xdeflate_put_buf_func put_buf_func,
   1259                               void* put_buf_user, int flags) {
   1260   d->m_pPut_buf_func = put_buf_func;
   1261   d->m_pPut_buf_user = put_buf_user;
   1262   d->m_flags = (uint32_t)(flags);
   1263   d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3;
   1264   d->m_greedy_parsing = (flags & XDEFLATE_GREEDY_PARSING) != 0;
   1265   d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3;
   1266   if (!(flags & XDEFLATE_NONDETERMINISTIC)) XDEFLATE_CLEAR_ARR(d->m_hash);
   1267   d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size =
   1268       d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0;
   1269   d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished =
   1270       d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0;
   1271   d->m_pLZ_code_buf = d->m_lz_code_buf + 1;
   1272   d->m_pLZ_flags = d->m_lz_code_buf;
   1273   *d->m_pLZ_flags = 0;
   1274   d->m_num_flags_left = 8;
   1275   d->m_pOutput_buf = d->m_output_buf;
   1276   d->m_pOutput_buf_end = d->m_output_buf;
   1277   d->m_prev_return_status = XDEFLATE_STATUS_OKAY;
   1278   d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0;
   1279   d->m_adler32 = 1;
   1280   d->m_pIn_buf = NULL;
   1281   d->m_pOut_buf = NULL;
   1282   d->m_pIn_buf_size = NULL;
   1283   d->m_pOut_buf_size = NULL;
   1284   d->m_flush = XDEFLATE_NO_FLUSH;
   1285   d->m_pSrc = NULL;
   1286   d->m_src_buf_left = 0;
   1287   d->m_out_buf_ofs = 0;
   1288   if (!(flags & XDEFLATE_NONDETERMINISTIC)) XDEFLATE_CLEAR_ARR(d->m_dict);
   1289   memset(&d->m_huff_count[0][0], 0,
   1290          sizeof(d->m_huff_count[0][0]) * XDEFLATE_MAX_HUFF_SYMBOLS_0);
   1291   memset(&d->m_huff_count[1][0], 0,
   1292          sizeof(d->m_huff_count[1][0]) * XDEFLATE_MAX_HUFF_SYMBOLS_1);
   1293   return XDEFLATE_STATUS_OKAY;
   1294 }
   1295 
   1296 xdeflate_status xdeflate_compress(xdeflate_compressor* d, const void* in_buf,
   1297                                   size_t* in_buf_size, void* out_buf,
   1298                                   size_t* out_buf_size, xdeflate_flush flush) {
   1299   if (!d) {
   1300     if (in_buf_size) *in_buf_size = 0;
   1301     if (out_buf_size) *out_buf_size = 0;
   1302     return XDEFLATE_STATUS_BAD_PARAM;
   1303   }
   1304 
   1305   d->m_pIn_buf = in_buf;
   1306   d->m_pIn_buf_size = in_buf_size;
   1307   d->m_pOut_buf = out_buf;
   1308   d->m_pOut_buf_size = out_buf_size;
   1309   d->m_pSrc = (const uint8_t*)(in_buf);
   1310   d->m_src_buf_left = in_buf_size ? *in_buf_size : 0;
   1311   d->m_out_buf_ofs = 0;
   1312   d->m_flush = flush;
   1313 
   1314   if (((d->m_pPut_buf_func != NULL) ==
   1315        ((out_buf != NULL) || (out_buf_size != NULL))) ||
   1316       (d->m_prev_return_status != XDEFLATE_STATUS_OKAY) ||
   1317       (d->m_wants_to_finish && (flush != XDEFLATE_FINISH)) ||
   1318       (in_buf_size && *in_buf_size && !in_buf) ||
   1319       (out_buf_size && *out_buf_size && !out_buf)) {
   1320     if (in_buf_size) *in_buf_size = 0;
   1321     if (out_buf_size) *out_buf_size = 0;
   1322     return (d->m_prev_return_status = XDEFLATE_STATUS_BAD_PARAM);
   1323   }
   1324   d->m_wants_to_finish |= (flush == XDEFLATE_FINISH);
   1325 
   1326   if ((d->m_output_flush_remaining) || (d->m_finished))
   1327     return (d->m_prev_return_status = flush_output_buffer(d));
   1328 
   1329   if (!compress_normal(d)) return d->m_prev_return_status;
   1330 
   1331   if ((d->m_flags & (XDEFLATE_WRITE_ZLIB_HEADER | XDEFLATE_COMPUTE_ADLER32)) &&
   1332       (in_buf))
   1333     d->m_adler32 =
   1334         xdeflate_adler32_update(d->m_adler32, (const uint8_t*)in_buf,
   1335                                 (size_t)(d->m_pSrc - (const uint8_t*)in_buf));
   1336 
   1337   if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) &&
   1338       (!d->m_output_flush_remaining)) {
   1339     if (flush_block(d, flush) < 0) return d->m_prev_return_status;
   1340     d->m_finished = (flush == XDEFLATE_FINISH);
   1341     if (flush == XDEFLATE_FULL_FLUSH) {
   1342       XDEFLATE_CLEAR_ARR(d->m_hash);
   1343       XDEFLATE_CLEAR_ARR(d->m_next);
   1344       d->m_dict_size = 0;
   1345     }
   1346   }
   1347 
   1348   return (d->m_prev_return_status = flush_output_buffer(d));
   1349 }
   1350 
   1351 uint32_t xdeflate_get_adler32(xdeflate_compressor* d) { return d->m_adler32; }
   1352 
   1353 uint32_t xdeflate_create_flags(int level, int window_bits, int strategy) {
   1354   uint32_t comp_flags =
   1355       s_num_probes[(level >= 0) ? XDEFLATE_MIN(10, level) : 6] |
   1356       ((level <= 3) ? XDEFLATE_GREEDY_PARSING : 0);
   1357   if (window_bits > 0) comp_flags |= XDEFLATE_WRITE_ZLIB_HEADER;
   1358 
   1359   if (!level)
   1360     comp_flags |= XDEFLATE_FORCE_RAW_BLOCKS;
   1361   else if (strategy == 1) /* filtered */
   1362     comp_flags |= XDEFLATE_FILTER_MATCHES;
   1363   else if (strategy == 2) /* huffman only */
   1364     comp_flags &= ~XDEFLATE_MAX_PROBES_MASK;
   1365   else if (strategy == 4) /* fixed */
   1366     comp_flags |= XDEFLATE_FORCE_STATIC_BLOCKS;
   1367   else if (strategy == 3) /* rle */
   1368     comp_flags |= XDEFLATE_RLE_MATCHES;
   1369 
   1370   return comp_flags;
   1371 }
   1372 
   1373 /* ============================================================================
   1374  * Decompression Implementation
   1375  * ============================================================================
   1376  */
   1377 
   1378 #define XINFLATE_MEMCPY(d, s, l) memcpy(d, s, l)
   1379 #define XINFLATE_MEMSET(p, c, l) memset(p, c, l)
   1380 
   1381 #define XINFLATE_CR_BEGIN \
   1382   switch (r->m_state) {   \
   1383     case 0:
   1384 #define XINFLATE_CR_RETURN(state_index, result) \
   1385   do {                                          \
   1386     status = result;                            \
   1387     r->m_state = state_index;                   \
   1388     goto common_exit;                           \
   1389     case state_index:;                          \
   1390   }                                             \
   1391   XDEFLATE_MACRO_END
   1392 #define XINFLATE_CR_RETURN_FOREVER(state_index, result) \
   1393   do {                                                  \
   1394     for (;;) {                                          \
   1395       XINFLATE_CR_RETURN(state_index, result);          \
   1396     }                                                   \
   1397   }                                                     \
   1398   XDEFLATE_MACRO_END
   1399 #define XINFLATE_CR_FINISH }
   1400 
   1401 #define XINFLATE_GET_BYTE(state_index, c)                                    \
   1402   do {                                                                       \
   1403     while (pIn_buf_cur >= pIn_buf_end) {                                     \
   1404       XINFLATE_CR_RETURN(state_index,                                        \
   1405                          (decomp_flags & XINFLATE_FLAG_HAS_MORE_INPUT)       \
   1406                              ? XINFLATE_STATUS_NEEDS_MORE_INPUT              \
   1407                              : XINFLATE_STATUS_FAILED_CANNOT_MAKE_PROGRESS); \
   1408     }                                                                        \
   1409     (c) = *pIn_buf_cur++;                                                    \
   1410   }                                                                          \
   1411   XDEFLATE_MACRO_END
   1412 
   1413 #define XINFLATE_NEED_BITS(state_index, n)            \
   1414   do {                                                \
   1415     uint32_t c;                                       \
   1416     XINFLATE_GET_BYTE(state_index, c);                \
   1417     bit_buf |= (((xinflate_bit_buf_t)c) << num_bits); \
   1418     num_bits += 8;                                    \
   1419   } while (num_bits < (uint32_t)(n))
   1420 #define XINFLATE_SKIP_BITS(state_index, n) \
   1421   do {                                     \
   1422     if (num_bits < (uint32_t)(n)) {        \
   1423       XINFLATE_NEED_BITS(state_index, n);  \
   1424     }                                      \
   1425     bit_buf >>= (n);                       \
   1426     num_bits -= (n);                       \
   1427   }                                        \
   1428   XDEFLATE_MACRO_END
   1429 #define XINFLATE_GET_BITS(state_index, b, n) \
   1430   do {                                       \
   1431     if (num_bits < (uint32_t)(n)) {          \
   1432       XINFLATE_NEED_BITS(state_index, n);    \
   1433     }                                        \
   1434     (b) = bit_buf & ((1 << (n)) - 1);        \
   1435     bit_buf >>= (n);                         \
   1436     num_bits -= (n);                         \
   1437   }                                          \
   1438   XDEFLATE_MACRO_END
   1439 
   1440 #define XINFLATE_HUFF_BITBUF_FILL(state_index, pLookUp, pTree)   \
   1441   do {                                                           \
   1442     temp = (pLookUp)[bit_buf & (XINFLATE_FAST_LOOKUP_SIZE - 1)]; \
   1443     if (temp >= 0) {                                             \
   1444       code_len = temp >> 9;                                      \
   1445       if ((code_len) && (num_bits >= code_len)) break;           \
   1446     } else if (num_bits > XINFLATE_FAST_LOOKUP_BITS) {           \
   1447       code_len = XINFLATE_FAST_LOOKUP_BITS;                      \
   1448       do {                                                       \
   1449         temp = (pTree)[~temp + ((bit_buf >> code_len++) & 1)];   \
   1450       } while ((temp < 0) && (num_bits >= (code_len + 1)));      \
   1451       if (temp >= 0) break;                                      \
   1452     }                                                            \
   1453     XINFLATE_GET_BYTE(state_index, c);                           \
   1454     bit_buf |= (((xinflate_bit_buf_t)c) << num_bits);            \
   1455     num_bits += 8;                                               \
   1456   } while (num_bits < 15);
   1457 
   1458 #define XINFLATE_HUFF_DECODE(state_index, sym, pLookUp, pTree)               \
   1459   do {                                                                       \
   1460     int temp;                                                                \
   1461     uint32_t code_len, c;                                                    \
   1462     if (num_bits < 15) {                                                     \
   1463       if ((pIn_buf_end - pIn_buf_cur) < 2) {                                 \
   1464         XINFLATE_HUFF_BITBUF_FILL(state_index, pLookUp, pTree);              \
   1465       } else {                                                               \
   1466         bit_buf |= (((xinflate_bit_buf_t)pIn_buf_cur[0]) << num_bits) |      \
   1467                    (((xinflate_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); \
   1468         pIn_buf_cur += 2;                                                    \
   1469         num_bits += 16;                                                      \
   1470       }                                                                      \
   1471     }                                                                        \
   1472     if ((temp = (pLookUp)[bit_buf & (XINFLATE_FAST_LOOKUP_SIZE - 1)]) >= 0)  \
   1473       code_len = temp >> 9, temp &= 511;                                     \
   1474     else {                                                                   \
   1475       code_len = XINFLATE_FAST_LOOKUP_BITS;                                  \
   1476       do {                                                                   \
   1477         temp = (pTree)[~temp + ((bit_buf >> code_len++) & 1)];               \
   1478       } while (temp < 0);                                                    \
   1479     }                                                                        \
   1480     (sym) = temp;                                                            \
   1481     bit_buf >>= code_len;                                                    \
   1482     num_bits -= code_len;                                                    \
   1483   }                                                                          \
   1484   XDEFLATE_MACRO_END
   1485 
   1486 static void xinflate_clear_tree(xinflate_decompressor* r) {
   1487   if (r->m_type == 0)
   1488     XDEFLATE_CLEAR_ARR(r->m_tree_0);
   1489   else if (r->m_type == 1)
   1490     XDEFLATE_CLEAR_ARR(r->m_tree_1);
   1491   else
   1492     XDEFLATE_CLEAR_ARR(r->m_tree_2);
   1493 }
   1494 
   1495 xinflate_status xinflate_decompress(
   1496     xinflate_decompressor* r, const uint8_t* pIn_buf_next, size_t* pIn_buf_size,
   1497     uint8_t* pOut_buf_start, uint8_t* pOut_buf_next, size_t* pOut_buf_size,
   1498     uint32_t decomp_flags) {
   1499   static const uint16_t s_length_base[31] = {
   1500       3,  4,  5,  6,  7,  8,  9,  10,  11,  13,  15,  17,  19,  23, 27, 31,
   1501       35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0,  0};
   1502   static const uint8_t s_length_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
   1503                                              1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4,
   1504                                              4, 4, 5, 5, 5, 5, 0, 0, 0};
   1505   static const uint16_t s_dist_base[32] = {
   1506       1,    2,    3,    4,    5,    7,     9,     13,    17,  25,   33,
   1507       49,   65,   97,   129,  193,  257,   385,   513,   769, 1025, 1537,
   1508       2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0,   0};
   1509   static const uint8_t s_dist_extra[32] = {
   1510       0, 0, 0, 0, 1, 1, 2, 2,  3,  3,  4,  4,  5,  5,  6,
   1511       6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
   1512   static const uint8_t s_length_dezigzag[19] = {
   1513       16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
   1514   static const uint16_t s_min_table_sizes[3] = {257, 1, 4};
   1515 
   1516   int16_t* pTrees[3];
   1517   uint8_t* pCode_sizes[3];
   1518 
   1519   xinflate_status status = XINFLATE_STATUS_FAILED;
   1520   uint32_t num_bits, dist, counter, num_extra;
   1521   xinflate_bit_buf_t bit_buf;
   1522   const uint8_t *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end =
   1523                                                  pIn_buf_next + *pIn_buf_size;
   1524   uint8_t *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end =
   1525                                              pOut_buf_next ? pOut_buf_next +
   1526                                                                  *pOut_buf_size
   1527                                                            : NULL;
   1528   size_t out_buf_size_mask =
   1529              (decomp_flags & XINFLATE_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)
   1530                  ? (size_t)-1
   1531                  : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1,
   1532          dist_from_out_buf_start;
   1533 
   1534   if (((out_buf_size_mask + 1) & out_buf_size_mask) ||
   1535       (pOut_buf_next < pOut_buf_start)) {
   1536     *pIn_buf_size = *pOut_buf_size = 0;
   1537     return XINFLATE_STATUS_BAD_PARAM;
   1538   }
   1539 
   1540   pTrees[0] = r->m_tree_0;
   1541   pTrees[1] = r->m_tree_1;
   1542   pTrees[2] = r->m_tree_2;
   1543   pCode_sizes[0] = r->m_code_size_0;
   1544   pCode_sizes[1] = r->m_code_size_1;
   1545   pCode_sizes[2] = r->m_code_size_2;
   1546 
   1547   num_bits = r->m_num_bits;
   1548   bit_buf = r->m_bit_buf;
   1549   dist = r->m_dist;
   1550   counter = r->m_counter;
   1551   num_extra = r->m_num_extra;
   1552   dist_from_out_buf_start = r->m_dist_from_out_buf_start;
   1553   XINFLATE_CR_BEGIN
   1554 
   1555   bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0;
   1556   r->m_z_adler32 = r->m_check_adler32 = 1;
   1557   if (decomp_flags & XINFLATE_FLAG_PARSE_ZLIB_HEADER) {
   1558     XINFLATE_GET_BYTE(1, r->m_zhdr0);
   1559     XINFLATE_GET_BYTE(2, r->m_zhdr1);
   1560     counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) ||
   1561                (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8));
   1562     if (!(decomp_flags & XINFLATE_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))
   1563       counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) ||
   1564                   ((out_buf_size_mask + 1) <
   1565                    (size_t)((size_t)1 << (8U + (r->m_zhdr0 >> 4)))));
   1566     if (counter) {
   1567       XINFLATE_CR_RETURN_FOREVER(36, XINFLATE_STATUS_FAILED);
   1568     }
   1569   }
   1570 
   1571   do {
   1572     XINFLATE_GET_BITS(3, r->m_final, 3);
   1573     r->m_type = r->m_final >> 1;
   1574     if (r->m_type == 0) {
   1575       XINFLATE_SKIP_BITS(5, num_bits & 7);
   1576       for (counter = 0; counter < 4; ++counter) {
   1577         if (num_bits)
   1578           XINFLATE_GET_BITS(6, r->m_raw_header[counter], 8);
   1579         else
   1580           XINFLATE_GET_BYTE(7, r->m_raw_header[counter]);
   1581       }
   1582       if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) !=
   1583           (uint32_t)(0xFFFF ^
   1584                      (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) {
   1585         XINFLATE_CR_RETURN_FOREVER(39, XINFLATE_STATUS_FAILED);
   1586       }
   1587       while ((counter) && (num_bits)) {
   1588         XINFLATE_GET_BITS(51, dist, 8);
   1589         while (pOut_buf_cur >= pOut_buf_end) {
   1590           XINFLATE_CR_RETURN(52, XINFLATE_STATUS_HAS_MORE_OUTPUT);
   1591         }
   1592         *pOut_buf_cur++ = (uint8_t)dist;
   1593         counter--;
   1594       }
   1595       while (counter) {
   1596         size_t n;
   1597         while (pOut_buf_cur >= pOut_buf_end) {
   1598           XINFLATE_CR_RETURN(9, XINFLATE_STATUS_HAS_MORE_OUTPUT);
   1599         }
   1600         while (pIn_buf_cur >= pIn_buf_end) {
   1601           XINFLATE_CR_RETURN(38,
   1602                              (decomp_flags & XINFLATE_FLAG_HAS_MORE_INPUT)
   1603                                  ? XINFLATE_STATUS_NEEDS_MORE_INPUT
   1604                                  : XINFLATE_STATUS_FAILED_CANNOT_MAKE_PROGRESS);
   1605         }
   1606         n = XDEFLATE_MIN(XDEFLATE_MIN((size_t)(pOut_buf_end - pOut_buf_cur),
   1607                                       (size_t)(pIn_buf_end - pIn_buf_cur)),
   1608                          counter);
   1609         XINFLATE_MEMCPY(pOut_buf_cur, pIn_buf_cur, n);
   1610         pIn_buf_cur += n;
   1611         pOut_buf_cur += n;
   1612         counter -= (uint32_t)n;
   1613       }
   1614     } else if (r->m_type == 3) {
   1615       XINFLATE_CR_RETURN_FOREVER(10, XINFLATE_STATUS_FAILED);
   1616     } else {
   1617       if (r->m_type == 1) {
   1618         uint8_t* p = r->m_code_size_0;
   1619         uint32_t i;
   1620         r->m_table_sizes[0] = 288;
   1621         r->m_table_sizes[1] = 32;
   1622         XINFLATE_MEMSET(r->m_code_size_1, 5, 32);
   1623         for (i = 0; i <= 143; ++i) *p++ = 8;
   1624         for (; i <= 255; ++i) *p++ = 9;
   1625         for (; i <= 279; ++i) *p++ = 7;
   1626         for (; i <= 287; ++i) *p++ = 8;
   1627       } else {
   1628         for (counter = 0; counter < 3; counter++) {
   1629           XINFLATE_GET_BITS(11, r->m_table_sizes[counter],
   1630                             "\05\05\04"[counter]);
   1631           r->m_table_sizes[counter] += s_min_table_sizes[counter];
   1632         }
   1633         XDEFLATE_CLEAR_ARR(r->m_code_size_2);
   1634         for (counter = 0; counter < r->m_table_sizes[2]; counter++) {
   1635           uint32_t s;
   1636           XINFLATE_GET_BITS(14, s, 3);
   1637           r->m_code_size_2[s_length_dezigzag[counter]] = (uint8_t)s;
   1638         }
   1639         r->m_table_sizes[2] = 19;
   1640       }
   1641       for (; (int)r->m_type >= 0; r->m_type--) {
   1642         int tree_next, tree_cur;
   1643         int16_t* pLookUp;
   1644         int16_t* pTree;
   1645         uint8_t* pCode_size;
   1646         uint32_t i, j, used_syms, total, sym_index, next_code[17],
   1647             total_syms[16];
   1648         pLookUp = r->m_look_up[r->m_type];
   1649         pTree = pTrees[r->m_type];
   1650         pCode_size = pCode_sizes[r->m_type];
   1651         XDEFLATE_CLEAR_ARR(total_syms);
   1652         XINFLATE_MEMSET(pLookUp, 0, sizeof(r->m_look_up[0]));
   1653         xinflate_clear_tree(r);
   1654         for (i = 0; i < r->m_table_sizes[r->m_type]; ++i)
   1655           total_syms[pCode_size[i]]++;
   1656         used_syms = 0, total = 0;
   1657         next_code[0] = next_code[1] = 0;
   1658         for (i = 1; i <= 15; ++i) {
   1659           used_syms += total_syms[i];
   1660           next_code[i + 1] = (total = ((total + total_syms[i]) << 1));
   1661         }
   1662         if ((65536 != total) && (used_syms > 1)) {
   1663           XINFLATE_CR_RETURN_FOREVER(35, XINFLATE_STATUS_FAILED);
   1664         }
   1665         for (tree_next = -1, sym_index = 0;
   1666              sym_index < r->m_table_sizes[r->m_type]; ++sym_index) {
   1667           uint32_t rev_code = 0, l, cur_code, code_size = pCode_size[sym_index];
   1668           if (!code_size) continue;
   1669           cur_code = next_code[code_size]++;
   1670           for (l = code_size; l > 0; l--, cur_code >>= 1)
   1671             rev_code = (rev_code << 1) | (cur_code & 1);
   1672           if (code_size <= XINFLATE_FAST_LOOKUP_BITS) {
   1673             int16_t k = (int16_t)((code_size << 9) | sym_index);
   1674             while (rev_code < XINFLATE_FAST_LOOKUP_SIZE) {
   1675               pLookUp[rev_code] = k;
   1676               rev_code += (1 << code_size);
   1677             }
   1678             continue;
   1679           }
   1680           if (0 == (tree_cur =
   1681                         pLookUp[rev_code & (XINFLATE_FAST_LOOKUP_SIZE - 1)])) {
   1682             pLookUp[rev_code & (XINFLATE_FAST_LOOKUP_SIZE - 1)] =
   1683                 (int16_t)tree_next;
   1684             tree_cur = tree_next;
   1685             tree_next -= 2;
   1686           }
   1687           rev_code >>= (XINFLATE_FAST_LOOKUP_BITS - 1);
   1688           for (j = code_size; j > (XINFLATE_FAST_LOOKUP_BITS + 1); j--) {
   1689             tree_cur -= ((rev_code >>= 1) & 1);
   1690             if (!pTree[-tree_cur - 1]) {
   1691               pTree[-tree_cur - 1] = (int16_t)tree_next;
   1692               tree_cur = tree_next;
   1693               tree_next -= 2;
   1694             } else
   1695               tree_cur = pTree[-tree_cur - 1];
   1696           }
   1697           tree_cur -= ((rev_code >>= 1) & 1);
   1698           pTree[-tree_cur - 1] = (int16_t)sym_index;
   1699         }
   1700         if (r->m_type == 2) {
   1701           for (counter = 0;
   1702                counter < (r->m_table_sizes[0] + r->m_table_sizes[1]);) {
   1703             uint32_t s;
   1704             XINFLATE_HUFF_DECODE(16, dist, r->m_look_up[2], r->m_tree_2);
   1705             if (dist < 16) {
   1706               r->m_len_codes[counter++] = (uint8_t)dist;
   1707               continue;
   1708             }
   1709             if ((dist == 16) && (!counter)) {
   1710               XINFLATE_CR_RETURN_FOREVER(17, XINFLATE_STATUS_FAILED);
   1711             }
   1712             num_extra = "\02\03\07"[dist - 16];
   1713             XINFLATE_GET_BITS(18, s, num_extra);
   1714             s += "\03\03\013"[dist - 16];
   1715             XINFLATE_MEMSET(r->m_len_codes + counter,
   1716                             (dist == 16) ? r->m_len_codes[counter - 1] : 0, s);
   1717             counter += s;
   1718           }
   1719           if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) {
   1720             XINFLATE_CR_RETURN_FOREVER(21, XINFLATE_STATUS_FAILED);
   1721           }
   1722           XINFLATE_MEMCPY(r->m_code_size_0, r->m_len_codes,
   1723                           r->m_table_sizes[0]);
   1724           XINFLATE_MEMCPY(r->m_code_size_1,
   1725                           r->m_len_codes + r->m_table_sizes[0],
   1726                           r->m_table_sizes[1]);
   1727         }
   1728       }
   1729       for (;;) {
   1730         uint8_t* pSrc;
   1731         for (;;) {
   1732           if (((pIn_buf_end - pIn_buf_cur) < 4) ||
   1733               ((pOut_buf_end - pOut_buf_cur) < 2)) {
   1734             XINFLATE_HUFF_DECODE(23, counter, r->m_look_up[0], r->m_tree_0);
   1735             if (counter >= 256) break;
   1736             while (pOut_buf_cur >= pOut_buf_end) {
   1737               XINFLATE_CR_RETURN(24, XINFLATE_STATUS_HAS_MORE_OUTPUT);
   1738             }
   1739             *pOut_buf_cur++ = (uint8_t)counter;
   1740           } else {
   1741             int sym2;
   1742             uint32_t code_len;
   1743 #if XDEFLATE_HAS_64BIT_REGISTERS
   1744             if (num_bits < 30) {
   1745               bit_buf |= (((xinflate_bit_buf_t)xdeflate_read_le32(pIn_buf_cur))
   1746                           << num_bits);
   1747               pIn_buf_cur += 4;
   1748               num_bits += 32;
   1749             }
   1750 #else
   1751             if (num_bits < 15) {
   1752               bit_buf |= (((xinflate_bit_buf_t)xdeflate_read_le16(pIn_buf_cur))
   1753                           << num_bits);
   1754               pIn_buf_cur += 2;
   1755               num_bits += 16;
   1756             }
   1757 #endif
   1758             if ((sym2 = r->m_look_up[0][bit_buf &
   1759                                         (XINFLATE_FAST_LOOKUP_SIZE - 1)]) >= 0)
   1760               code_len = sym2 >> 9;
   1761             else {
   1762               code_len = XINFLATE_FAST_LOOKUP_BITS;
   1763               do {
   1764                 sym2 = r->m_tree_0[~sym2 + ((bit_buf >> code_len++) & 1)];
   1765               } while (sym2 < 0);
   1766             }
   1767             counter = sym2;
   1768             bit_buf >>= code_len;
   1769             num_bits -= code_len;
   1770             if (counter & 256) break;
   1771 
   1772 #if !XDEFLATE_HAS_64BIT_REGISTERS
   1773             if (num_bits < 15) {
   1774               bit_buf |= (((xinflate_bit_buf_t)xdeflate_read_le16(pIn_buf_cur))
   1775                           << num_bits);
   1776               pIn_buf_cur += 2;
   1777               num_bits += 16;
   1778             }
   1779 #endif
   1780             if ((sym2 = r->m_look_up[0][bit_buf &
   1781                                         (XINFLATE_FAST_LOOKUP_SIZE - 1)]) >= 0)
   1782               code_len = sym2 >> 9;
   1783             else {
   1784               code_len = XINFLATE_FAST_LOOKUP_BITS;
   1785               do {
   1786                 sym2 = r->m_tree_0[~sym2 + ((bit_buf >> code_len++) & 1)];
   1787               } while (sym2 < 0);
   1788             }
   1789             bit_buf >>= code_len;
   1790             num_bits -= code_len;
   1791 
   1792             pOut_buf_cur[0] = (uint8_t)counter;
   1793             if (sym2 & 256) {
   1794               pOut_buf_cur++;
   1795               counter = sym2;
   1796               break;
   1797             }
   1798             pOut_buf_cur[1] = (uint8_t)sym2;
   1799             pOut_buf_cur += 2;
   1800           }
   1801         }
   1802         if ((counter &= 511) == 256) break;
   1803 
   1804         num_extra = s_length_extra[counter - 257];
   1805         counter = s_length_base[counter - 257];
   1806         if (num_extra) {
   1807           uint32_t extra_bits;
   1808           XINFLATE_GET_BITS(25, extra_bits, num_extra);
   1809           counter += extra_bits;
   1810         }
   1811 
   1812         XINFLATE_HUFF_DECODE(26, dist, r->m_look_up[1], r->m_tree_1);
   1813         num_extra = s_dist_extra[dist];
   1814         dist = s_dist_base[dist];
   1815         if (num_extra) {
   1816           uint32_t extra_bits;
   1817           XINFLATE_GET_BITS(27, extra_bits, num_extra);
   1818           dist += extra_bits;
   1819         }
   1820 
   1821         dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start;
   1822         if ((dist == 0 || dist > dist_from_out_buf_start ||
   1823              dist_from_out_buf_start == 0) &&
   1824             (decomp_flags & XINFLATE_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) {
   1825           XINFLATE_CR_RETURN_FOREVER(37, XINFLATE_STATUS_FAILED);
   1826         }
   1827 
   1828         pSrc = pOut_buf_start +
   1829                ((dist_from_out_buf_start - dist) & out_buf_size_mask);
   1830 
   1831         if ((XDEFLATE_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) {
   1832           while (counter--) {
   1833             while (pOut_buf_cur >= pOut_buf_end) {
   1834               XINFLATE_CR_RETURN(53, XINFLATE_STATUS_HAS_MORE_OUTPUT);
   1835             }
   1836             *pOut_buf_cur++ =
   1837                 pOut_buf_start[(dist_from_out_buf_start++ - dist) &
   1838                                out_buf_size_mask];
   1839           }
   1840           continue;
   1841         }
   1842         while (counter > 2) {
   1843           pOut_buf_cur[0] = pSrc[0];
   1844           pOut_buf_cur[1] = pSrc[1];
   1845           pOut_buf_cur[2] = pSrc[2];
   1846           pOut_buf_cur += 3;
   1847           pSrc += 3;
   1848           counter -= 3;
   1849         }
   1850         if (counter > 0) {
   1851           pOut_buf_cur[0] = pSrc[0];
   1852           if (counter > 1) pOut_buf_cur[1] = pSrc[1];
   1853           pOut_buf_cur += counter;
   1854         }
   1855       }
   1856     }
   1857   } while (!(r->m_final & 1));
   1858 
   1859   XINFLATE_SKIP_BITS(32, num_bits & 7);
   1860   while ((pIn_buf_cur > pIn_buf_next) && (num_bits >= 8)) {
   1861     --pIn_buf_cur;
   1862     num_bits -= 8;
   1863   }
   1864   bit_buf &= ~(~(xinflate_bit_buf_t)0 << num_bits);
   1865   XDEFLATE_ASSERT(!num_bits);
   1866 
   1867   if (decomp_flags & XINFLATE_FLAG_PARSE_ZLIB_HEADER) {
   1868     for (counter = 0; counter < 4; ++counter) {
   1869       uint32_t s;
   1870       if (num_bits)
   1871         XINFLATE_GET_BITS(41, s, 8);
   1872       else
   1873         XINFLATE_GET_BYTE(42, s);
   1874       r->m_z_adler32 = (r->m_z_adler32 << 8) | s;
   1875     }
   1876   }
   1877   XINFLATE_CR_RETURN_FOREVER(34, XINFLATE_STATUS_DONE);
   1878 
   1879   XINFLATE_CR_FINISH
   1880 
   1881 common_exit:
   1882   if ((status != XINFLATE_STATUS_NEEDS_MORE_INPUT) &&
   1883       (status != XINFLATE_STATUS_FAILED_CANNOT_MAKE_PROGRESS)) {
   1884     while ((pIn_buf_cur > pIn_buf_next) && (num_bits >= 8)) {
   1885       --pIn_buf_cur;
   1886       num_bits -= 8;
   1887     }
   1888   }
   1889   r->m_num_bits = num_bits;
   1890   r->m_bit_buf = bit_buf & ~(~(xinflate_bit_buf_t)0 << num_bits);
   1891   r->m_dist = dist;
   1892   r->m_counter = counter;
   1893   r->m_num_extra = num_extra;
   1894   r->m_dist_from_out_buf_start = dist_from_out_buf_start;
   1895   *pIn_buf_size = pIn_buf_cur - pIn_buf_next;
   1896   *pOut_buf_size = pOut_buf_cur - pOut_buf_next;
   1897   if ((decomp_flags &
   1898        (XINFLATE_FLAG_PARSE_ZLIB_HEADER | XINFLATE_FLAG_COMPUTE_ADLER32)) &&
   1899       (status >= 0)) {
   1900     const uint8_t* ptr = pOut_buf_next;
   1901     size_t buf_len = *pOut_buf_size;
   1902     uint32_t i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16;
   1903     size_t block_len = buf_len % 5552;
   1904     while (buf_len) {
   1905       for (i = 0; i + 7 < block_len; i += 8, ptr += 8) {
   1906         s1 += ptr[0], s2 += s1;
   1907         s1 += ptr[1], s2 += s1;
   1908         s1 += ptr[2], s2 += s1;
   1909         s1 += ptr[3], s2 += s1;
   1910         s1 += ptr[4], s2 += s1;
   1911         s1 += ptr[5], s2 += s1;
   1912         s1 += ptr[6], s2 += s1;
   1913         s1 += ptr[7], s2 += s1;
   1914       }
   1915       for (; i < block_len; ++i) s1 += *ptr++, s2 += s1;
   1916       s1 %= 65521U, s2 %= 65521U;
   1917       buf_len -= block_len;
   1918       block_len = 5552;
   1919     }
   1920     r->m_check_adler32 = (s2 << 16) + s1;
   1921     if ((status == XINFLATE_STATUS_DONE) &&
   1922         (decomp_flags & XINFLATE_FLAG_PARSE_ZLIB_HEADER) &&
   1923         (r->m_check_adler32 != r->m_z_adler32))
   1924       status = XINFLATE_STATUS_ADLER32_MISMATCH;
   1925   }
   1926   return status;
   1927 }
   1928 
   1929 size_t xinflate_decompress_mem_to_mem(void* out_buf, size_t out_buf_len,
   1930                                       const void* src_buf, size_t src_buf_len,
   1931                                       int flags) {
   1932   xinflate_decompressor decomp;
   1933   xinflate_status status;
   1934   xinflate_init(&decomp);
   1935   status =
   1936       xinflate_decompress(&decomp, (const uint8_t*)src_buf, &src_buf_len,
   1937                           (uint8_t*)out_buf, (uint8_t*)out_buf, &out_buf_len,
   1938                           (flags & ~XINFLATE_FLAG_HAS_MORE_INPUT) |
   1939                               XINFLATE_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
   1940   return (status != XINFLATE_STATUS_DONE)
   1941              ? XINFLATE_DECOMPRESS_MEM_TO_MEM_FAILED
   1942              : out_buf_len;
   1943 }
   1944 
   1945 #define GZ_MAGIC0 0x1fu
   1946 #define GZ_MAGIC1 0x8bu
   1947 #define GZ_METHOD_DEFLATE 0x08u
   1948 #define GZ_FLG_FTEXT 0x01u
   1949 #define GZ_FLG_FHCRC 0x02u
   1950 #define GZ_FLG_FEXTRA 0x04u
   1951 #define GZ_FLG_FNAME 0x08u
   1952 #define GZ_FLG_FCOMMENT 0x10u
   1953 #define GZ_FLG_RESERVED 0xe0u
   1954 #define GZ_OS_UNKNOWN 0xffu
   1955 #define GZ_HEADER_LEN 10u
   1956 #define GZ_TRAILER_LEN 8u
   1957 
   1958 static int gz_write(KitWriter* out, const void* data, size_t n) {
   1959   return kit_writer_write(out, data, n) == KIT_OK ? DIST_OK : DIST_ERR;
   1960 }
   1961 
   1962 static void put_u32le(uint8_t* p, uint32_t v) {
   1963   p[0] = (uint8_t)(v & 0xffu);
   1964   p[1] = (uint8_t)((v >> 8) & 0xffu);
   1965   p[2] = (uint8_t)((v >> 16) & 0xffu);
   1966   p[3] = (uint8_t)((v >> 24) & 0xffu);
   1967 }
   1968 
   1969 static uint16_t get_u16le(const uint8_t* p) {
   1970   return (uint16_t)(p[0] | (p[1] << 8));
   1971 }
   1972 
   1973 static uint32_t get_u32le(const uint8_t* p) {
   1974   return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) |
   1975          ((uint32_t)p[3] << 24);
   1976 }
   1977 
   1978 static int gz_put_deflate(const void* data, int len, void* user) {
   1979   if (len <= 0) return 1;
   1980   return gz_write((KitWriter*)user, data, (size_t)len) == DIST_OK;
   1981 }
   1982 
   1983 static int gz_skip_header_bytes(const uint8_t* data, size_t trailer_off,
   1984                                 size_t* off, size_t n, uint32_t* hcrc) {
   1985   if (n > trailer_off - *off) return DIST_ERR;
   1986   *hcrc = kit_crc32(*hcrc, data + *off, n);
   1987   *off += n;
   1988   return DIST_OK;
   1989 }
   1990 
   1991 static int gz_skip_header_zstr(const uint8_t* data, size_t trailer_off,
   1992                                size_t* off, uint32_t* hcrc) {
   1993   while (*off < trailer_off) {
   1994     uint8_t c = data[*off];
   1995     *hcrc = kit_crc32(*hcrc, data + *off, 1);
   1996     ++*off;
   1997     if (c == 0) return DIST_OK;
   1998   }
   1999   return DIST_ERR;
   2000 }
   2001 
   2002 static int gz_parse_header(const uint8_t* data, size_t len, size_t* body_off) {
   2003   size_t off = GZ_HEADER_LEN;
   2004   size_t trailer_off;
   2005   uint32_t hcrc;
   2006   uint8_t flg;
   2007 
   2008   if (len < GZ_HEADER_LEN + GZ_TRAILER_LEN) return DIST_ERR;
   2009   trailer_off = len - GZ_TRAILER_LEN;
   2010   if (data[0] != GZ_MAGIC0 || data[1] != GZ_MAGIC1 ||
   2011       data[2] != GZ_METHOD_DEFLATE) {
   2012     return DIST_ERR;
   2013   }
   2014 
   2015   flg = data[3];
   2016   if (flg & GZ_FLG_RESERVED) return DIST_ERR;
   2017   hcrc = kit_crc32(0, data, GZ_HEADER_LEN);
   2018 
   2019   if (flg & GZ_FLG_FEXTRA) {
   2020     uint16_t xlen;
   2021     if (off + 2u > trailer_off) return DIST_ERR;
   2022     xlen = get_u16le(data + off);
   2023     if (gz_skip_header_bytes(data, trailer_off, &off, 2u, &hcrc) != DIST_OK ||
   2024         gz_skip_header_bytes(data, trailer_off, &off, xlen, &hcrc) != DIST_OK)
   2025       return DIST_ERR;
   2026   }
   2027   if ((flg & GZ_FLG_FNAME) &&
   2028       gz_skip_header_zstr(data, trailer_off, &off, &hcrc) != DIST_OK)
   2029     return DIST_ERR;
   2030   if ((flg & GZ_FLG_FCOMMENT) &&
   2031       gz_skip_header_zstr(data, trailer_off, &off, &hcrc) != DIST_OK)
   2032     return DIST_ERR;
   2033   if (flg & GZ_FLG_FHCRC) {
   2034     if (off + 2u > trailer_off) return DIST_ERR;
   2035     if (get_u16le(data + off) != (uint16_t)hcrc) return DIST_ERR;
   2036     off += 2u;
   2037   }
   2038 
   2039   *body_off = off;
   2040   return DIST_OK;
   2041 }
   2042 
   2043 int dist_gz_compress(KitWriter* out, const uint8_t* data, size_t len) {
   2044   xdeflate_compressor def;
   2045   xdeflate_status st;
   2046   uint8_t hdr[GZ_HEADER_LEN];
   2047   uint8_t trailer[GZ_TRAILER_LEN];
   2048   size_t in_len = len;
   2049   uint32_t flags;
   2050 
   2051   memset(hdr, 0, sizeof hdr);
   2052   hdr[0] = GZ_MAGIC0;
   2053   hdr[1] = GZ_MAGIC1;
   2054   hdr[2] = GZ_METHOD_DEFLATE;
   2055   /* hdr[3] flags = 0; hdr[4..7] mtime = 0 (reproducible); hdr[8] XFL = 0 */
   2056   hdr[9] = GZ_OS_UNKNOWN;
   2057   if (gz_write(out, hdr, sizeof hdr) != DIST_OK) return DIST_ERR;
   2058 
   2059   flags = xdeflate_create_flags(6, -15, 0);
   2060   if (xdeflate_init(&def, gz_put_deflate, out, (int)flags) !=
   2061       XDEFLATE_STATUS_OKAY)
   2062     return DIST_ERR;
   2063   st = xdeflate_compress(&def, data, &in_len, NULL, NULL, XDEFLATE_FINISH);
   2064   if (st != XDEFLATE_STATUS_DONE || in_len != len) return DIST_ERR;
   2065 
   2066   put_u32le(trailer, kit_crc32(0, data, len));
   2067   put_u32le(trailer + 4, (uint32_t)len);
   2068   return gz_write(out, trailer, sizeof trailer);
   2069 }
   2070 
   2071 int dist_gz_decompress(KitWriter* out, const uint8_t* data, size_t len) {
   2072   xinflate_decompressor inf;
   2073   uint8_t ring[XINFLATE_LZ_DICT_SIZE];
   2074   size_t off;
   2075   size_t comp_len;
   2076   size_t in_ofs = 0;
   2077   uint64_t total = 0;
   2078   uint32_t crc = 0;
   2079   int done = 0;
   2080 
   2081   if (gz_parse_header(data, len, &off) != DIST_OK) return DIST_ERR;
   2082   comp_len = (len - GZ_TRAILER_LEN) - off;
   2083   xinflate_init(&inf);
   2084 
   2085   while (!done) {
   2086     size_t ring_ofs = (size_t)total & (XINFLATE_LZ_DICT_SIZE - 1u);
   2087     size_t in_avail = comp_len - in_ofs;
   2088     size_t out_avail = XINFLATE_LZ_DICT_SIZE - ring_ofs;
   2089     xinflate_status st =
   2090         xinflate_decompress(&inf, data + off + in_ofs, &in_avail, ring,
   2091                             ring + ring_ofs, &out_avail, 0);
   2092     in_ofs += in_avail;
   2093     if (out_avail) {
   2094       if (gz_write(out, ring + ring_ofs, out_avail) != DIST_OK) return DIST_ERR;
   2095       crc = kit_crc32(crc, ring + ring_ofs, out_avail);
   2096       if (total + out_avail < total) return DIST_ERR;
   2097       total += out_avail;
   2098     }
   2099     if (st == XINFLATE_STATUS_DONE) {
   2100       done = 1;
   2101     } else if (st != XINFLATE_STATUS_HAS_MORE_OUTPUT) {
   2102       return DIST_ERR;
   2103     }
   2104   }
   2105 
   2106   if (in_ofs != comp_len) return DIST_ERR;
   2107   off = len - GZ_TRAILER_LEN;
   2108   if (get_u32le(data + off) != crc) return DIST_ERR;
   2109   if (get_u32le(data + off + 4) != (uint32_t)total) return DIST_ERR;
   2110   return DIST_OK;
   2111 }