kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

lz4.c (118145B)


      1 /*
      2    LZ4 - Fast LZ compression algorithm
      3    Copyright (C) 2011-2023, Yann Collet.
      4 
      5    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
      6 
      7    Redistribution and use in source and binary forms, with or without
      8    modification, are permitted provided that the following conditions are
      9    met:
     10 
     11        * Redistributions of source code must retain the above copyright
     12    notice, this list of conditions and the following disclaimer.
     13        * Redistributions in binary form must reproduce the above
     14    copyright notice, this list of conditions and the following disclaimer
     15    in the documentation and/or other materials provided with the
     16    distribution.
     17 
     18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30    You can contact the author at :
     31     - LZ4 homepage : http://www.lz4.org
     32     - LZ4 source repository : https://github.com/lz4/lz4
     33 */
     34 
     35 /*-************************************
     36 *  Tuning parameters
     37 **************************************/
     38 /*
     39  * LZ4_HEAPMODE :
     40  * Select how stateless compression functions like `LZ4_compress_default()`
     41  * allocate memory for their hash table,
     42  * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
     43  */
     44 #ifndef LZ4_HEAPMODE
     45 #  define LZ4_HEAPMODE 0
     46 #endif
     47 
     48 /*
     49  * LZ4_ACCELERATION_DEFAULT :
     50  * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
     51  */
     52 #define LZ4_ACCELERATION_DEFAULT 1
     53 /*
     54  * LZ4_ACCELERATION_MAX :
     55  * Any "acceleration" value higher than this threshold
     56  * get treated as LZ4_ACCELERATION_MAX instead (fix #876)
     57  */
     58 #define LZ4_ACCELERATION_MAX 65537
     59 
     60 
     61 /*-************************************
     62 *  CPU Feature Detection
     63 **************************************/
/* LZ4_FORCE_MEMORY_ACCESS
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The switch below selects a different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable but violates the C standard.
 *            It can generate buggy code on targets whose assembly generation depends on alignment.
 *            But in some circumstances, it's the only known way to get the most performance (e.g. GCC + ARMv6).
 * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2).
 *
 * When the macro is not provided externally, the selection below picks :
 * - method 2 for GCC-compatible compilers targeting one of the listed ARMv6 variants,
 * - method 1 for other GCC-compatible compilers, the Intel compiler (outside Windows) and MSVC,
 * - nothing otherwise : the macro stays undefined and the memcpy() based accessors are compiled.
 */
#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
#  if defined(__GNUC__) && \
  ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
  || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define LZ4_FORCE_MEMORY_ACCESS 2
#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) || defined(_MSC_VER)
#    define LZ4_FORCE_MEMORY_ACCESS 1
#  endif
#endif
     86 
/*
 * LZ4_FORCE_SW_BITCOUNT
 * Define this parameter if your target system or compiler does not support hardware bit count.
 * When it is defined, LZ4_NbCommonBytes() skips the compiler bit-scan intrinsics
 * and uses its arithmetic fallbacks instead.
 * It is forced on here for Visual Studio targeting Windows CE.
 */
#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for WinCE doesn't support Hardware bit count */
#  undef  LZ4_FORCE_SW_BITCOUNT  /* avoid double def */
#  define LZ4_FORCE_SW_BITCOUNT
#endif
     95 
     96 
     97 
     98 /*-************************************
     99 *  Dependency
    100 **************************************/
    101 /*
    102  * LZ4_SRC_INCLUDED:
    103  * Amalgamation flag, whether lz4.c is included
    104  */
    105 #ifndef LZ4_SRC_INCLUDED
    106 #  define LZ4_SRC_INCLUDED 1
    107 #endif
    108 
    109 #ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
    110 #  define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
    111 #endif
    112 
    113 #ifndef LZ4_STATIC_LINKING_ONLY
    114 #  define LZ4_STATIC_LINKING_ONLY
    115 #endif
    116 #include "lz4.h"
    117 /* see also "memory routines" below */
    118 
    119 
    120 /*-************************************
    121 *  Compiler Options
    122 **************************************/
/* MSVC-only setup (Visual Studio 2005 and later) :
 * pulls in <intrin.h> for the bit-scan intrinsics used further down,
 * and silences warnings triggered by the constant conditions this file
 * relies on (e.g. tests on sizeof() or on compile-time constants). */
#if defined(_MSC_VER) && (_MSC_VER >= 1400)  /* Visual Studio 2005+ */
#  include <intrin.h>               /* only present in VS2005+ */
#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
#  pragma warning(disable : 6237)   /* disable: C6237: conditional expression is always 0 */
#  pragma warning(disable : 6239)   /* disable: C6239: (<non-zero constant> && <expression>) always evaluates to the result of <expression> */
#  pragma warning(disable : 6240)   /* disable: C6240: (<expression> && <non-zero constant>) always evaluates to the result of <expression> */
#  pragma warning(disable : 6326)   /* disable: C6326: Potential comparison of a constant with another constant */
#endif  /* _MSC_VER */
    131 
/* LZ4_FORCE_INLINE :
 * Prefix placed in front of helper functions of this file that should be inlined.
 * Can be provided externally; otherwise it expands to :
 * - MSVC (but not clang in MSVC mode) : static __forceinline
 * - C++ or C99 and later, with GCC or clang : static inline __attribute__((always_inline))
 * - C++ or C99 and later, with other compilers : static inline
 * - anything else (pre-C99) : static
 */
#ifndef LZ4_FORCE_INLINE
#  if defined (_MSC_VER) && !defined (__clang__)    /* MSVC */
#    define LZ4_FORCE_INLINE static __forceinline
#  else
     /* note : `&&` binds tighter than `||`, so this reads  C++ || (C99 or later) */
#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
#      if defined (__GNUC__) || defined (__clang__)
#        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
#      else
#        define LZ4_FORCE_INLINE static inline
#      endif
#    else
#      define LZ4_FORCE_INLINE static
#    endif /* __STDC_VERSION__ */
#  endif  /* _MSC_VER */
#endif /* LZ4_FORCE_INLINE */
    147 
/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
 * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
 * together with a simple 8-byte copy loop as a fall-back path.
 * However, this optimization hurts the decompression speed by >30%,
 * because the execution does not go to the optimized loop
 * for typical compressible data, and all of the preamble checks
 * before going to the fall-back path become useless overhead.
 * This optimization happens only with the -O3 flag, while -O2 generates
 * a simple 8-byte copy loop.
 * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
 * functions are annotated with __attribute__((optimize("O2"))),
 * and LZ4_wildCopy8 is also forcibly inlined, so that the O2 attribute
 * of LZ4_wildCopy8 does not affect the compression speed.
 *
 * On every other compiler/target combination LZ4_FORCE_O2 expands to nothing
 * and LZ4_FORCE_INLINE keeps its previous definition.
 */
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
#  define LZ4_FORCE_O2  __attribute__((optimize("O2")))
#  undef LZ4_FORCE_INLINE
#  define LZ4_FORCE_INLINE  static __inline __attribute__((optimize("O2"),always_inline))
#else
#  define LZ4_FORCE_O2
#endif
    169 
    170 #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
    171 #  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
    172 #else
    173 #  define expect(expr,value)    (expr)
    174 #endif
    175 
    176 #ifndef likely
    177 #define likely(expr)     expect((expr) != 0, 1)
    178 #endif
    179 #ifndef unlikely
    180 #define unlikely(expr)   expect((expr) != 0, 0)
    181 #endif
    182 
    183 /* Should the alignment test prove unreliable, for some reason,
    184  * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
    185 #ifndef LZ4_ALIGN_TEST  /* can be externally provided */
    186 # define LZ4_ALIGN_TEST 1
    187 #endif
    188 
    189 
    190 /*-************************************
    191 *  Memory routines
    192 **************************************/
    193 
    194 /*! LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION :
    195  *  Disable relatively high-level LZ4/HC functions that use dynamic memory
    196  *  allocation functions (malloc(), calloc(), free()).
    197  *
    198  *  Note that this is a compile-time switch. And since it disables
    199  *  public/stable LZ4 v1 API functions, we don't recommend using this
    200  *  symbol to generate a library for distribution.
    201  *
    202  *  The following public functions are removed when this symbol is defined.
    203  *  - lz4   : LZ4_createStream, LZ4_freeStream,
    204  *            LZ4_createStreamDecode, LZ4_freeStreamDecode, LZ4_create (deprecated)
    205  *  - lz4hc : LZ4_createStreamHC, LZ4_freeStreamHC,
    206  *            LZ4_createHC (deprecated), LZ4_freeHC  (deprecated)
    207  *  - lz4frame, lz4file : All LZ4F_* functions
    208  */
    209 #if defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
    210 #  define ALLOC(s)          lz4_error_memory_allocation_is_disabled
    211 #  define ALLOC_AND_ZERO(s) lz4_error_memory_allocation_is_disabled
    212 #  define FREEMEM(p)        lz4_error_memory_allocation_is_disabled
    213 #elif defined(LZ4_USER_MEMORY_FUNCTIONS)
    214 /* memory management functions can be customized by user project.
    215  * Below functions must exist somewhere in the Project
    216  * and be available at link time */
    217 void* LZ4_malloc(size_t s);
    218 void* LZ4_calloc(size_t n, size_t s);
    219 void  LZ4_free(void* p);
    220 # define ALLOC(s)          LZ4_malloc(s)
    221 # define ALLOC_AND_ZERO(s) LZ4_calloc(1,s)
    222 # define FREEMEM(p)        LZ4_free(p)
    223 #else
    224 # include <stdlib.h>   /* malloc, calloc, free */
    225 # define ALLOC(s)          malloc(s)
    226 # define ALLOC_AND_ZERO(s) calloc(1,s)
    227 # define FREEMEM(p)        free(p)
    228 #endif
    229 
    230 #if ! LZ4_FREESTANDING
    231 #  include <string.h>   /* memset, memcpy */
    232 #endif
    233 #if !defined(LZ4_memset)
    234 #  define LZ4_memset(p,v,s) memset((p),(v),(s))
    235 #endif
    236 #define MEM_INIT(p,v,s)   LZ4_memset((p),(v),(s))
    237 
    238 
    239 /*-************************************
    240 *  Common Constants
    241 **************************************/
    242 #define MINMATCH 4
    243 
    244 #define WILDCOPYLENGTH 8
    245 #define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
    246 #define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
    247 #define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
    248 #define FASTLOOP_SAFE_DISTANCE 64
    249 static const int LZ4_minLength = (MFLIMIT+1);
    250 
    251 #define KB *(1 <<10)
    252 #define MB *(1 <<20)
    253 #define GB *(1U<<30)
    254 
    255 #define LZ4_DISTANCE_ABSOLUTE_MAX 65535
    256 #if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
    257 #  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
    258 #endif
    259 
    260 #define ML_BITS  4
    261 #define ML_MASK  ((1U<<ML_BITS)-1)
    262 #define RUN_BITS (8-ML_BITS)
    263 #define RUN_MASK ((1U<<RUN_BITS)-1)
    264 
    265 
    266 /*-************************************
    267 *  Error detection
    268 **************************************/
    269 #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
    270 #  include <assert.h>
    271 #else
    272 #  ifndef assert
    273 #    define assert(condition) ((void)0)
    274 #  endif
    275 #endif
    276 
    277 #define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
    278 
    279 #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
    280 #  include <stdio.h>
    281    static int g_debuglog_enable = 1;
    282 #  define DEBUGLOG(l, ...) {                          \
    283         if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
    284             fprintf(stderr, __FILE__  " %i: ", __LINE__); \
    285             fprintf(stderr, __VA_ARGS__);             \
    286             fprintf(stderr, " \n");                   \
    287     }   }
    288 #else
    289 #  define DEBUGLOG(l, ...) {}    /* disabled */
    290 #endif
    291 
    292 static int LZ4_isAligned(const void* ptr, size_t alignment)
    293 {
    294     return ((size_t)ptr & (alignment -1)) == 0;
    295 }
    296 
    297 
    298 /*-************************************
    299 *  Types
    300 **************************************/
    301 #include <limits.h>
    302 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
    303 # include <stdint.h>
    304   typedef  uint8_t BYTE;
    305   typedef uint16_t U16;
    306   typedef uint32_t U32;
    307   typedef  int32_t S32;
    308   typedef uint64_t U64;
    309   typedef uintptr_t uptrval;
    310 #else
    311 # if UINT_MAX != 4294967295UL
    312 #   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
    313 # endif
    314   typedef unsigned char       BYTE;
    315   typedef unsigned short      U16;
    316   typedef unsigned int        U32;
    317   typedef   signed int        S32;
    318   typedef unsigned long long  U64;
    319   typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
    320 #endif
    321 
    322 #if defined(__x86_64__)
    323   typedef U64    reg_t;   /* 64-bits in x32 mode */
    324 #else
    325   typedef size_t reg_t;   /* 32-bits in x32 mode */
    326 #endif
    327 
    328 typedef enum {
    329     notLimited = 0,
    330     limitedOutput = 1,
    331     fillOutput = 2
    332 } limitedOutput_directive;
    333 
    334 
    335 /*-************************************
    336 *  Reading and writing into memory
    337 **************************************/
    338 
    339 /**
    340  * LZ4 relies on memcpy with a constant size being inlined. In freestanding
    341  * environments, the compiler can't assume the implementation of memcpy() is
    342  * standard compliant, so it can't apply its specialized memcpy() inlining
    343  * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
    344  * memcpy() as if it were standard compliant, so it can inline it in freestanding
    345  * environments. This is needed when decompressing the Linux Kernel, for example.
    346  */
    347 #if !defined(LZ4_memcpy)
    348 #  if defined(__GNUC__) && (__GNUC__ >= 4)
    349 #    define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
    350 #  else
    351 #    define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
    352 #  endif
    353 #endif
    354 
    355 #if !defined(LZ4_memmove)
    356 #  if defined(__GNUC__) && (__GNUC__ >= 4)
    357 #    define LZ4_memmove __builtin_memmove
    358 #  else
    359 #    define LZ4_memmove memmove
    360 #  endif
    361 #endif
    362 
    363 static unsigned LZ4_isLittleEndian(void)
    364 {
    365     const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
    366     return one.c[0];
    367 }
    368 
    369 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
    370 #define LZ4_PACK( __Declaration__ ) __Declaration__ __attribute__((__packed__))
    371 #elif defined(_MSC_VER)
    372 #define LZ4_PACK( __Declaration__ ) __pragma( pack(push, 1) ) __Declaration__ __pragma( pack(pop))
    373 #endif
    374 
    375 #if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
    376 /* lie to the compiler about data alignment; use with caution */
    377 
    378 static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
    379 static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
    380 static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }
    381 
    382 static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
    383 static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
    384 
    385 #elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
    386 
    387 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
    388 /* currently only defined for gcc and icc */
    389 LZ4_PACK(typedef struct { U16 u16; }) LZ4_unalign16;
    390 LZ4_PACK(typedef struct { U32 u32; }) LZ4_unalign32;
    391 LZ4_PACK(typedef struct { reg_t uArch; }) LZ4_unalignST;
    392 
    393 static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign16*)ptr)->u16; }
    394 static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign32*)ptr)->u32; }
    395 static reg_t LZ4_read_ARCH(const void* ptr) { return ((const LZ4_unalignST*)ptr)->uArch; }
    396 
    397 static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign16*)memPtr)->u16 = value; }
    398 static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign32*)memPtr)->u32 = value; }
    399 
    400 #else  /* safe and portable access using memcpy() */
    401 
    402 static U16 LZ4_read16(const void* memPtr)
    403 {
    404     U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
    405 }
    406 
    407 static U32 LZ4_read32(const void* memPtr)
    408 {
    409     U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
    410 }
    411 
    412 static reg_t LZ4_read_ARCH(const void* memPtr)
    413 {
    414     reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
    415 }
    416 
    417 static void LZ4_write16(void* memPtr, U16 value)
    418 {
    419     LZ4_memcpy(memPtr, &value, sizeof(value));
    420 }
    421 
    422 static void LZ4_write32(void* memPtr, U32 value)
    423 {
    424     LZ4_memcpy(memPtr, &value, sizeof(value));
    425 }
    426 
    427 #endif /* LZ4_FORCE_MEMORY_ACCESS */
    428 
    429 
    430 static U16 LZ4_readLE16(const void* memPtr)
    431 {
    432     if (LZ4_isLittleEndian()) {
    433         return LZ4_read16(memPtr);
    434     } else {
    435         const BYTE* p = (const BYTE*)memPtr;
    436         return (U16)((U16)p[0] | (p[1]<<8));
    437     }
    438 }
    439 
    440 #ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT
    441 static U32 LZ4_readLE32(const void* memPtr)
    442 {
    443     if (LZ4_isLittleEndian()) {
    444         return LZ4_read32(memPtr);
    445     } else {
    446         const BYTE* p = (const BYTE*)memPtr;
    447         return (U32)p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24);
    448     }
    449 }
    450 #endif
    451 
    452 static void LZ4_writeLE16(void* memPtr, U16 value)
    453 {
    454     if (LZ4_isLittleEndian()) {
    455         LZ4_write16(memPtr, value);
    456     } else {
    457         BYTE* p = (BYTE*)memPtr;
    458         p[0] = (BYTE) value;
    459         p[1] = (BYTE)(value>>8);
    460     }
    461 }
    462 
    463 /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
    464 LZ4_FORCE_INLINE
    465 void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
    466 {
    467     BYTE* d = (BYTE*)dstPtr;
    468     const BYTE* s = (const BYTE*)srcPtr;
    469     BYTE* const e = (BYTE*)dstEnd;
    470 
    471     do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
    472 }
    473 
    474 static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
    475 static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
    476 
    477 
/* LZ4_FAST_DEC_LOOP :
 * When non-zero, the offset-aware copy helpers defined right below
 * (guarded by `#if LZ4_FAST_DEC_LOOP`) are compiled.
 * Can be provided externally; otherwise it is enabled on x86 / x64,
 * on Apple aarch64, and on other aarch64 targets when not compiling with clang. */
#ifndef LZ4_FAST_DEC_LOOP
#  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
#    define LZ4_FAST_DEC_LOOP 1
#  elif defined(__aarch64__) && defined(__APPLE__)
#    define LZ4_FAST_DEC_LOOP 1
#  elif defined(__aarch64__) && !defined(__clang__)
     /* On non-Apple aarch64, we disable this optimization for clang because
      * on certain mobile chipsets, performance is reduced with clang. For
      * more information refer to https://github.com/lz4/lz4/pull/707 */
#    define LZ4_FAST_DEC_LOOP 1
#  else
#    define LZ4_FAST_DEC_LOOP 0
#  endif
#endif
    492 
    493 #if LZ4_FAST_DEC_LOOP
    494 
    495 LZ4_FORCE_INLINE void
    496 LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
    497 {
    498     assert(srcPtr + offset == dstPtr);
    499     if (offset < 8) {
    500         LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
    501         dstPtr[0] = srcPtr[0];
    502         dstPtr[1] = srcPtr[1];
    503         dstPtr[2] = srcPtr[2];
    504         dstPtr[3] = srcPtr[3];
    505         srcPtr += inc32table[offset];
    506         LZ4_memcpy(dstPtr+4, srcPtr, 4);
    507         srcPtr -= dec64table[offset];
    508         dstPtr += 8;
    509     } else {
    510         LZ4_memcpy(dstPtr, srcPtr, 8);
    511         dstPtr += 8;
    512         srcPtr += 8;
    513     }
    514 
    515     LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
    516 }
    517 
    518 /* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
    519  * this version copies two times 16 bytes (instead of one time 32 bytes)
    520  * because it must be compatible with offsets >= 16. */
    521 LZ4_FORCE_INLINE void
    522 LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
    523 {
    524     BYTE* d = (BYTE*)dstPtr;
    525     const BYTE* s = (const BYTE*)srcPtr;
    526     BYTE* const e = (BYTE*)dstEnd;
    527 
    528     do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
    529 }
    530 
    531 /* LZ4_memcpy_using_offset()  presumes :
    532  * - dstEnd >= dstPtr + MINMATCH
    533  * - there is at least 12 bytes available to write after dstEnd */
    534 LZ4_FORCE_INLINE void
    535 LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
    536 {
    537     BYTE v[8];
    538 
    539     assert(dstEnd >= dstPtr + MINMATCH);
    540 
    541     switch(offset) {
    542     case 1:
    543         MEM_INIT(v, *srcPtr, 8);
    544         break;
    545     case 2:
    546         LZ4_memcpy(v, srcPtr, 2);
    547         LZ4_memcpy(&v[2], srcPtr, 2);
    548 #if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */
    549 #  pragma warning(push)
    550 #  pragma warning(disable : 6385) /* warning C6385: Reading invalid data from 'v'. */
    551 #endif
    552         LZ4_memcpy(&v[4], v, 4);
    553 #if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */
    554 #  pragma warning(pop)
    555 #endif
    556         break;
    557     case 4:
    558         LZ4_memcpy(v, srcPtr, 4);
    559         LZ4_memcpy(&v[4], srcPtr, 4);
    560         break;
    561     default:
    562         LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
    563         return;
    564     }
    565 
    566     LZ4_memcpy(dstPtr, v, 8);
    567     dstPtr += 8;
    568     while (dstPtr < dstEnd) {
    569         LZ4_memcpy(dstPtr, v, 8);
    570         dstPtr += 8;
    571     }
    572 }
    573 #endif
    574 
    575 
    576 /*-************************************
    577 *  Common functions
    578 **************************************/
/* LZ4_NbCommonBytes() :
 * Counts the zero bytes of `val`, starting from the byte that sits at the
 * lowest memory address : trailing zero bytes on little-endian targets,
 * leading zero bytes on big-endian targets.
 * LZ4_count() passes the XOR of two words read from memory, so the result
 * is the number of bytes both words have in common at their start.
 * @val : must be != 0 (asserted).
 * Every branch computes a bit count and divides it by 8 (>> 3), using a
 * compiler intrinsic when one is available and LZ4_FORCE_SW_BITCOUNT is
 * not defined, or an arithmetic / table-based fallback otherwise. */
static unsigned LZ4_NbCommonBytes (reg_t val)
{
    assert(val != 0);
    if (LZ4_isLittleEndian()) {
        /* little endian : count trailing zero bits */
        if (sizeof(val) == 8) {
#       if defined(_MSC_VER) && (_MSC_VER >= 1800) && (defined(_M_AMD64) && !defined(_M_ARM64EC)) && !defined(LZ4_FORCE_SW_BITCOUNT)
/*-*************************************************************************************************
* ARM64EC is a Microsoft-designed ARM64 ABI compatible with AMD64 applications on ARM64 Windows 11.
* The ARM64EC ABI does not support AVX/AVX2/AVX512 instructions, nor their relevant intrinsics
* including _tzcnt_u64. Therefore, we need to neuter the _tzcnt_u64 code path for ARM64EC.
****************************************************************************************************/
#         if defined(__clang__) && (__clang_major__ < 10)
            /* Avoid undefined clang-cl intrinsics issue.
             * See https://github.com/lz4/lz4/pull/1017 for details. */
            return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3;
#         else
            /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */
            return (unsigned)_tzcnt_u64(val) >> 3;
#         endif
#       elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanForward64(&r, (U64)val);
            return (unsigned)r >> 3;
#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
                                        !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_ctzll((U64)val) >> 3;
#       else
            /* software fallback :
             * `val ^ (val - 1)` sets the lowest set bit and every bit below it;
             * the masked multiplication then sums one marker per zero byte
             * into the top byte of the product */
            const U64 m = 0x0101010101010101ULL;
            val ^= val - 1;
            return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
#       endif
        } else /* 32 bits */ {
#       if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r;
            _BitScanForward(&r, (U32)val);
            return (unsigned)r >> 3;
#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_ctz((U32)val) >> 3;
#       else
            /* software fallback : same technique as the 64-bit variant */
            const U32 m = 0x01010101;
            return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
#       endif
        }
    } else   /* Big Endian CPU */ {
        /* big endian : count leading zero bits */
        if (sizeof(val)==8) {
#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_clzll((U64)val) >> 3;
#       else
#if 1
            /* this method is probably faster,
             * but adds a 128 bytes lookup table */
            static const unsigned char ctz7_tab[128] = {
                7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
            };
            U64 const mask = 0x0101010101010101ULL;
            U64 const t = (((val >> 8) - mask) | val) & mask;
            return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
#else
            /* this method doesn't consume memory space like the previous one,
             * but it contains several branches,
             * that may end up slowing execution */
            static const U32 by32 = sizeof(val)*4;  /* 32 on 64 bits (goal), 16 on 32 bits.
            Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
            Note that this code path is never triggered in 32-bits mode. */
            unsigned r;
            if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#endif
#       endif
        } else /* 32 bits */ {
#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
                                        !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_clz((U32)val) >> 3;
#       else
            /* branch-free software fallback */
            val >>= 8;
            val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
              (val + 0x00FF0000)) >> 24;
            return (unsigned)val ^ 3;
#       endif
        }
    }
}
    676 
    677 
    678 #define STEPSIZE sizeof(reg_t)
        /* LZ4_count() :
         * Returns the number of consecutive bytes starting at pIn that are equal
         * to the bytes starting at pMatch, without reading pIn at or beyond
         * pInLimit. pMatch is read at the same offsets; this function applies no
         * separate bound to it.
         * Whole words of STEPSIZE bytes are compared first (XOR of two
         * LZ4_read_ARCH loads). On the first non-zero XOR, the value returned by
         * LZ4_NbCommonBytes(diff) is added as the count of equal bytes inside
         * that word. Whatever is left is compared with a 4-byte step (only when
         * STEPSIZE==8), then a 2-byte step, then a single byte.
         */
    679 LZ4_FORCE_INLINE
    680 unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
    681 {
    682     const BYTE* const pStart = pIn;
    683 
            /* First word, handled outside the loop: pIn still equals pStart here,
             * so the common-byte count of the word is directly the result. */
    684     if (likely(pIn < pInLimit-(STEPSIZE-1))) {
    685         reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
    686         if (!diff) {
    687             pIn+=STEPSIZE; pMatch+=STEPSIZE;
    688         } else {
    689             return LZ4_NbCommonBytes(diff);
    690     }   }
    691 
            /* Following words: keep going while a full word still fits before pInLimit. */
    692     while (likely(pIn < pInLimit-(STEPSIZE-1))) {
    693         reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
    694         if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
    695         pIn += LZ4_NbCommonBytes(diff);
    696         return (unsigned)(pIn - pStart);
    697     }
    698 
            /* Tail: fewer than STEPSIZE bytes remain before pInLimit. */
    699     if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
    700     if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
    701     if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    702     return (unsigned)(pIn - pStart);
    703 }
    704 
    705 
    706 #ifndef LZ4_COMMONDEFS_ONLY
    707 /*-************************************
    708 *  Local Constants
    709 **************************************/
        /* Upper bound on inputSize that LZ4_compress_generic_validated() asserts
         * when it is asked to use a byU16 table. */
    710 static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
        /* Shift applied to the match-search attempt counter: the distance between
         * two probed positions is (counter >> LZ4_skipTrigger). */
    711 static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression run slower on incompressible data */
    712 
    713 
    714 /*-************************************
    715 *  Local Structures and types
    716 **************************************/
        /* tableType_t : how the entries of the hash table are interpreted.
         * - clearedTable : no layout selected; the put/clear helpers below assert on it.
         * - byPtr        : entries are `const BYTE*` positions.
         * - byU32        : entries are U32 indexes.
         * - byU16        : entries are U16 indexes (must stay below 65536).
         */
    717 typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
    718 
    719 /**
    720  * This enum distinguishes several different modes of accessing previous
    721  * content in the stream.
    722  *
    723  * - noDict        : There is no preceding content.
    724  * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
    725  *                   being compressed are valid and refer to the preceding
    726  *                   content (of length ctx->dictSize), which is available
    727  *                   contiguously preceding in memory the content currently
    728  *                   being compressed.
    729  * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
    730  *                   else in memory, starting at ctx->dictionary with length
    731  *                   ctx->dictSize.
    732  * - usingDictCtx  : Everything concerning the preceding content is
    733  *                   in a separate context, pointed to by ctx->dictCtx.
    734  *                   ctx->dictionary, ctx->dictSize, and table entries
    735  *                   in the current context that refer to positions
    736  *                   preceding the beginning of the current compression are
    737  *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
    738  *                   ->dictSize describe the location and size of the preceding
    739  *                   content, and matches are found by looking in the ctx
    740  *                   ->dictCtx->hashTable.
    741  */
    742 typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
        /* dictIssue_directive : with dictSmall, LZ4_compress_generic_validated()
         * rejects match candidates whose index is below (startIndex - dictSize);
         * with noDictIssue that test is not performed. */
    743 typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
    744 
    745 
    746 /*-************************************
    747 *  Local Utils
    748 **************************************/
        /* Returns the LZ4_VERSION_NUMBER macro value. */
    749 int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
        /* Returns the LZ4_VERSION_STRING macro value. */
    750 const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
        /* Function form of the LZ4_COMPRESSBOUND() macro, evaluated on isize. */
    751 int LZ4_compressBound(int isize)  { return LZ4_COMPRESSBOUND(isize); }
        /* Returns sizeof(LZ4_stream_t), converted to int. */
    752 int LZ4_sizeofState(void) { return sizeof(LZ4_stream_t); }
    753 
    754 
    755 /*-****************************************
    756 *  Internal Definitions, used only in Tests
    757 *******************************************/
        /* Prototypes only; under C++ they are declared with C linkage. */
    758 #if defined (__cplusplus)
    759 extern "C" {
    760 #endif
    761 
    762 int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);
    763 
    764 int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
    765                                      int compressedSize, int maxOutputSize,
    766                                      const void* dictStart, size_t dictSize);
    767 int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest,
    768                                      int compressedSize, int targetOutputSize, int dstCapacity,
    769                                      const void* dictStart, size_t dictSize);
    770 #if defined (__cplusplus)
    771 }
    772 #endif
    773 
    774 /*-******************************
    775 *  Compression functions
    776 ********************************/
        /* LZ4_hash4() :
         * Multiplicative hash of a 32-bit sequence: the product with 2654435761U is
         * shifted right so that only its top bits remain. A byU16 table keeps
         * LZ4_HASHLOG+1 bits, any other table type keeps LZ4_HASHLOG bits.
         */
    777 LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
    778 {
    779     if (tableType == byU16)
    780         return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
    781     else
    782         return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
    783 }
    784 
        /* LZ4_hash5() :
         * Hash of a 64-bit sequence in which only 5 of the 8 bytes take part.
         * Little-endian: the left shift by 24 drops the 3 most significant bytes,
         * so only the 5 least significant bytes remain before the multiplication.
         * Big-endian: the right shift by 24 drops the 3 least significant bytes.
         * The top hashLog bits of the 64-bit product form the result
         * (hashLog is LZ4_HASHLOG+1 for byU16, LZ4_HASHLOG otherwise).
         */
    785 LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
    786 {
    787     const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
    788     if (LZ4_isLittleEndian()) {
    789         const U64 prime5bytes = 889523592379ULL;
    790         return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
    791     } else {
    792         const U64 prime8bytes = 11400714785074694791ULL;
    793         return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
    794     }
    795 }
    796 
        /* LZ4_hashPosition() :
         * Hash of the bytes found at p, for a table of type tableType.
         * When reg_t is 8 bytes wide and the table is not byU16, a full register is
         * read and hashed with LZ4_hash5(). Otherwise 32 bits are read and hashed
         * with LZ4_hash4(); if LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT
         * is defined, that read goes through LZ4_readLE32() instead of LZ4_read32().
         */
    797 LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
    798 {
    799     if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType);
    800 
    801 #ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT
    802     return LZ4_hash4(LZ4_readLE32(p), tableType);
    803 #else
    804     return LZ4_hash4(LZ4_read32(p), tableType);
    805 #endif
    806 }
    807 
        /* LZ4_clearHash() :
         * Resets entry h of the table at tableBase: NULL for a byPtr table,
         * 0 for byU32 and byU16 tables. clearedTable (or an unknown value) is
         * not a valid argument and trips the assert.
         */
    808 LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
    809 {
    810     switch (tableType)
    811     {
    812     default: /* fallthrough */
    813     case clearedTable: { /* illegal! */ assert(0); return; }
    814     case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; }
    815     case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; }
    816     case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; }
    817     }
    818 }
    819 
        /* LZ4_putIndexOnHash() :
         * Stores index idx in entry h of an index-based table (byU32 or byU16).
         * For byU16 the index must be below 65536 (asserted) and is stored as U16.
         * byPtr, clearedTable and unknown values are not valid here and trip the assert.
         */
    820 LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
    821 {
    822     switch (tableType)
    823     {
    824     default: /* fallthrough */
    825     case clearedTable: /* fallthrough */
    826     case byPtr: { /* illegal! */ assert(0); return; }
    827     case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; }
    828     case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; }
    829     }
    830 }
    831 
    832 /* LZ4_putPosition*() : only used in byPtr mode */
        /* LZ4_putPositionOnHash() :
         * Stores pointer p in entry h of a pointer table. tableType is only
         * checked by the assert; the (void) cast keeps it "used" when asserts
         * are compiled out.
         */
    833 LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
    834                                   void* tableBase, tableType_t const tableType)
    835 {
    836     const BYTE** const hashTable = (const BYTE**)tableBase;
    837     assert(tableType == byPtr); (void)tableType;
    838     hashTable[h] = p;
    839 }
    840 
        /* LZ4_putPosition() :
         * Hashes the bytes at p and stores p in the matching entry of the
         * pointer table at tableBase.
         */
    841 LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType)
    842 {
    843     U32 const h = LZ4_hashPosition(p, tableType);
    844     LZ4_putPositionOnHash(p, h, tableBase, tableType);
    845 }
    846 
    847 /* LZ4_getIndexOnHash() :
    848  * Index of match position registered in hash table.
    849  * hash position must be calculated by using base+index, or dictBase+index.
    850  * Assumption 1 : only valid if tableType == byU32 or byU16.
    851  * Assumption 2 : h is presumed valid (within limits of hash table)
         * @return : the entry stored at h, widened to U32 for a byU16 table.
         *           Any other table type trips the assert and yields 0.
    852  */
    853 LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
    854 {
    855     LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
    856     if (tableType == byU32) {
    857         const U32* const hashTable = (const U32*) tableBase;
                /* U32 table: h must be below 2^(LZ4_MEMORY_USAGE-2) */
    858         assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
    859         return hashTable[h];
    860     }
    861     if (tableType == byU16) {
    862         const U16* const hashTable = (const U16*) tableBase;
                /* U16 table: h must be below 2^(LZ4_MEMORY_USAGE-1) */
    863         assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
    864         return hashTable[h];
    865     }
    866     assert(0); return 0;  /* forbidden case */
    867 }
    868 
        /* LZ4_getPositionOnHash() :
         * Returns the pointer stored in entry h of a pointer table.
         * tableType is only checked by the assert (byPtr expected).
         */
    869 static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType)
    870 {
    871     assert(tableType == byPtr); (void)tableType;
    872     { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; }
    873 }
    874 
        /* LZ4_getPosition() :
         * Hashes the bytes at p and returns the pointer currently stored in the
         * matching entry of the pointer table at tableBase.
         */
    875 LZ4_FORCE_INLINE const BYTE*
    876 LZ4_getPosition(const BYTE* p,
    877                 const void* tableBase, tableType_t tableType)
    878 {
    879     U32 const h = LZ4_hashPosition(p, tableType);
    880     return LZ4_getPositionOnHash(h, tableBase, tableType);
    881 }
    882 
        /* LZ4_prepareTable() :
         * Gets cctx ready for a compression of inputSize bytes that will use a
         * table of type tableType.
         * A table that is not marked clearedTable is wiped (hashTable zeroed,
         * currentOffset = 0, tableType = clearedTable) when any of these holds:
         *  - its recorded type differs from the requested tableType,
         *  - byU16 is requested and currentOffset + inputSize >= 0xFFFF,
         *  - byU32 is requested and currentOffset > 1 GB,
         *  - byPtr is requested,
         *  - inputSize >= 4 KB.
         * Otherwise the table content is kept as is.
         * In all cases dictCtx, dictionary and dictSize are cleared on exit.
         */
    883 LZ4_FORCE_INLINE void
    884 LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
    885            const int inputSize,
    886            const tableType_t tableType) {
    887     /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
    888      * therefore safe to use no matter what mode we're in. Otherwise, we figure
    889      * out if it's safe to leave as is or whether it needs to be reset.
    890      */
    891     if ((tableType_t)cctx->tableType != clearedTable) {
    892         assert(inputSize >= 0);
    893         if ((tableType_t)cctx->tableType != tableType
    894           || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
    895           || ((tableType == byU32) && cctx->currentOffset > 1 GB)
    896           || tableType == byPtr
    897           || inputSize >= 4 KB)
    898         {
    899             DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
    900             MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
    901             cctx->currentOffset = 0;
    902             cctx->tableType = (U32)clearedTable;
    903         } else {
    904             DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
    905         }
    906     }
    907 
    908     /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back,
    909      * is faster than compressing without a gap.
    910      * However, compressing with currentOffset == 0 is faster still,
    911      * so we preserve that case.
    912      */
    913     if (cctx->currentOffset != 0 && tableType == byU32) {
    914         DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
    915         cctx->currentOffset += 64 KB;
    916     }
    917 
    918     /* Finally, clear history */
    919     cctx->dictCtx = NULL;
    920     cctx->dictionary = NULL;
    921     cctx->dictSize = 0;
    922 }
    923 
    924 /** LZ4_compress_generic_validated() :
    925  *  inlined, to ensure branches are decided at compilation time.
    926  *  The following conditions are presumed already validated:
    927  *  - source != NULL
    928  *  - inputSize > 0
         *
         *  Parameters, as used by the body below:
         *  - cctx            : stream state. currentOffset, dictionary, dictSize, dictCtx
         *                      and hashTable are read; dictSize, currentOffset and tableType
         *                      are updated before the search starts (dictCtx is reset to
         *                      NULL when dictDirective == usingDictCtx).
         *  - source          : input buffer of inputSize bytes.
         *  - dest            : output buffer of maxOutputSize bytes.
         *  - inputConsumed   : written only when outputDirective == fillOutput; receives
         *                      the number of source bytes that were encoded.
         *  - outputDirective : limitedOutput => return 0 as soon as the output would not fit;
         *                      fillOutput    => shorten what is encoded so the output fits;
         *                      a zero value skips the output capacity checks.
         *  - tableType       : layout of cctx->hashTable (byPtr, byU32 or byU16).
         *  - dictDirective   : where earlier content lives (see dict_directive).
         *  - dictIssue       : dictSmall => candidates with an index below
         *                      (startIndex - dictSize) are rejected.
         *  - acceleration    : must be >= 1 (asserted). It is the search step used right
         *                      after the first probe, so larger values skip input faster.
         *
         *  @return : number of bytes written into dest; 0 when limitedOutput is requested
         *            and the output does not fit, or when fillOutput is requested with
         *            maxOutputSize < 1.
    929  */
    930 LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
    931                  LZ4_stream_t_internal* const cctx,
    932                  const char* const source,
    933                  char* const dest,
    934                  const int inputSize,
    935                  int*  inputConsumed, /* only written when outputDirective == fillOutput */
    936                  const int maxOutputSize,
    937                  const limitedOutput_directive outputDirective,
    938                  const tableType_t tableType,
    939                  const dict_directive dictDirective,
    940                  const dictIssue_directive dictIssue,
    941                  const int acceleration)
    942 {
    943     int result;
    944     const BYTE* ip = (const BYTE*)source;
    945 
            /* Index space: source[0] has index startIndex, so (base + index) is the
             * address of the byte carrying that index in the current block. */
    946     U32 const startIndex = cctx->currentOffset;
    947     const BYTE* base = (const BYTE*)source - startIndex;
    948     const BYTE* lowLimit;
    949 
    950     const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
    951     const BYTE* const dictionary =
    952         dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
    953     const U32 dictSize =
    954         dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
    955     const U32 dictDelta =
    956         (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0;   /* make indexes in dictCtx comparable with indexes in current context */
    957 
    958     int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
    959     U32 const prefixIdxLimit = startIndex - dictSize;   /* used when dictDirective == dictSmall */
    960     const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
    961     const BYTE* anchor = (const BYTE*) source;
    962     const BYTE* const iend = ip + inputSize;
            /* mflimitPlusOne : the match search gives up once the probed position goes past it.
             * matchlimit     : a match is never extended beyond this address. */
    963     const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
    964     const BYTE* const matchlimit = iend - LASTLITERALS;
    965 
    966     /* the dictCtx currentOffset is indexed on the start of the dictionary,
    967      * while a dictionary in the current context precedes the currentOffset */
    968     const BYTE* dictBase = (dictionary == NULL) ? NULL :
    969                            (dictDirective == usingDictCtx) ?
    970                             dictionary + dictSize - dictCtx->currentOffset :
    971                             dictionary + dictSize - startIndex;
    972 
    973     BYTE* op = (BYTE*) dest;
    974     BYTE* const olimit = op + maxOutputSize;
    975 
    976     U32 offset = 0;
    977     U32 forwardH;
    978 
    979     DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
    980     assert(ip != NULL);
    981     if (tableType == byU16) assert(inputSize<LZ4_64Klimit);  /* Size too large (not within 64K limit) */
    982     if (tableType == byPtr) assert(dictDirective==noDict);   /* only supported use case with byPtr */
    983     /* If init conditions are not met, we don't have to mark stream
    984      * as having dirty context, since no action was taken yet */
    985     if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
    986     assert(acceleration >= 1);
    987 
            /* Lowest address the catch-up step may rewind a match to: with a prefix,
             * dictSize bytes before source; otherwise source itself (the ext-dict
             * modes reassign lowLimit for every candidate). */
    988     lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);
    989 
    990     /* Update context state */
    991     if (dictDirective == usingDictCtx) {
    992         /* Subsequent linked blocks can't use the dictionary. */
    993         /* Instead, they use the block we just compressed. */
    994         cctx->dictCtx = NULL;
    995         cctx->dictSize = (U32)inputSize;
    996     } else {
    997         cctx->dictSize += (U32)inputSize;
    998     }
    999     cctx->currentOffset += (U32)inputSize;
   1000     cctx->tableType = (U32)tableType;
   1001 
   1002     if (inputSize<LZ4_minLength) goto _last_literals;        /* Input too small, no compression (all literals) */
   1003 
   1004     /* First Byte */
            /* The first position is only registered in the table; searching starts at the second byte. */
   1005     {   U32 const h = LZ4_hashPosition(ip, tableType);
   1006         if (tableType == byPtr) {
   1007             LZ4_putPositionOnHash(ip, h, cctx->hashTable, byPtr);
   1008         } else {
   1009             LZ4_putIndexOnHash(startIndex, h, cctx->hashTable, tableType);
   1010     }   }
   1011     ip++; forwardH = LZ4_hashPosition(ip, tableType);
   1012 
   1013     /* Main Loop */
   1014     for ( ; ; ) {
   1015         const BYTE* match;
   1016         BYTE* token;
   1017         const BYTE* filledIp;
   1018 
   1019         /* Find a match */
                /* Search step: 1 for the first probe, then (searchMatchNb >> LZ4_skipTrigger).
                 * searchMatchNb starts at acceleration << LZ4_skipTrigger and is incremented
                 * on every probe, so the step starts at `acceleration` and grows by one
                 * every (1 << LZ4_skipTrigger) unsuccessful probes. */
   1020         if (tableType == byPtr) {
   1021             const BYTE* forwardIp = ip;
   1022             int step = 1;
   1023             int searchMatchNb = acceleration << LZ4_skipTrigger;
   1024             do {
   1025                 U32 const h = forwardH;
   1026                 ip = forwardIp;
   1027                 forwardIp += step;
   1028                 step = (searchMatchNb++ >> LZ4_skipTrigger);
   1029 
   1030                 if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
   1031                 assert(ip < mflimitPlusOne);
   1032 
                        /* Fetch the candidate stored for this hash, then register ip in its place. */
   1033                 match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType);
   1034                 forwardH = LZ4_hashPosition(forwardIp, tableType);
   1035                 LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType);
   1036 
                    /* Retry while the candidate is more than LZ4_DISTANCE_MAX behind ip
                     * or its first 4 bytes differ from those at ip. */
   1037             } while ( (match+LZ4_DISTANCE_MAX < ip)
   1038                    || (LZ4_read32(match) != LZ4_read32(ip)) );
   1039 
   1040         } else {   /* byU32, byU16 */
   1041 
   1042             const BYTE* forwardIp = ip;
   1043             int step = 1;
   1044             int searchMatchNb = acceleration << LZ4_skipTrigger;
   1045             do {
   1046                 U32 const h = forwardH;
   1047                 U32 const current = (U32)(forwardIp - base);
   1048                 U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
   1049                 assert(matchIndex <= current);
   1050                 assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
   1051                 ip = forwardIp;
   1052                 forwardIp += step;
   1053                 step = (searchMatchNb++ >> LZ4_skipTrigger);
   1054 
   1055                 if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
   1056                 assert(ip < mflimitPlusOne);
   1057 
                        /* Turn matchIndex into an address. An index below startIndex refers to
                         * content that precedes the current block. */
   1058                 if (dictDirective == usingDictCtx) {
   1059                     if (matchIndex < startIndex) {
   1060                         /* there was no match, try the dictionary */
   1061                         assert(tableType == byU32);
   1062                         matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
   1063                         match = dictBase + matchIndex;
   1064                         matchIndex += dictDelta;   /* make dictCtx index comparable with current context */
   1065                         lowLimit = dictionary;
   1066                     } else {
   1067                         match = base + matchIndex;
   1068                         lowLimit = (const BYTE*)source;
   1069                     }
   1070                 } else if (dictDirective == usingExtDict) {
   1071                     if (matchIndex < startIndex) {
   1072                         DEBUGLOG(7, "extDict candidate: matchIndex=%5u  <  startIndex=%5u", matchIndex, startIndex);
   1073                         assert(startIndex - matchIndex >= MINMATCH);
   1074                         assert(dictBase);
   1075                         match = dictBase + matchIndex;
   1076                         lowLimit = dictionary;
   1077                     } else {
   1078                         match = base + matchIndex;
   1079                         lowLimit = (const BYTE*)source;
   1080                     }
   1081                 } else {   /* single continuous memory segment */
   1082                     match = base + matchIndex;
   1083                 }
   1084                 forwardH = LZ4_hashPosition(forwardIp, tableType);
   1085                 LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
   1086 
   1087                 DEBUGLOG(7, "candidate at pos=%u  (offset=%u \n", matchIndex, current - matchIndex);
   1088                 if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; }    /* match outside of valid area */
   1089                 assert(matchIndex < current);
                        /* The distance test is skipped only for byU16 tables when
                         * LZ4_DISTANCE_MAX is not below LZ4_DISTANCE_ABSOLUTE_MAX. */
   1090                 if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
   1091                   && (matchIndex+LZ4_DISTANCE_MAX < current)) {
   1092                     continue;
   1093                 } /* too far */
   1094                 assert((current - matchIndex) <= LZ4_DISTANCE_MAX);  /* match now expected within distance */
   1095 
   1096                 if (LZ4_read32(match) == LZ4_read32(ip)) {
   1097                     if (maybe_extMem) offset = current - matchIndex;
   1098                     break;   /* match found */
   1099                 }
   1100 
   1101             } while(1);
   1102         }
   1103 
   1104         /* Catch up */
                /* Extend the match backwards while the preceding bytes are equal, without
                 * moving ip before anchor nor match before lowLimit. filledIp remembers the
                 * position that was just registered in the hash table. */
   1105         filledIp = ip;
   1106         assert(ip > anchor); /* this is always true as ip has been advanced before entering the main loop */
   1107         if ((match > lowLimit) && unlikely(ip[-1] == match[-1])) {
   1108             do { ip--; match--; } while (((ip > anchor) & (match > lowLimit)) && (unlikely(ip[-1] == match[-1])));
   1109         }
   1110 
   1111         /* Encode Literals */
                /* One token byte is reserved first. The literal length goes into its bits
                 * above ML_BITS; a length >= RUN_MASK stores RUN_MASK there and continues in
                 * extra bytes (255 each, then the remainder). */
   1112         {   unsigned const litLength = (unsigned)(ip - anchor);
   1113             token = op++;
   1114             if ((outputDirective == limitedOutput) &&  /* Check output buffer overflow */
   1115                 (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
   1116                 return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
   1117             }
   1118             if ((outputDirective == fillOutput) &&
   1119                 (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
                        /* give the reserved token byte back; everything from anchor becomes the last literal run */
   1120                 op--;
   1121                 goto _last_literals;
   1122             }
   1123             if (litLength >= RUN_MASK) {
   1124                 unsigned len = litLength - RUN_MASK;
   1125                 *token = (RUN_MASK<<ML_BITS);
   1126                 for(; len >= 255 ; len-=255) *op++ = 255;
   1127                 *op++ = (BYTE)len;
   1128             }
   1129             else *token = (BYTE)(litLength<<ML_BITS);
   1130 
   1131             /* Copy Literals */
   1132             LZ4_wildCopy8(op, anchor, op+litLength);
   1133             op+=litLength;
   1134             DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
   1135                         (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
   1136         }
   1137 
   1138 _next_match:
   1139         /* at this stage, the following variables must be correctly set :
   1140          * - ip : at start of LZ operation
   1141          * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
   1142          * - offset : if maybe_extMem==1 (constant)
   1143          * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
   1144          * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
   1145          */
   1146 
   1147         if ((outputDirective == fillOutput) &&
   1148             (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
   1149             /* the match was too close to the end, rewind and go to last literals */
   1150             op = token;
   1151             goto _last_literals;
   1152         }
   1153 
   1154         /* Encode Offset */
                /* The offset is written as 2 bytes through LZ4_writeLE16(). With external
                 * memory it comes from the index difference, since ip and match may belong
                 * to different buffers; otherwise it is the pointer difference. */
   1155         if (maybe_extMem) {   /* static test */
   1156             DEBUGLOG(6, "             with offset=%u  (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
   1157             assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
   1158             LZ4_writeLE16(op, (U16)offset); op+=2;
   1159         } else  {
   1160             DEBUGLOG(6, "             with offset=%u  (same segment)", (U32)(ip - match));
   1161             assert(ip-match <= LZ4_DISTANCE_MAX);
   1162             LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
   1163         }
   1164 
   1165         /* Encode MatchLength */
   1166         {   unsigned matchCode;
   1167 
   1168             if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
   1169               && (lowLimit==dictionary) /* match within extDict */ ) {
                        /* Count up to the end of the dictionary first (capped at matchlimit).
                         * If the match reaches that end, keep counting against the
                         * beginning of source. */
   1170                 const BYTE* limit = ip + (dictEnd-match);
   1171                 assert(dictEnd > match);
   1172                 if (limit > matchlimit) limit = matchlimit;
   1173                 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
   1174                 ip += (size_t)matchCode + MINMATCH;
   1175                 if (ip==limit) {
   1176                     unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
   1177                     matchCode += more;
   1178                     ip += more;
   1179                 }
   1180                 DEBUGLOG(6, "             with matchLength=%u starting in extDict", matchCode+MINMATCH);
   1181             } else {
   1182                 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
   1183                 ip += (size_t)matchCode + MINMATCH;
   1184                 DEBUGLOG(6, "             with matchLength=%u", matchCode+MINMATCH);
   1185             }
   1186 
   1187             if ((outputDirective) &&    /* Check output buffer overflow */
   1188                 (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
   1189                 if (outputDirective == fillOutput) {
   1190                     /* Match description too long : reduce it */
   1191                     U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
                            /* move ip back by the part of the match that is no longer encoded */
   1192                     ip -= matchCode - newMatchCode;
   1193                     assert(newMatchCode < matchCode);
   1194                     matchCode = newMatchCode;
   1195                     if (unlikely(ip <= filledIp)) {
   1196                         /* We have already filled up to filledIp so if ip ends up less than filledIp
   1197                          * we have positions in the hash table beyond the current position. This is
   1198                          * a problem if we reuse the hash table. So we have to remove these positions
   1199                          * from the hash table.
   1200                          */
   1201                         const BYTE* ptr;
   1202                         DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
   1203                         for (ptr = ip; ptr <= filledIp; ++ptr) {
   1204                             U32 const h = LZ4_hashPosition(ptr, tableType);
   1205                             LZ4_clearHash(h, cctx->hashTable, tableType);
   1206                         }
   1207                     }
   1208                 } else {
   1209                     assert(outputDirective == limitedOutput);
   1210                     return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
   1211                 }
   1212             }
                    /* matchCode below ML_MASK fits in the token. Otherwise the token field is
                     * set to ML_MASK and the rest follows as bytes of 255 plus a final
                     * remainder byte. The 255 bytes are written four at a time with
                     * LZ4_write32(); op only advances over the ones that are needed. */
   1213             if (matchCode >= ML_MASK) {
   1214                 *token += ML_MASK;
   1215                 matchCode -= ML_MASK;
   1216                 LZ4_write32(op, 0xFFFFFFFF);
   1217                 while (matchCode >= 4*255) {
   1218                     op+=4;
   1219                     LZ4_write32(op, 0xFFFFFFFF);
   1220                     matchCode -= 4*255;
   1221                 }
   1222                 op += matchCode / 255;
   1223                 *op++ = (BYTE)(matchCode % 255);
   1224             } else
   1225                 *token += (BYTE)(matchCode);
   1226         }
   1227         /* Ensure we have enough space for the last literals. */
   1228         assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
   1229 
                /* The next sequence starts right after the match that was just encoded. */
   1230         anchor = ip;
   1231 
   1232         /* Test end of chunk */
   1233         if (ip >= mflimitPlusOne) break;
   1234 
   1235         /* Fill table */
                /* Registers position ip-2, i.e. two bytes before the end of the match just encoded. */
   1236         {   U32 const h = LZ4_hashPosition(ip-2, tableType);
   1237             if (tableType == byPtr) {
   1238                 LZ4_putPositionOnHash(ip-2, h, cctx->hashTable, byPtr);
   1239             } else {
   1240                 U32 const idx = (U32)((ip-2) - base);
   1241                 LZ4_putIndexOnHash(idx, h, cctx->hashTable, tableType);
   1242         }   }
   1243 
   1244         /* Test next position */
                /* If a valid candidate is found right at ip, a new sequence with zero
                 * literals is opened (token = 0) and control jumps back to _next_match. */
   1245         if (tableType == byPtr) {
   1246 
   1247             match = LZ4_getPosition(ip, cctx->hashTable, tableType);
   1248             LZ4_putPosition(ip, cctx->hashTable, tableType);
   1249             if ( (match+LZ4_DISTANCE_MAX >= ip)
   1250               && (LZ4_read32(match) == LZ4_read32(ip)) )
   1251             { token=op++; *token=0; goto _next_match; }
   1252 
   1253         } else {   /* byU32, byU16 */
   1254 
   1255             U32 const h = LZ4_hashPosition(ip, tableType);
   1256             U32 const current = (U32)(ip-base);
   1257             U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
   1258             assert(matchIndex < current);
   1259             if (dictDirective == usingDictCtx) {
   1260                 if (matchIndex < startIndex) {
   1261                     /* there was no match, try the dictionary */
   1262                     assert(tableType == byU32);
   1263                     matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
   1264                     match = dictBase + matchIndex;
   1265                     lowLimit = dictionary;   /* required for match length counter */
   1266                     matchIndex += dictDelta;
   1267                 } else {
   1268                     match = base + matchIndex;
   1269                     lowLimit = (const BYTE*)source;  /* required for match length counter */
   1270                 }
   1271             } else if (dictDirective==usingExtDict) {
   1272                 if (matchIndex < startIndex) {
   1273                     assert(dictBase);
   1274                     match = dictBase + matchIndex;
   1275                     lowLimit = dictionary;   /* required for match length counter */
   1276                 } else {
   1277                     match = base + matchIndex;
   1278                     lowLimit = (const BYTE*)source;   /* required for match length counter */
   1279                 }
   1280             } else {   /* single memory segment */
   1281                 match = base + matchIndex;
   1282             }
   1283             LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
   1284             assert(matchIndex < current);
                    /* Same acceptance tests as in the search loop: inside the valid
                     * dictionary area, within distance, and first 4 bytes equal. */
   1285             if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
   1286               && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
   1287               && (LZ4_read32(match) == LZ4_read32(ip)) ) {
   1288                 token=op++;
   1289                 *token=0;
   1290                 if (maybe_extMem) offset = current - matchIndex;
   1291                 DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
   1292                             (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
   1293                 goto _next_match;
   1294             }
   1295         }
   1296 
   1297         /* Prepare next loop */
   1298         forwardH = LZ4_hashPosition(++ip, tableType);
   1299 
   1300     }
   1301 
   1302 _last_literals:
   1303     /* Encode Last Literals */
            /* Everything from anchor to iend is emitted as one final literal run.
             * With fillOutput, the run is shortened to what still fits before olimit. */
   1304     {   size_t lastRun = (size_t)(iend - anchor);
   1305         if ( (outputDirective) &&  /* Check output buffer overflow */
   1306             (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
   1307             if (outputDirective == fillOutput) {
   1308                 /* adapt lastRun to fill 'dst' */
   1309                 assert(olimit >= op);
   1310                 lastRun  = (size_t)(olimit-op) - 1/*token*/;
   1311                 lastRun -= (lastRun + 256 - RUN_MASK) / 256;  /*additional length tokens*/
   1312             } else {
   1313                 assert(outputDirective == limitedOutput);
   1314                 return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
   1315             }
   1316         }
   1317         DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
   1318         if (lastRun >= RUN_MASK) {
   1319             size_t accumulator = lastRun - RUN_MASK;
   1320             *op++ = RUN_MASK << ML_BITS;
   1321             for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
   1322             *op++ = (BYTE) accumulator;
   1323         } else {
   1324             *op++ = (BYTE)(lastRun<<ML_BITS);
   1325         }
   1326         LZ4_memcpy(op, anchor, lastRun);
                /* ip now marks the end of the input that was actually encoded */
   1327         ip = anchor + lastRun;
   1328         op += lastRun;
   1329     }
   1330 
   1331     if (outputDirective == fillOutput) {
   1332         *inputConsumed = (int) (((const char*)ip)-source);
   1333     }
   1334     result = (int)(((char*)op) - dest);
   1335     assert(result > 0);
   1336     DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
   1337     return result;
   1338 }
   1339 
   1340 /** LZ4_compress_generic() :
   1341  *  inlined, to ensure branches are decided at compilation time;
   1342  *  takes care of src == (NULL, 0)
   1343  *  and forward the rest to LZ4_compress_generic_validated */
   1344 LZ4_FORCE_INLINE int LZ4_compress_generic(
   1345                  LZ4_stream_t_internal* const cctx,
   1346                  const char* const src,
   1347                  char* const dst,
   1348                  const int srcSize,
   1349                  int *inputConsumed, /* only written when outputDirective == fillOutput */
   1350                  const int dstCapacity,
   1351                  const limitedOutput_directive outputDirective,
   1352                  const tableType_t tableType,
   1353                  const dict_directive dictDirective,
   1354                  const dictIssue_directive dictIssue,
   1355                  const int acceleration)
   1356 {
   1357     DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
   1358                 srcSize, dstCapacity);
   1359 
   1360     if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; }  /* Unsupported srcSize, too large (or negative) */
   1361     if (srcSize == 0) {   /* src == NULL supported if srcSize == 0 */
   1362         if (outputDirective != notLimited && dstCapacity <= 0) return 0;  /* no output, can't write anything */
   1363         DEBUGLOG(5, "Generating an empty block");
   1364         assert(outputDirective == notLimited || dstCapacity >= 1);
   1365         assert(dst != NULL);
   1366         dst[0] = 0;
   1367         if (outputDirective == fillOutput) {
   1368             assert (inputConsumed != NULL);
   1369             *inputConsumed = 0;
   1370         }
   1371         return 1;
   1372     }
   1373     assert(src != NULL);
   1374 
   1375     return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
   1376                 inputConsumed, /* only written into if outputDirective == fillOutput */
   1377                 dstCapacity, outputDirective,
   1378                 tableType, dictDirective, dictIssue, acceleration);
   1379 }
   1380 
   1381 
   1382 int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
   1383 {
   1384     LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
   1385     assert(ctx != NULL);
   1386     if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
   1387     if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
   1388     if (maxOutputSize >= LZ4_compressBound(inputSize)) {
   1389         if (inputSize < LZ4_64Klimit) {
   1390             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
   1391         } else {
   1392             const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
   1393             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
   1394         }
   1395     } else {
   1396         if (inputSize < LZ4_64Klimit) {
   1397             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
   1398         } else {
   1399             const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
   1400             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
   1401         }
   1402     }
   1403 }
   1404 
   1405 /**
   1406  * LZ4_compress_fast_extState_fastReset() :
   1407  * A variant of LZ4_compress_fast_extState().
   1408  *
   1409  * Using this variant avoids an expensive initialization step. It is only safe
   1410  * to call if the state buffer is known to be correctly initialized already
   1411  * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of
   1412  * "correctly initialized").
   1413  */
   1414 int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
   1415 {
   1416     LZ4_stream_t_internal* const ctx = &((LZ4_stream_t*)state)->internal_donotuse;
   1417     if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
   1418     if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
   1419     assert(ctx != NULL);
   1420 
   1421     if (dstCapacity >= LZ4_compressBound(srcSize)) {
   1422         if (srcSize < LZ4_64Klimit) {
   1423             const tableType_t tableType = byU16;
   1424             LZ4_prepareTable(ctx, srcSize, tableType);
   1425             if (ctx->currentOffset) {
   1426                 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration);
   1427             } else {
   1428                 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
   1429             }
   1430         } else {
   1431             const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
   1432             LZ4_prepareTable(ctx, srcSize, tableType);
   1433             return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
   1434         }
   1435     } else {
   1436         if (srcSize < LZ4_64Klimit) {
   1437             const tableType_t tableType = byU16;
   1438             LZ4_prepareTable(ctx, srcSize, tableType);
   1439             if (ctx->currentOffset) {
   1440                 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration);
   1441             } else {
   1442                 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
   1443             }
   1444         } else {
   1445             const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
   1446             LZ4_prepareTable(ctx, srcSize, tableType);
   1447             return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
   1448         }
   1449     }
   1450 }
   1451 
   1452 
   1453 int LZ4_compress_fast(const char* src, char* dest, int srcSize, int dstCapacity, int acceleration)
   1454 {
   1455     int result;
   1456 #if (LZ4_HEAPMODE)
   1457     LZ4_stream_t* const ctxPtr = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
   1458     if (ctxPtr == NULL) return 0;
   1459 #else
   1460     LZ4_stream_t ctx;
   1461     LZ4_stream_t* const ctxPtr = &ctx;
   1462 #endif
   1463     result = LZ4_compress_fast_extState(ctxPtr, src, dest, srcSize, dstCapacity, acceleration);
   1464 
   1465 #if (LZ4_HEAPMODE)
   1466     FREEMEM(ctxPtr);
   1467 #endif
   1468     return result;
   1469 }
   1470 
   1471 
   1472 int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity)
   1473 {
   1474     return LZ4_compress_fast(src, dst, srcSize, dstCapacity, 1);
   1475 }
   1476 
   1477 
   1478 /* Note!: This function leaves the stream in an unclean/broken state!
   1479  * It is not safe to subsequently use the same state with a _fastReset() or
   1480  * _continue() call without resetting it. */
   1481 static int LZ4_compress_destSize_extState_internal(LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration)
   1482 {
   1483     void* const s = LZ4_initStream(state, sizeof (*state));
   1484     assert(s != NULL); (void)s;
   1485 
   1486     if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {  /* compression success is guaranteed */
   1487         return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, acceleration);
   1488     } else {
   1489         if (*srcSizePtr < LZ4_64Klimit) {
   1490             return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, acceleration);
   1491         } else {
   1492             tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
   1493             return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, acceleration);
   1494     }   }
   1495 }
   1496 
   1497 int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration)
   1498 {
   1499     int const r = LZ4_compress_destSize_extState_internal((LZ4_stream_t*)state, src, dst, srcSizePtr, targetDstSize, acceleration);
   1500     /* clean the state on exit */
   1501     LZ4_initStream(state, sizeof (LZ4_stream_t));
   1502     return r;
   1503 }
   1504 
   1505 
   1506 int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
   1507 {
   1508 #if (LZ4_HEAPMODE)
   1509     LZ4_stream_t* const ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
   1510     if (ctx == NULL) return 0;
   1511 #else
   1512     LZ4_stream_t ctxBody;
   1513     LZ4_stream_t* const ctx = &ctxBody;
   1514 #endif
   1515 
   1516     int result = LZ4_compress_destSize_extState_internal(ctx, src, dst, srcSizePtr, targetDstSize, 1);
   1517 
   1518 #if (LZ4_HEAPMODE)
   1519     FREEMEM(ctx);
   1520 #endif
   1521     return result;
   1522 }
   1523 
   1524 
   1525 
   1526 /*-******************************
   1527 *  Streaming functions
   1528 ********************************/
   1529 
   1530 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
   1531 LZ4_stream_t* LZ4_createStream(void)
   1532 {
   1533     LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
   1534     LZ4_STATIC_ASSERT(sizeof(LZ4_stream_t) >= sizeof(LZ4_stream_t_internal));
   1535     DEBUGLOG(4, "LZ4_createStream %p", lz4s);
   1536     if (lz4s == NULL) return NULL;
   1537     LZ4_initStream(lz4s, sizeof(*lz4s));
   1538     return lz4s;
   1539 }
   1540 #endif
   1541 
   1542 static size_t LZ4_stream_t_alignment(void)
   1543 {
   1544 #if LZ4_ALIGN_TEST
   1545     typedef struct { char c; LZ4_stream_t t; } t_a;
   1546     return sizeof(t_a) - sizeof(LZ4_stream_t);
   1547 #else
   1548     return 1;  /* effectively disabled */
   1549 #endif
   1550 }
   1551 
   1552 LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
   1553 {
   1554     DEBUGLOG(5, "LZ4_initStream");
   1555     if (buffer == NULL) { return NULL; }
   1556     if (size < sizeof(LZ4_stream_t)) { return NULL; }
   1557     if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL;
   1558     MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));
   1559     return (LZ4_stream_t*)buffer;
   1560 }
   1561 
   1562 /* resetStream is now deprecated,
   1563  * prefer initStream() which is more general */
   1564 void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
   1565 {
   1566     DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
   1567     MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal));
   1568 }
   1569 
   1570 void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
   1571     LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);
   1572 }
   1573 
   1574 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
   1575 int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
   1576 {
   1577     if (!LZ4_stream) return 0;   /* support free on NULL */
   1578     DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
   1579     FREEMEM(LZ4_stream);
   1580     return (0);
   1581 }
   1582 #endif
   1583 
   1584 
   1585 typedef enum { _ld_fast, _ld_slow } LoadDict_mode_e;
   1586 #define HASH_UNIT sizeof(reg_t)
   1587 int LZ4_loadDict_internal(LZ4_stream_t* LZ4_dict,
   1588                     const char* dictionary, int dictSize,
   1589                     LoadDict_mode_e _ld)
   1590 {
   1591     LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
   1592     const tableType_t tableType = byU32;
   1593     const BYTE* p = (const BYTE*)dictionary;
   1594     const BYTE* const dictEnd = p + dictSize;
   1595     U32 idx32;
   1596 
   1597     DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);
   1598 
   1599     /* It's necessary to reset the context,
   1600      * and not just continue it with prepareTable()
   1601      * to avoid any risk of generating overflowing matchIndex
   1602      * when compressing using this dictionary */
   1603     LZ4_resetStream(LZ4_dict);
   1604 
   1605     /* We always increment the offset by 64 KB, since, if the dict is longer,
   1606      * we truncate it to the last 64k, and if it's shorter, we still want to
   1607      * advance by a whole window length so we can provide the guarantee that
   1608      * there are only valid offsets in the window, which allows an optimization
   1609      * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
   1610      * dictionary isn't a full 64k. */
   1611     dict->currentOffset += 64 KB;
   1612 
   1613     if (dictSize < (int)HASH_UNIT) {
   1614         return 0;
   1615     }
   1616 
   1617     if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
   1618     dict->dictionary = p;
   1619     dict->dictSize = (U32)(dictEnd - p);
   1620     dict->tableType = (U32)tableType;
   1621     idx32 = dict->currentOffset - dict->dictSize;
   1622 
   1623     while (p <= dictEnd-HASH_UNIT) {
   1624         U32 const h = LZ4_hashPosition(p, tableType);
   1625         /* Note: overwriting => favors positions end of dictionary */
   1626         LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType);
   1627         p+=3; idx32+=3;
   1628     }
   1629 
   1630     if (_ld == _ld_slow) {
   1631         /* Fill hash table with additional references, to improve compression capability */
   1632         p = dict->dictionary;
   1633         idx32 = dict->currentOffset - dict->dictSize;
   1634         while (p <= dictEnd-HASH_UNIT) {
   1635             U32 const h = LZ4_hashPosition(p, tableType);
   1636             U32 const limit = dict->currentOffset - 64 KB;
   1637             if (LZ4_getIndexOnHash(h, dict->hashTable, tableType) <= limit) {
   1638                 /* Note: not overwriting => favors positions beginning of dictionary */
   1639                 LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType);
   1640             }
   1641             p++; idx32++;
   1642         }
   1643     }
   1644 
   1645     return (int)dict->dictSize;
   1646 }
   1647 
   1648 int LZ4_loadDict(LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
   1649 {
   1650     return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_fast);
   1651 }
   1652 
   1653 int LZ4_loadDictSlow(LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
   1654 {
   1655     return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_slow);
   1656 }
   1657 
   1658 void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream)
   1659 {
   1660     const LZ4_stream_t_internal* dictCtx = (dictionaryStream == NULL) ? NULL :
   1661         &(dictionaryStream->internal_donotuse);
   1662 
   1663     DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
   1664              workingStream, dictionaryStream,
   1665              dictCtx != NULL ? dictCtx->dictSize : 0);
   1666 
   1667     if (dictCtx != NULL) {
   1668         /* If the current offset is zero, we will never look in the
   1669          * external dictionary context, since there is no value a table
   1670          * entry can take that indicate a miss. In that case, we need
   1671          * to bump the offset to something non-zero.
   1672          */
   1673         if (workingStream->internal_donotuse.currentOffset == 0) {
   1674             workingStream->internal_donotuse.currentOffset = 64 KB;
   1675         }
   1676 
   1677         /* Don't actually attach an empty dictionary.
   1678          */
   1679         if (dictCtx->dictSize == 0) {
   1680             dictCtx = NULL;
   1681         }
   1682     }
   1683     workingStream->internal_donotuse.dictCtx = dictCtx;
   1684 }
   1685 
   1686 
   1687 static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
   1688 {
   1689     assert(nextSize >= 0);
   1690     if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) {   /* potential ptrdiff_t overflow (32-bits mode) */
   1691         /* rescale hash table */
   1692         U32 const delta = LZ4_dict->currentOffset - 64 KB;
   1693         const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
   1694         int i;
   1695         DEBUGLOG(4, "LZ4_renormDictT");
   1696         for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
   1697             if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
   1698             else LZ4_dict->hashTable[i] -= delta;
   1699         }
   1700         LZ4_dict->currentOffset = 64 KB;
   1701         if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
   1702         LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
   1703     }
   1704 }
   1705 
   1706 
   1707 int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
   1708                                 const char* source, char* dest,
   1709                                 int inputSize, int maxOutputSize,
   1710                                 int acceleration)
   1711 {
   1712     const tableType_t tableType = byU32;
   1713     LZ4_stream_t_internal* const streamPtr = &LZ4_stream->internal_donotuse;
   1714     const char* dictEnd = streamPtr->dictSize ? (const char*)streamPtr->dictionary + streamPtr->dictSize : NULL;
   1715 
   1716     DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)", inputSize, streamPtr->dictSize);
   1717 
   1718     LZ4_renormDictT(streamPtr, inputSize);   /* fix index overflow */
   1719     if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
   1720     if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
   1721 
   1722     /* invalidate tiny dictionaries */
   1723     if ( (streamPtr->dictSize < 4)     /* tiny dictionary : not enough for a hash */
   1724       && (dictEnd != source)           /* prefix mode */
   1725       && (inputSize > 0)               /* tolerance : don't lose history, in case next invocation would use prefix mode */
   1726       && (streamPtr->dictCtx == NULL)  /* usingDictCtx */
   1727       ) {
   1728         DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
   1729         /* remove dictionary existence from history, to employ faster prefix mode */
   1730         streamPtr->dictSize = 0;
   1731         streamPtr->dictionary = (const BYTE*)source;
   1732         dictEnd = source;
   1733     }
   1734 
   1735     /* Check overlapping input/dictionary space */
   1736     {   const char* const sourceEnd = source + inputSize;
   1737         if ((sourceEnd > (const char*)streamPtr->dictionary) && (sourceEnd < dictEnd)) {
   1738             streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
   1739             if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
   1740             if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
   1741             streamPtr->dictionary = (const BYTE*)dictEnd - streamPtr->dictSize;
   1742         }
   1743     }
   1744 
   1745     /* prefix mode : source data follows dictionary */
   1746     if (dictEnd == source) {
   1747         if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
   1748             return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
   1749         else
   1750             return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
   1751     }
   1752 
   1753     /* external dictionary mode */
   1754     {   int result;
   1755         if (streamPtr->dictCtx) {
   1756             /* We depend here on the fact that dictCtx'es (produced by
   1757              * LZ4_loadDict) guarantee that their tables contain no references
   1758              * to offsets between dictCtx->currentOffset - 64 KB and
   1759              * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe
   1760              * to use noDictIssue even when the dict isn't a full 64 KB.
   1761              */
   1762             if (inputSize > 4 KB) {
   1763                 /* For compressing large blobs, it is faster to pay the setup
   1764                  * cost to copy the dictionary's tables into the active context,
   1765                  * so that the compression loop is only looking into one table.
   1766                  */
   1767                 LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));
   1768                 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
   1769             } else {
   1770                 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
   1771             }
   1772         } else {  /* small data <= 4 KB */
   1773             if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
   1774                 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
   1775             } else {
   1776                 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
   1777             }
   1778         }
   1779         streamPtr->dictionary = (const BYTE*)source;
   1780         streamPtr->dictSize = (U32)inputSize;
   1781         return result;
   1782     }
   1783 }
   1784 
   1785 
   1786 /* Hidden debug function, to force-test external dictionary mode */
   1787 int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
   1788 {
   1789     LZ4_stream_t_internal* const streamPtr = &LZ4_dict->internal_donotuse;
   1790     int result;
   1791 
   1792     LZ4_renormDictT(streamPtr, srcSize);
   1793 
   1794     if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
   1795         result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1);
   1796     } else {
   1797         result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
   1798     }
   1799 
   1800     streamPtr->dictionary = (const BYTE*)source;
   1801     streamPtr->dictSize = (U32)srcSize;
   1802 
   1803     return result;
   1804 }
   1805 
   1806 
   1807 /*! LZ4_saveDict() :
   1808  *  If previously compressed data block is not guaranteed to remain available at its memory location,
   1809  *  save it into a safer place (char* safeBuffer).
   1810  *  Note : no need to call LZ4_loadDict() afterwards, dictionary is immediately usable,
   1811  *         one can therefore call LZ4_compress_fast_continue() right after.
   1812  * @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
   1813  */
   1814 int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
   1815 {
   1816     LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
   1817 
   1818     DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize, safeBuffer);
   1819 
   1820     if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
   1821     if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }
   1822 
   1823     if (safeBuffer == NULL) assert(dictSize == 0);
   1824     if (dictSize > 0) {
   1825         const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;
   1826         assert(dict->dictionary);
   1827         LZ4_memmove(safeBuffer, previousDictEnd - dictSize, (size_t)dictSize);
   1828     }
   1829 
   1830     dict->dictionary = (const BYTE*)safeBuffer;
   1831     dict->dictSize = (U32)dictSize;
   1832 
   1833     return dictSize;
   1834 }
   1835 
   1836 
   1837 
   1838 /*-*******************************
   1839  *  Decompression functions
   1840  ********************************/
   1841 
   1842 typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
   1843 
   1844 #undef MIN
   1845 #define MIN(a,b)    ( (a) < (b) ? (a) : (b) )
   1846 
   1847 
   1848 /* variant for decompress_unsafe()
   1849  * does not know end of input
   1850  * presumes input is well formed
   1851  * note : will consume at least one byte */
   1852 static size_t read_long_length_no_check(const BYTE** pp)
   1853 {
   1854     size_t b, l = 0;
   1855     do { b = **pp; (*pp)++; l += b; } while (b==255);
   1856     DEBUGLOG(6, "read_long_length_no_check: +length=%zu using %zu input bytes", l, l/255 + 1)
   1857     return l;
   1858 }
   1859 
   1860 /* core decoder variant for LZ4_decompress_fast*()
   1861  * for legacy support only : these entry points are deprecated.
   1862  * - Presumes input is correctly formed (no defense vs malformed inputs)
   1863  * - Does not know input size (presume input buffer is "large enough")
   1864  * - Decompress a full block (only)
   1865  * @return : nb of bytes read from input.
   1866  * Note : this variant is not optimized for speed, just for maintenance.
   1867  *        the goal is to remove support of decompress_fast*() variants by v2.0
   1868 **/
   1869 LZ4_FORCE_INLINE int
   1870 LZ4_decompress_unsafe_generic(
   1871                  const BYTE* const istart,
   1872                  BYTE* const ostart,
   1873                  int decompressedSize,
   1874 
   1875                  size_t prefixSize,
   1876                  const BYTE* const dictStart,  /* only if dict==usingExtDict */
   1877                  const size_t dictSize         /* note: =0 if dictStart==NULL */
   1878                  )
   1879 {
   1880     const BYTE* ip = istart;
   1881     BYTE* op = (BYTE*)ostart;
   1882     BYTE* const oend = ostart + decompressedSize;
   1883     const BYTE* const prefixStart = ostart - prefixSize;
   1884 
   1885     DEBUGLOG(5, "LZ4_decompress_unsafe_generic");
   1886     if (dictStart == NULL) assert(dictSize == 0);
   1887 
   1888     while (1) {
   1889         /* start new sequence */
   1890         unsigned token = *ip++;
   1891 
   1892         /* literals */
   1893         {   size_t ll = token >> ML_BITS;
   1894             if (ll==15) {
   1895                 /* long literal length */
   1896                 ll += read_long_length_no_check(&ip);
   1897             }
   1898             if ((size_t)(oend-op) < ll) return -1; /* output buffer overflow */
   1899             LZ4_memmove(op, ip, ll); /* support in-place decompression */
   1900             op += ll;
   1901             ip += ll;
   1902             if ((size_t)(oend-op) < MFLIMIT) {
   1903                 if (op==oend) break;  /* end of block */
   1904                 DEBUGLOG(5, "invalid: literals end at distance %zi from end of block", oend-op);
   1905                 /* incorrect end of block :
   1906                  * last match must start at least MFLIMIT==12 bytes before end of output block */
   1907                 return -1;
   1908         }   }
   1909 
   1910         /* match */
   1911         {   size_t ml = token & 15;
   1912             size_t const offset = LZ4_readLE16(ip);
   1913             ip+=2;
   1914 
   1915             if (ml==15) {
   1916                 /* long literal length */
   1917                 ml += read_long_length_no_check(&ip);
   1918             }
   1919             ml += MINMATCH;
   1920 
   1921             if ((size_t)(oend-op) < ml) return -1; /* output buffer overflow */
   1922 
   1923             {   const BYTE* match = op - offset;
   1924 
   1925                 /* out of range */
   1926                 if (offset > (size_t)(op - prefixStart) + dictSize) {
   1927                     DEBUGLOG(6, "offset out of range");
   1928                     return -1;
   1929                 }
   1930 
   1931                 /* check special case : extDict */
   1932                 if (offset > (size_t)(op - prefixStart)) {
   1933                     /* extDict scenario */
   1934                     const BYTE* const dictEnd = dictStart + dictSize;
   1935                     const BYTE* extMatch = dictEnd - (offset - (size_t)(op-prefixStart));
   1936                     size_t const extml = (size_t)(dictEnd - extMatch);
   1937                     if (extml > ml) {
   1938                         /* match entirely within extDict */
   1939                         LZ4_memmove(op, extMatch, ml);
   1940                         op += ml;
   1941                         ml = 0;
   1942                     } else {
   1943                         /* match split between extDict & prefix */
   1944                         LZ4_memmove(op, extMatch, extml);
   1945                         op += extml;
   1946                         ml -= extml;
   1947                     }
   1948                     match = prefixStart;
   1949                 }
   1950 
   1951                 /* match copy - slow variant, supporting overlap copy */
   1952                 {   size_t u;
   1953                     for (u=0; u<ml; u++) {
   1954                         op[u] = match[u];
   1955             }   }   }
   1956             op += ml;
   1957             if ((size_t)(oend-op) < LASTLITERALS) {
   1958                 DEBUGLOG(5, "invalid: match ends at distance %zi from end of block", oend-op);
   1959                 /* incorrect end of block :
   1960                  * last match must stop at least LASTLITERALS==5 bytes before end of output block */
   1961                 return -1;
   1962             }
   1963         } /* match */
   1964     } /* main loop */
   1965     return (int)(ip - istart);
   1966 }
   1967 
   1968 
   1969 /* Read the variable-length literal or match length.
   1970  *
   1971  * @ip : input pointer
   1972  * @ilimit : position after which if length is not decoded, the input is necessarily corrupted.
   1973  * @initial_check - check ip >= ipmax before start of loop.  Returns initial_error if so.
   1974  * @error (output) - error code.  Must be set to 0 before call.
   1975 **/
   1976 typedef size_t Rvl_t;
   1977 static const Rvl_t rvl_error = (Rvl_t)(-1);
   1978 LZ4_FORCE_INLINE Rvl_t
   1979 read_variable_length(const BYTE** ip, const BYTE* ilimit,
   1980                      int initial_check)
   1981 {
   1982     Rvl_t s, length = 0;
   1983     assert(ip != NULL);
   1984     assert(*ip !=  NULL);
   1985     assert(ilimit != NULL);
   1986     if (initial_check && unlikely((*ip) >= ilimit)) {    /* read limit reached */
   1987         return rvl_error;
   1988     }
   1989     s = **ip;
   1990     (*ip)++;
   1991     length += s;
   1992     if (unlikely((*ip) > ilimit)) {    /* read limit reached */
   1993         return rvl_error;
   1994     }
   1995     /* accumulator overflow detection (32-bit mode only) */
   1996     if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1)/2)) ) {
   1997         return rvl_error;
   1998     }
   1999     if (likely(s != 255)) return length;
   2000     do {
   2001         s = **ip;
   2002         (*ip)++;
   2003         length += s;
   2004         if (unlikely((*ip) > ilimit)) {    /* read limit reached */
   2005             return rvl_error;
   2006         }
   2007         /* accumulator overflow detection (32-bit mode only) */
   2008         if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1)/2)) ) {
   2009             return rvl_error;
   2010         }
   2011     } while (s == 255);
   2012 
   2013     return length;
   2014 }
   2015 
   2016 /*! LZ4_decompress_generic() :
   2017  *  This generic decompression function covers all use cases.
   2018  *  It shall be instantiated several times, using different sets of directives.
   2019  *  Note that it is important for performance that this function really get inlined,
   2020  *  in order to remove useless branches during compilation optimization.
   2021  */
   2022 LZ4_FORCE_INLINE int
   2023 LZ4_decompress_generic(
   2024                  const char* const src,
   2025                  char* const dst,
   2026                  int srcSize,
   2027                  int outputSize,         /* If endOnInput==endOnInputSize, this value is `dstCapacity` */
   2028 
   2029                  earlyEnd_directive partialDecoding,  /* full, partial */
   2030                  dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
   2031                  const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
   2032                  const BYTE* const dictStart,  /* only if dict==usingExtDict */
   2033                  const size_t dictSize         /* note : = 0 if noDict */
   2034                  )
   2035 {
   2036     if ((src == NULL) || (outputSize < 0)) { return -1; }
   2037 
   2038     {   const BYTE* ip = (const BYTE*) src;
   2039         const BYTE* const iend = ip + srcSize;
   2040 
   2041         BYTE* op = (BYTE*) dst;
   2042         BYTE* const oend = op + outputSize;
   2043         BYTE* cpy;
   2044 
   2045         const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
   2046 
   2047         const int checkOffset = (dictSize < (int)(64 KB));
   2048 
   2049 
   2050         /* Set up the "end" pointers for the shortcut. */
   2051         const BYTE* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/;
   2052         const BYTE* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/;
   2053 
   2054         const BYTE* match;
   2055         size_t offset;
   2056         unsigned token;
   2057         size_t length;
   2058 
   2059 
   2060         DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
   2061 
   2062         /* Special cases */
   2063         assert(lowPrefix <= op);
   2064         if (unlikely(outputSize==0)) {
   2065             /* Empty output buffer */
   2066             if (partialDecoding) return 0;
   2067             return ((srcSize==1) && (*ip==0)) ? 0 : -1;
   2068         }
   2069         if (unlikely(srcSize==0)) { return -1; }
   2070 
   2071     /* LZ4_FAST_DEC_LOOP:
   2072      * designed for modern OoO performance cpus,
   2073      * where copying reliably 32-bytes is preferable to an unpredictable branch.
   2074      * note : fast loop may show a regression for some client arm chips. */
   2075 #if LZ4_FAST_DEC_LOOP
   2076         if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
   2077             DEBUGLOG(6, "move to safe decode loop");
   2078             goto safe_decode;
   2079         }
   2080 
   2081         /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */
   2082         DEBUGLOG(6, "using fast decode loop");
   2083         while (1) {
   2084             /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
   2085             assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
   2086             assert(ip < iend);
   2087             token = *ip++;
   2088             length = token >> ML_BITS;  /* literal length */
   2089             DEBUGLOG(7, "blockPos%6u: litLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length);
   2090 
   2091             /* decode literal length */
   2092             if (length == RUN_MASK) {
   2093                 size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
   2094                 if (addl == rvl_error) {
   2095                     DEBUGLOG(6, "error reading long literal length");
   2096                     goto _output_error;
   2097                 }
   2098                 length += addl;
   2099                 if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
   2100                 if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
   2101 
   2102                 /* copy literals */
   2103                 LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
   2104                 if ((op+length>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
   2105                 LZ4_wildCopy32(op, ip, op+length);
   2106                 ip += length; op += length;
   2107             } else if (ip <= iend-(16 + 1/*max lit + offset + nextToken*/)) {
   2108                 /* We don't need to check oend, since we check it once for each loop below */
   2109                 DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
   2110                 /* Literals can only be <= 14, but hope compilers optimize better when copy by a register size */
   2111                 LZ4_memcpy(op, ip, 16);
   2112                 ip += length; op += length;
   2113             } else {
   2114                 goto safe_literal_copy;
   2115             }
   2116 
   2117             /* get offset */
   2118             offset = LZ4_readLE16(ip); ip+=2;
   2119             DEBUGLOG(6, "blockPos%6u: offset = %u", (unsigned)(op-(BYTE*)dst), (unsigned)offset);
   2120             match = op - offset;
   2121             assert(match <= op);  /* overflow check */
   2122 
   2123             /* get matchlength */
   2124             length = token & ML_MASK;
   2125             DEBUGLOG(7, "  match length token = %u (len==%u)", (unsigned)length, (unsigned)length+MINMATCH);
   2126 
   2127             if (length == ML_MASK) {
   2128                 size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
   2129                 if (addl == rvl_error) {
   2130                     DEBUGLOG(5, "error reading long match length");
   2131                     goto _output_error;
   2132                 }
   2133                 length += addl;
   2134                 length += MINMATCH;
   2135                 DEBUGLOG(7, "  long match length == %u", (unsigned)length);
   2136                 if (unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
   2137                 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
   2138                     goto safe_match_copy;
   2139                 }
   2140             } else {
   2141                 length += MINMATCH;
   2142                 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
   2143                     DEBUGLOG(7, "moving to safe_match_copy (ml==%u)", (unsigned)length);
   2144                     goto safe_match_copy;
   2145                 }
   2146 
   2147                 /* Fastpath check: skip LZ4_wildCopy32 when true */
   2148                 if ((dict == withPrefix64k) || (match >= lowPrefix)) {
   2149                     if (offset >= 8) {
   2150                         assert(match >= lowPrefix);
   2151                         assert(match <= op);
   2152                         assert(op + 18 <= oend);
   2153 
   2154                         LZ4_memcpy(op, match, 8);
   2155                         LZ4_memcpy(op+8, match+8, 8);
   2156                         LZ4_memcpy(op+16, match+16, 2);
   2157                         op += length;
   2158                         continue;
   2159             }   }   }
   2160 
   2161             if ( checkOffset && (unlikely(match + dictSize < lowPrefix)) ) {
   2162                 DEBUGLOG(5, "Error : pos=%zi, offset=%zi => outside buffers", op-lowPrefix, op-match);
   2163                 goto _output_error;
   2164             }
   2165             /* match starting within external dictionary */
   2166             if ((dict==usingExtDict) && (match < lowPrefix)) {
   2167                 assert(dictEnd != NULL);
   2168                 if (unlikely(op+length > oend-LASTLITERALS)) {
   2169                     if (partialDecoding) {
   2170                         DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
   2171                         length = MIN(length, (size_t)(oend-op));
   2172                     } else {
   2173                         DEBUGLOG(6, "end-of-block condition violated")
   2174                         goto _output_error;
   2175                 }   }
   2176 
   2177                 if (length <= (size_t)(lowPrefix-match)) {
   2178                     /* match fits entirely within external dictionary : just copy */
   2179                     LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
   2180                     op += length;
   2181                 } else {
   2182                     /* match stretches into both external dictionary and current block */
   2183                     size_t const copySize = (size_t)(lowPrefix - match);
   2184                     size_t const restSize = length - copySize;
   2185                     LZ4_memcpy(op, dictEnd - copySize, copySize);
   2186                     op += copySize;
   2187                     if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
   2188                         BYTE* const endOfMatch = op + restSize;
   2189                         const BYTE* copyFrom = lowPrefix;
   2190                         while (op < endOfMatch) { *op++ = *copyFrom++; }
   2191                     } else {
   2192                         LZ4_memcpy(op, lowPrefix, restSize);
   2193                         op += restSize;
   2194                 }   }
   2195                 continue;
   2196             }
   2197 
   2198             /* copy match within block */
   2199             cpy = op + length;
   2200 
   2201             assert((op <= oend) && (oend-op >= 32));
   2202             if (unlikely(offset<16)) {
   2203                 LZ4_memcpy_using_offset(op, match, cpy, offset);
   2204             } else {
   2205                 LZ4_wildCopy32(op, match, cpy);
   2206             }
   2207 
   2208             op = cpy;   /* wildcopy correction */
   2209         }
   2210     safe_decode:
   2211 #endif
   2212 
   2213         /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
   2214         DEBUGLOG(6, "using safe decode loop");
   2215         while (1) {
   2216             assert(ip < iend);
   2217             token = *ip++;
   2218             length = token >> ML_BITS;  /* literal length */
   2219             DEBUGLOG(7, "blockPos%6u: litLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length);
   2220 
   2221             /* A two-stage shortcut for the most common case:
   2222              * 1) If the literal length is 0..14, and there is enough space,
   2223              * enter the shortcut and copy 16 bytes on behalf of the literals
   2224              * (in the fast mode, only 8 bytes can be safely copied this way).
   2225              * 2) Further if the match length is 4..18, copy 18 bytes in a similar
   2226              * manner; but we ensure that there's enough space in the output for
   2227              * those 18 bytes earlier, upon entering the shortcut (in other words,
   2228              * there is a combined check for both stages).
   2229              */
   2230             if ( (length != RUN_MASK)
   2231                 /* strictly "less than" on input, to re-enter the loop with at least one byte */
   2232               && likely((ip < shortiend) & (op <= shortoend)) ) {
   2233                 /* Copy the literals */
   2234                 LZ4_memcpy(op, ip, 16);
   2235                 op += length; ip += length;
   2236 
   2237                 /* The second stage: prepare for match copying, decode full info.
   2238                  * If it doesn't work out, the info won't be wasted. */
   2239                 length = token & ML_MASK; /* match length */
   2240                 DEBUGLOG(7, "blockPos%6u: matchLength token = %u (len=%u)", (unsigned)(op-(BYTE*)dst), (unsigned)length, (unsigned)length + 4);
   2241                 offset = LZ4_readLE16(ip); ip += 2;
   2242                 match = op - offset;
   2243                 assert(match <= op); /* check overflow */
   2244 
   2245                 /* Do not deal with overlapping matches. */
   2246                 if ( (length != ML_MASK)
   2247                   && (offset >= 8)
   2248                   && (dict==withPrefix64k || match >= lowPrefix) ) {
   2249                     /* Copy the match. */
   2250                     LZ4_memcpy(op + 0, match + 0, 8);
   2251                     LZ4_memcpy(op + 8, match + 8, 8);
   2252                     LZ4_memcpy(op +16, match +16, 2);
   2253                     op += length + MINMATCH;
   2254                     /* Both stages worked, load the next token. */
   2255                     continue;
   2256                 }
   2257 
   2258                 /* The second stage didn't work out, but the info is ready.
   2259                  * Propel it right to the point of match copying. */
   2260                 goto _copy_match;
   2261             }
   2262 
   2263             /* decode literal length */
   2264             if (length == RUN_MASK) {
   2265                 size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
   2266                 if (addl == rvl_error) { goto _output_error; }
   2267                 length += addl;
   2268                 if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
   2269                 if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
   2270             }
   2271 
   2272 #if LZ4_FAST_DEC_LOOP
   2273         safe_literal_copy:
   2274 #endif
   2275             /* copy literals */
   2276             cpy = op+length;
   2277 
   2278             LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
   2279             if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) {
   2280                 /* We've either hit the input parsing restriction or the output parsing restriction.
   2281                  * In the normal scenario, decoding a full block, it must be the last sequence,
   2282                  * otherwise it's an error (invalid input or dimensions).
   2283                  * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
   2284                  */
   2285                 if (partialDecoding) {
   2286                     /* Since we are partial decoding we may be in this block because of the output parsing
   2287                      * restriction, which is not valid since the output buffer is allowed to be undersized.
   2288                      */
   2289                     DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
   2290                     DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
   2291                     DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
   2292                     DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
   2293                     /* Finishing in the middle of a literals segment,
   2294                      * due to lack of input.
   2295                      */
   2296                     if (ip+length > iend) {
   2297                         length = (size_t)(iend-ip);
   2298                         cpy = op + length;
   2299                     }
   2300                     /* Finishing in the middle of a literals segment,
   2301                      * due to lack of output space.
   2302                      */
   2303                     if (cpy > oend) {
   2304                         cpy = oend;
   2305                         assert(op<=oend);
   2306                         length = (size_t)(oend-op);
   2307                     }
   2308                 } else {
   2309                      /* We must be on the last sequence (or invalid) because of the parsing limitations
   2310                       * so check that we exactly consume the input and don't overrun the output buffer.
   2311                       */
   2312                     if ((ip+length != iend) || (cpy > oend)) {
   2313                         DEBUGLOG(5, "should have been last run of literals")
   2314                         DEBUGLOG(5, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
   2315                         DEBUGLOG(5, "or cpy(%p) > (oend-MFLIMIT)(%p)", cpy, oend-MFLIMIT);
   2316                         DEBUGLOG(5, "after writing %u bytes / %i bytes available", (unsigned)(op-(BYTE*)dst), outputSize);
   2317                         goto _output_error;
   2318                     }
   2319                 }
   2320                 LZ4_memmove(op, ip, length);  /* supports overlapping memory regions, for in-place decompression scenarios */
   2321                 ip += length;
   2322                 op += length;
   2323                 /* Necessarily EOF when !partialDecoding.
   2324                  * When partialDecoding, it is EOF if we've either
   2325                  * filled the output buffer or
   2326                  * can't proceed with reading an offset for following match.
   2327                  */
   2328                 if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
   2329                     break;
   2330                 }
   2331             } else {
   2332                 LZ4_wildCopy8(op, ip, cpy);   /* can overwrite up to 8 bytes beyond cpy */
   2333                 ip += length; op = cpy;
   2334             }
   2335 
   2336             /* get offset */
   2337             offset = LZ4_readLE16(ip); ip+=2;
   2338             match = op - offset;
   2339 
   2340             /* get matchlength */
   2341             length = token & ML_MASK;
   2342             DEBUGLOG(7, "blockPos%6u: matchLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length);
   2343 
   2344     _copy_match:
   2345             if (length == ML_MASK) {
   2346                 size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
   2347                 if (addl == rvl_error) { goto _output_error; }
   2348                 length += addl;
   2349                 if (unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
   2350             }
   2351             length += MINMATCH;
   2352 
   2353 #if LZ4_FAST_DEC_LOOP
   2354         safe_match_copy:
   2355 #endif
   2356             if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
   2357             /* match starting within external dictionary */
   2358             if ((dict==usingExtDict) && (match < lowPrefix)) {
   2359                 assert(dictEnd != NULL);
   2360                 if (unlikely(op+length > oend-LASTLITERALS)) {
   2361                     if (partialDecoding) length = MIN(length, (size_t)(oend-op));
   2362                     else goto _output_error;   /* doesn't respect parsing restriction */
   2363                 }
   2364 
   2365                 if (length <= (size_t)(lowPrefix-match)) {
   2366                     /* match fits entirely within external dictionary : just copy */
   2367                     LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
   2368                     op += length;
   2369                 } else {
   2370                     /* match stretches into both external dictionary and current block */
   2371                     size_t const copySize = (size_t)(lowPrefix - match);
   2372                     size_t const restSize = length - copySize;
   2373                     LZ4_memcpy(op, dictEnd - copySize, copySize);
   2374                     op += copySize;
   2375                     if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
   2376                         BYTE* const endOfMatch = op + restSize;
   2377                         const BYTE* copyFrom = lowPrefix;
   2378                         while (op < endOfMatch) *op++ = *copyFrom++;
   2379                     } else {
   2380                         LZ4_memcpy(op, lowPrefix, restSize);
   2381                         op += restSize;
   2382                 }   }
   2383                 continue;
   2384             }
   2385             assert(match >= lowPrefix);
   2386 
   2387             /* copy match within block */
   2388             cpy = op + length;
   2389 
   2390             /* partialDecoding : may end anywhere within the block */
   2391             assert(op<=oend);
   2392             if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
   2393                 size_t const mlen = MIN(length, (size_t)(oend-op));
   2394                 const BYTE* const matchEnd = match + mlen;
   2395                 BYTE* const copyEnd = op + mlen;
   2396                 if (matchEnd > op) {   /* overlap copy */
   2397                     while (op < copyEnd) { *op++ = *match++; }
   2398                 } else {
   2399                     LZ4_memcpy(op, match, mlen);
   2400                 }
   2401                 op = copyEnd;
   2402                 if (op == oend) { break; }
   2403                 continue;
   2404             }
   2405 
   2406             if (unlikely(offset<8)) {
   2407                 LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
   2408                 op[0] = match[0];
   2409                 op[1] = match[1];
   2410                 op[2] = match[2];
   2411                 op[3] = match[3];
   2412                 match += inc32table[offset];
   2413                 LZ4_memcpy(op+4, match, 4);
   2414                 match -= dec64table[offset];
   2415             } else {
   2416                 LZ4_memcpy(op, match, 8);
   2417                 match += 8;
   2418             }
   2419             op += 8;
   2420 
   2421             if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
   2422                 BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
   2423                 if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
   2424                 if (op < oCopyLimit) {
   2425                     LZ4_wildCopy8(op, match, oCopyLimit);
   2426                     match += oCopyLimit - op;
   2427                     op = oCopyLimit;
   2428                 }
   2429                 while (op < cpy) { *op++ = *match++; }
   2430             } else {
   2431                 LZ4_memcpy(op, match, 8);
   2432                 if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
   2433             }
   2434             op = cpy;   /* wildcopy correction */
   2435         }
   2436 
   2437         /* end of decoding */
   2438         DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
   2439         return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
   2440 
   2441         /* Overflow error detected */
   2442     _output_error:
   2443         return (int) (-(((const char*)ip)-src))-1;
   2444     }
   2445 }
   2446 
   2447 
   2448 /*===== Instantiate the API decoding functions. =====*/
   2449 
   2450 LZ4_FORCE_O2
   2451 int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
   2452 {
   2453     return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
   2454                                   decode_full_block, noDict,
   2455                                   (BYTE*)dest, NULL, 0);
   2456 }
   2457 
   2458 LZ4_FORCE_O2
   2459 int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
   2460 {
   2461     dstCapacity = MIN(targetOutputSize, dstCapacity);
   2462     return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
   2463                                   partial_decode,
   2464                                   noDict, (BYTE*)dst, NULL, 0);
   2465 }
   2466 
   2467 LZ4_FORCE_O2
   2468 int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
   2469 {
   2470     DEBUGLOG(5, "LZ4_decompress_fast");
   2471     return LZ4_decompress_unsafe_generic(
   2472                 (const BYTE*)source, (BYTE*)dest, originalSize,
   2473                 0, NULL, 0);
   2474 }
   2475 
   2476 /*===== Instantiate a few more decoding cases, used more than once. =====*/
   2477 
   2478 LZ4_FORCE_O2 /* Exported, an obsolete API function. */
   2479 int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
   2480 {
   2481     return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
   2482                                   decode_full_block, withPrefix64k,
   2483                                   (BYTE*)dest - 64 KB, NULL, 0);
   2484 }
   2485 
   2486 LZ4_FORCE_O2
   2487 static int LZ4_decompress_safe_partial_withPrefix64k(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity)
   2488 {
   2489     dstCapacity = MIN(targetOutputSize, dstCapacity);
   2490     return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
   2491                                   partial_decode, withPrefix64k,
   2492                                   (BYTE*)dest - 64 KB, NULL, 0);
   2493 }
   2494 
   2495 /* Another obsolete API function, paired with the previous one. */
   2496 int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
   2497 {
   2498     return LZ4_decompress_unsafe_generic(
   2499                 (const BYTE*)source, (BYTE*)dest, originalSize,
   2500                 64 KB, NULL, 0);
   2501 }
   2502 
   2503 LZ4_FORCE_O2
   2504 static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
   2505                                                size_t prefixSize)
   2506 {
   2507     return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
   2508                                   decode_full_block, noDict,
   2509                                   (BYTE*)dest-prefixSize, NULL, 0);
   2510 }
   2511 
   2512 LZ4_FORCE_O2
   2513 static int LZ4_decompress_safe_partial_withSmallPrefix(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity,
   2514                                                size_t prefixSize)
   2515 {
   2516     dstCapacity = MIN(targetOutputSize, dstCapacity);
   2517     return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
   2518                                   partial_decode, noDict,
   2519                                   (BYTE*)dest-prefixSize, NULL, 0);
   2520 }
   2521 
   2522 LZ4_FORCE_O2
   2523 int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
   2524                                      int compressedSize, int maxOutputSize,
   2525                                      const void* dictStart, size_t dictSize)
   2526 {
   2527     DEBUGLOG(5, "LZ4_decompress_safe_forceExtDict");
   2528     return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
   2529                                   decode_full_block, usingExtDict,
   2530                                   (BYTE*)dest, (const BYTE*)dictStart, dictSize);
   2531 }
   2532 
   2533 LZ4_FORCE_O2
   2534 int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest,
   2535                                      int compressedSize, int targetOutputSize, int dstCapacity,
   2536                                      const void* dictStart, size_t dictSize)
   2537 {
   2538     dstCapacity = MIN(targetOutputSize, dstCapacity);
   2539     return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
   2540                                   partial_decode, usingExtDict,
   2541                                   (BYTE*)dest, (const BYTE*)dictStart, dictSize);
   2542 }
   2543 
   2544 LZ4_FORCE_O2
   2545 static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
   2546                                        const void* dictStart, size_t dictSize)
   2547 {
   2548     return LZ4_decompress_unsafe_generic(
   2549                 (const BYTE*)source, (BYTE*)dest, originalSize,
   2550                 0, (const BYTE*)dictStart, dictSize);
   2551 }
   2552 
   2553 /* The "double dictionary" mode, for use with e.g. ring buffers: the first part
   2554  * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
   2555  * These routines are used only once, in LZ4_decompress_*_continue().
   2556  */
   2557 LZ4_FORCE_INLINE
   2558 int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
   2559                                    size_t prefixSize, const void* dictStart, size_t dictSize)
   2560 {
   2561     return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
   2562                                   decode_full_block, usingExtDict,
   2563                                   (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
   2564 }
   2565 
   2566 /*===== streaming decompression functions =====*/
   2567 
   2568 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
   2569 LZ4_streamDecode_t* LZ4_createStreamDecode(void)
   2570 {
   2571     LZ4_STATIC_ASSERT(sizeof(LZ4_streamDecode_t) >= sizeof(LZ4_streamDecode_t_internal));
   2572     return (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
   2573 }
   2574 
   2575 int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
   2576 {
   2577     if (LZ4_stream == NULL) { return 0; }  /* support free on NULL */
   2578     FREEMEM(LZ4_stream);
   2579     return 0;
   2580 }
   2581 #endif
   2582 
   2583 /*! LZ4_setStreamDecode() :
   2584  *  Use this function to instruct where to find the dictionary.
   2585  *  This function is not necessary if previous data is still available where it was decoded.
   2586  *  Loading a size of 0 is allowed (same effect as no dictionary).
   2587  * @return : 1 if OK, 0 if error
   2588  */
   2589 int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
   2590 {
   2591     LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
   2592     lz4sd->prefixSize = (size_t)dictSize;
   2593     if (dictSize) {
   2594         assert(dictionary != NULL);
   2595         lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
   2596     } else {
   2597         lz4sd->prefixEnd = (const BYTE*) dictionary;
   2598     }
   2599     lz4sd->externalDict = NULL;
   2600     lz4sd->extDictSize  = 0;
   2601     return 1;
   2602 }
   2603 
   2604 /*! LZ4_decoderRingBufferSize() :
   2605  *  when setting a ring buffer for streaming decompression (optional scenario),
   2606  *  provides the minimum size of this ring buffer
   2607  *  to be compatible with any source respecting maxBlockSize condition.
   2608  *  Note : in a ring buffer scenario,
   2609  *  blocks are presumed decompressed next to each other.
   2610  *  When not enough space remains for next block (remainingSize < maxBlockSize),
   2611  *  decoding resumes from beginning of ring buffer.
   2612  * @return : minimum ring buffer size,
   2613  *           or 0 if there is an error (invalid maxBlockSize).
   2614  */
   2615 int LZ4_decoderRingBufferSize(int maxBlockSize)
   2616 {
   2617     if (maxBlockSize < 0) return 0;
   2618     if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0;
   2619     if (maxBlockSize < 16) maxBlockSize = 16;
   2620     return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize);
   2621 }
   2622 
   2623 /*
   2624 *_continue() :
   2625     These decoding functions allow decompression of multiple blocks in "streaming" mode.
   2626     Previously decoded blocks must still be available at the memory position where they were decoded.
   2627     If it's not possible, save the relevant part of decoded data into a safe buffer,
   2628     and indicate where it stands using LZ4_setStreamDecode()
   2629 */
   2630 LZ4_FORCE_O2
   2631 int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
   2632 {
   2633     LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
   2634     int result;
   2635 
   2636     if (lz4sd->prefixSize == 0) {
   2637         /* The first call, no dictionary yet. */
   2638         assert(lz4sd->extDictSize == 0);
   2639         result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
   2640         if (result <= 0) return result;
   2641         lz4sd->prefixSize = (size_t)result;
   2642         lz4sd->prefixEnd = (BYTE*)dest + result;
   2643     } else if (lz4sd->prefixEnd == (BYTE*)dest) {
   2644         /* They're rolling the current segment. */
   2645         if (lz4sd->prefixSize >= 64 KB - 1)
   2646             result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
   2647         else if (lz4sd->extDictSize == 0)
   2648             result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
   2649                                                          lz4sd->prefixSize);
   2650         else
   2651             result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
   2652                                                     lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
   2653         if (result <= 0) return result;
   2654         lz4sd->prefixSize += (size_t)result;
   2655         lz4sd->prefixEnd  += result;
   2656     } else {
   2657         /* The buffer wraps around, or they're switching to another buffer. */
   2658         lz4sd->extDictSize = lz4sd->prefixSize;
   2659         lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
   2660         result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
   2661                                                   lz4sd->externalDict, lz4sd->extDictSize);
   2662         if (result <= 0) return result;
   2663         lz4sd->prefixSize = (size_t)result;
   2664         lz4sd->prefixEnd  = (BYTE*)dest + result;
   2665     }
   2666 
   2667     return result;
   2668 }
   2669 
   2670 LZ4_FORCE_O2 int
   2671 LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode,
   2672                         const char* source, char* dest, int originalSize)
   2673 {
   2674     LZ4_streamDecode_t_internal* const lz4sd =
   2675         (assert(LZ4_streamDecode!=NULL), &LZ4_streamDecode->internal_donotuse);
   2676     int result;
   2677 
   2678     DEBUGLOG(5, "LZ4_decompress_fast_continue (toDecodeSize=%i)", originalSize);
   2679     assert(originalSize >= 0);
   2680 
   2681     if (lz4sd->prefixSize == 0) {
   2682         DEBUGLOG(5, "first invocation : no prefix nor extDict");
   2683         assert(lz4sd->extDictSize == 0);
   2684         result = LZ4_decompress_fast(source, dest, originalSize);
   2685         if (result <= 0) return result;
   2686         lz4sd->prefixSize = (size_t)originalSize;
   2687         lz4sd->prefixEnd = (BYTE*)dest + originalSize;
   2688     } else if (lz4sd->prefixEnd == (BYTE*)dest) {
   2689         DEBUGLOG(5, "continue using existing prefix");
   2690         result = LZ4_decompress_unsafe_generic(
   2691                         (const BYTE*)source, (BYTE*)dest, originalSize,
   2692                         lz4sd->prefixSize,
   2693                         lz4sd->externalDict, lz4sd->extDictSize);
   2694         if (result <= 0) return result;
   2695         lz4sd->prefixSize += (size_t)originalSize;
   2696         lz4sd->prefixEnd  += originalSize;
   2697     } else {
   2698         DEBUGLOG(5, "prefix becomes extDict");
   2699         lz4sd->extDictSize = lz4sd->prefixSize;
   2700         lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
   2701         result = LZ4_decompress_fast_extDict(source, dest, originalSize,
   2702                                              lz4sd->externalDict, lz4sd->extDictSize);
   2703         if (result <= 0) return result;
   2704         lz4sd->prefixSize = (size_t)originalSize;
   2705         lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
   2706     }
   2707 
   2708     return result;
   2709 }
   2710 
   2711 
   2712 /*
   2713 Advanced decoding functions :
   2714 *_usingDict() :
   2715     These decoding functions work the same as "_continue" ones,
   2716     the dictionary must be explicitly provided within parameters
   2717 */
   2718 
   2719 int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
   2720 {
   2721     if (dictSize==0)
   2722         return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
   2723     if (dictStart+dictSize == dest) {
   2724         if (dictSize >= 64 KB - 1) {
   2725             return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
   2726         }
   2727         assert(dictSize >= 0);
   2728         return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
   2729     }
   2730     assert(dictSize >= 0);
   2731     return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize);
   2732 }
   2733 
   2734 int LZ4_decompress_safe_partial_usingDict(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, const char* dictStart, int dictSize)
   2735 {
   2736     if (dictSize==0)
   2737         return LZ4_decompress_safe_partial(source, dest, compressedSize, targetOutputSize, dstCapacity);
   2738     if (dictStart+dictSize == dest) {
   2739         if (dictSize >= 64 KB - 1) {
   2740             return LZ4_decompress_safe_partial_withPrefix64k(source, dest, compressedSize, targetOutputSize, dstCapacity);
   2741         }
   2742         assert(dictSize >= 0);
   2743         return LZ4_decompress_safe_partial_withSmallPrefix(source, dest, compressedSize, targetOutputSize, dstCapacity, (size_t)dictSize);
   2744     }
   2745     assert(dictSize >= 0);
   2746     return LZ4_decompress_safe_partial_forceExtDict(source, dest, compressedSize, targetOutputSize, dstCapacity, dictStart, (size_t)dictSize);
   2747 }
   2748 
   2749 int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
   2750 {
   2751     if (dictSize==0 || dictStart+dictSize == dest)
   2752         return LZ4_decompress_unsafe_generic(
   2753                         (const BYTE*)source, (BYTE*)dest, originalSize,
   2754                         (size_t)dictSize, NULL, 0);
   2755     assert(dictSize >= 0);
   2756     return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize);
   2757 }
   2758 
   2759 
   2760 /*=*************************************************
   2761 *  Obsolete Functions
   2762 ***************************************************/
   2763 /* obsolete compression functions */
   2764 int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
   2765 {
   2766     return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
   2767 }
   2768 int LZ4_compress(const char* src, char* dest, int srcSize)
   2769 {
   2770     return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize));
   2771 }
   2772 int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
   2773 {
   2774     return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1);
   2775 }
   2776 int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
   2777 {
   2778     return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1);
   2779 }
   2780 int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
   2781 {
   2782     return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1);
   2783 }
   2784 int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
   2785 {
   2786     return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1);
   2787 }
   2788 
   2789 /*
   2790 These decompression functions are deprecated and should no longer be used.
   2791 They are only provided here for compatibility with older user programs.
   2792 - LZ4_uncompress is totally equivalent to LZ4_decompress_fast
   2793 - LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
   2794 */
   2795 int LZ4_uncompress (const char* source, char* dest, int outputSize)
   2796 {
   2797     return LZ4_decompress_fast(source, dest, outputSize);
   2798 }
   2799 int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
   2800 {
   2801     return LZ4_decompress_safe(source, dest, isize, maxOutputSize);
   2802 }
   2803 
   2804 /* Obsolete Streaming functions */
   2805 
   2806 int LZ4_sizeofStreamState(void) { return sizeof(LZ4_stream_t); }
   2807 
   2808 int LZ4_resetStreamState(void* state, char* inputBuffer)
   2809 {
   2810     (void)inputBuffer;
   2811     LZ4_resetStream((LZ4_stream_t*)state);
   2812     return 0;
   2813 }
   2814 
   2815 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
   2816 void* LZ4_create (char* inputBuffer)
   2817 {
   2818     (void)inputBuffer;
   2819     return LZ4_createStream();
   2820 }
   2821 #endif
   2822 
   2823 char* LZ4_slideInputBuffer (void* state)
   2824 {
   2825     /* avoid const char * -> char * conversion warning */
   2826     return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary;
   2827 }
   2828 
   2829 #endif   /* LZ4_COMMONDEFS_ONLY */