--- /dev/null
+name: BSD
+
+on: [push, pull_request]
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ operating_system: [ freebsd, netbsd, openbsd ]
+ architecture: [ arm64, x86-64 ]
+ include:
+ - operating_system: freebsd
+ version: '14.0'
+ pkginstall: sudo pkg install -y cmake git ninja pkgconf
+ - operating_system: netbsd
+ version: '10.0'
+ pkginstall: sudo pkgin update && sudo pkgin -y install cmake gcc12 git ninja-build pkgconf && export PATH=/usr/pkg/gcc12/bin:$PATH
+ - operating_system: openbsd
+ version: '7.5'
+ pkginstall: sudo pkg_add cmake git ninja pkgconf
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: cross-platform-actions/action@v0.24.0
+ with:
+ operating_system: ${{ matrix.operating_system }}
+ architecture: ${{ matrix.architecture }}
+ version: ${{ matrix.version }}
+ run: |
+ ${{ matrix.pkginstall }}
+ cmake -B build -DCMAKE_BUILD_TYPE=Release -G Ninja
+ cmake --build build --config Release
[subrepo]
remote = https://github.com/rtissera/libchdr
branch = master
- commit = 5c598c2df3a7717552a76410d79f5af01ff51b1d
- parent = e2fb1389dc12376acb84e4993ed3b08760257252
+ commit = 86b272076d542287d3f03952e7d4efe283e815bf
+ parent = db02598e737b8d50cd347fe2ef13cb85ade051dd
method = merge
cmdver = 0.4.6
# lzma
-add_subdirectory(deps/lzma-22.01 EXCLUDE_FROM_ALL)
+add_subdirectory(deps/lzma-24.05 EXCLUDE_FROM_ALL)
list(APPEND CHDR_LIBS lzma)
list(APPEND CHDR_INCLUDES lzma)
list(APPEND CHDR_LIBS zlibstatic)
endif()
-# zstd
-add_subdirectory(deps/zstd-1.5.5 EXCLUDE_FROM_ALL)
-list(APPEND CHDR_LIBS zstd)
-list(APPEND CHDR_INCLUDES zstd)
+# zstd
+option(ZSTD_BUILD_SHARED "BUILD SHARED LIBRARIES" OFF)
+option(ZSTD_BUILD_PROGRAMS "BUILD PROGRAMS" OFF)
+add_subdirectory(deps/zstd-1.5.6/build/cmake EXCLUDE_FROM_ALL)
+list(APPEND CHDR_LIBS libzstd_static)
#--------------------------------------------------
# chdr
#--------------------------------------------------
target_compile_definitions(chdr PRIVATE _CRT_SECURE_NO_WARNINGS)
elseif(APPLE)
target_link_libraries(chdr PRIVATE -Wl,-dead_strip -Wl,-exported_symbol,_chd_*)
+ elseif(CMAKE_SYSTEM_NAME STREQUAL OpenBSD)
+ target_link_libraries(chdr PRIVATE -Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/src/link.T)
else()
target_link_libraries(chdr PRIVATE -Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/src/link.T -Wl,--no-undefined)
endif()
+++ /dev/null
-add_library(lzma STATIC
- include/7zTypes.h
- include/Alloc.h
- include/Bra.h
- include/Compiler.h
- include/CpuArch.h
- include/Delta.h
- include/LzFind.h
- include/LzHash.h
- include/Lzma86.h
- include/LzmaDec.h
- include/LzmaEnc.h
- include/LzmaLib.h
- include/Precomp.h
- include/Sort.h
- src/Alloc.c
- src/Bra86.c
- src/BraIA64.c
- src/CpuArch.c
- src/Delta.c
- src/LzFind.c
- src/Lzma86Dec.c
- src/LzmaDec.c
- src/LzmaEnc.c
- src/Sort.c
-)
-
-target_compile_definitions(lzma PRIVATE _7ZIP_ST)
-
-target_include_directories(lzma PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include")
-target_include_directories(lzma INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/include")
-
-set_target_properties(lzma PROPERTIES POSITION_INDEPENDENT_CODE ON)
+++ /dev/null
-/* 7zTypes.h -- Basic types
-2018-08-04 : Igor Pavlov : Public domain */
-
-#ifndef __7Z_TYPES_H
-#define __7Z_TYPES_H
-
-#ifdef _WIN32
-/* #include <windows.h> */
-#endif
-
-#include <stddef.h>
-
-#ifndef EXTERN_C_BEGIN
-#ifdef __cplusplus
-#define EXTERN_C_BEGIN extern "C" {
-#define EXTERN_C_END }
-#else
-#define EXTERN_C_BEGIN
-#define EXTERN_C_END
-#endif
-#endif
-
-EXTERN_C_BEGIN
-
-#define SZ_OK 0
-
-#define SZ_ERROR_DATA 1
-#define SZ_ERROR_MEM 2
-#define SZ_ERROR_CRC 3
-#define SZ_ERROR_UNSUPPORTED 4
-#define SZ_ERROR_PARAM 5
-#define SZ_ERROR_INPUT_EOF 6
-#define SZ_ERROR_OUTPUT_EOF 7
-#define SZ_ERROR_READ 8
-#define SZ_ERROR_WRITE 9
-#define SZ_ERROR_PROGRESS 10
-#define SZ_ERROR_FAIL 11
-#define SZ_ERROR_THREAD 12
-
-#define SZ_ERROR_ARCHIVE 16
-#define SZ_ERROR_NO_ARCHIVE 17
-
-typedef int SRes;
-
-
-#ifdef _WIN32
-
-/* typedef DWORD WRes; */
-typedef unsigned WRes;
-#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
-
-#else
-
-typedef int WRes;
-#define MY__FACILITY_WIN32 7
-#define MY__FACILITY__WRes MY__FACILITY_WIN32
-#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000)))
-
-#endif
-
-
-#ifndef RINOK
-#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
-#endif
-
-typedef unsigned char Byte;
-typedef short Int16;
-typedef unsigned short UInt16;
-
-#ifdef _LZMA_UINT32_IS_ULONG
-typedef long Int32;
-typedef unsigned long UInt32;
-#else
-typedef int Int32;
-typedef unsigned int UInt32;
-#endif
-
-#ifdef _SZ_NO_INT_64
-
-/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
- NOTES: Some code will work incorrectly in that case! */
-
-typedef long Int64;
-typedef unsigned long UInt64;
-
-#else
-
-#if defined(_MSC_VER) || defined(__BORLANDC__)
-typedef __int64 Int64;
-typedef unsigned __int64 UInt64;
-#define UINT64_CONST(n) n
-#else
-typedef long long int Int64;
-typedef unsigned long long int UInt64;
-#define UINT64_CONST(n) n ## ULL
-#endif
-
-#endif
-
-#ifdef _LZMA_NO_SYSTEM_SIZE_T
-typedef UInt32 SizeT;
-#else
-typedef size_t SizeT;
-#endif
-
-typedef int BoolInt;
-/* typedef BoolInt Bool; */
-#define True 1
-#define False 0
-
-
-#ifdef _WIN32
-#define MY_STD_CALL __stdcall
-#else
-#define MY_STD_CALL
-#endif
-
-#ifdef _MSC_VER
-
-#if _MSC_VER >= 1300
-#define MY_NO_INLINE __declspec(noinline)
-#else
-#define MY_NO_INLINE
-#endif
-
-#define MY_FORCE_INLINE __forceinline
-
-#define MY_CDECL __cdecl
-#define MY_FAST_CALL __fastcall
-
-#else
-
-#define MY_NO_INLINE
-#define MY_FORCE_INLINE
-#define MY_CDECL
-#define MY_FAST_CALL
-
-/* inline keyword : for C++ / C99 */
-
-/* GCC, clang: */
-/*
-#if defined (__GNUC__) && (__GNUC__ >= 4)
-#define MY_FORCE_INLINE __attribute__((always_inline))
-#define MY_NO_INLINE __attribute__((noinline))
-#endif
-*/
-
-#endif
-
-
-/* The following interfaces use first parameter as pointer to structure */
-
-typedef struct IByteIn IByteIn;
-struct IByteIn
-{
- Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */
-};
-#define IByteIn_Read(p) (p)->Read(p)
-
-
-typedef struct IByteOut IByteOut;
-struct IByteOut
-{
- void (*Write)(const IByteOut *p, Byte b);
-};
-#define IByteOut_Write(p, b) (p)->Write(p, b)
-
-
-typedef struct ISeqInStream ISeqInStream;
-struct ISeqInStream
-{
- SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size);
- /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
- (output(*size) < input(*size)) is allowed */
-};
-#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
-
-/* it can return SZ_ERROR_INPUT_EOF */
-SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size);
-SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType);
-SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf);
-
-
-typedef struct ISeqOutStream ISeqOutStream;
-struct ISeqOutStream
-{
- size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size);
- /* Returns: result - the number of actually written bytes.
- (result < size) means error */
-};
-#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size)
-
-typedef enum
-{
- SZ_SEEK_SET = 0,
- SZ_SEEK_CUR = 1,
- SZ_SEEK_END = 2
-} ESzSeek;
-
-
-typedef struct ISeekInStream ISeekInStream;
-struct ISeekInStream
-{
- SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */
- SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin);
-};
-#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
-#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
-
-
-typedef struct ILookInStream ILookInStream;
-struct ILookInStream
-{
- SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size);
- /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
- (output(*size) > input(*size)) is not allowed
- (output(*size) < input(*size)) is allowed */
- SRes (*Skip)(const ILookInStream *p, size_t offset);
- /* offset must be <= output(*size) of Look */
-
- SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
- /* reads directly (without buffer). It's same as ISeqInStream::Read */
- SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin);
-};
-
-#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
-#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset)
-#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size)
-#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
-
-
-SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size);
-SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset);
-
-/* reads via ILookInStream::Read */
-SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType);
-SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size);
-
-
-
-typedef struct
-{
- ILookInStream vt;
- const ISeekInStream *realStream;
-
- size_t pos;
- size_t size; /* it's data size */
-
- /* the following variables must be set outside */
- Byte *buf;
- size_t bufSize;
-} CLookToRead2;
-
-void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
-
-#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; }
-
-
-typedef struct
-{
- ISeqInStream vt;
- const ILookInStream *realStream;
-} CSecToLook;
-
-void SecToLook_CreateVTable(CSecToLook *p);
-
-
-
-typedef struct
-{
- ISeqInStream vt;
- const ILookInStream *realStream;
-} CSecToRead;
-
-void SecToRead_CreateVTable(CSecToRead *p);
-
-
-typedef struct ICompressProgress ICompressProgress;
-
-struct ICompressProgress
-{
- SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize);
- /* Returns: result. (result != SZ_OK) means break.
- Value (UInt64)(Int64)-1 for size means unknown value. */
-};
-#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
-
-
-
-typedef struct ISzAlloc ISzAlloc;
-typedef const ISzAlloc * ISzAllocPtr;
-
-struct ISzAlloc
-{
- void *(*Alloc)(ISzAllocPtr p, size_t size);
- void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */
-};
-
-#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size)
-#define ISzAlloc_Free(p, a) (p)->Free(p, a)
-
-/* deprecated */
-#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size)
-#define IAlloc_Free(p, a) ISzAlloc_Free(p, a)
-
-
-
-
-
-#ifndef MY_offsetof
- #ifdef offsetof
- #define MY_offsetof(type, m) offsetof(type, m)
- /*
- #define MY_offsetof(type, m) FIELD_OFFSET(type, m)
- */
- #else
- #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m))
- #endif
-#endif
-
-
-
-#ifndef MY_container_of
-
-/*
-#define MY_container_of(ptr, type, m) container_of(ptr, type, m)
-#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
-#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
-#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
-*/
-
-/*
- GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly"
- GCC 3.4.4 : classes with constructor
- GCC 4.8.1 : classes with non-public variable members"
-*/
-
-#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
-
-
-#endif
-
-#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr))
-
-/*
-#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
-*/
-#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m)
-
-#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
-/*
-#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m)
-*/
-
-
-
-#ifdef _WIN32
-
-#define CHAR_PATH_SEPARATOR '\\'
-#define WCHAR_PATH_SEPARATOR L'\\'
-#define STRING_PATH_SEPARATOR "\\"
-#define WSTRING_PATH_SEPARATOR L"\\"
-
-#else
-
-#define CHAR_PATH_SEPARATOR '/'
-#define WCHAR_PATH_SEPARATOR L'/'
-#define STRING_PATH_SEPARATOR "/"
-#define WSTRING_PATH_SEPARATOR L"/"
-
-#endif
-
-EXTERN_C_END
-
-#endif
+++ /dev/null
-/* Alloc.h -- Memory allocation functions
-2018-02-19 : Igor Pavlov : Public domain */
-
-#ifndef __COMMON_ALLOC_H
-#define __COMMON_ALLOC_H
-
-#include "7zTypes.h"
-
-EXTERN_C_BEGIN
-
-void *MyAlloc(size_t size);
-void MyFree(void *address);
-
-#ifdef _WIN32
-
-void SetLargePageSize();
-
-void *MidAlloc(size_t size);
-void MidFree(void *address);
-void *BigAlloc(size_t size);
-void BigFree(void *address);
-
-#else
-
-#define MidAlloc(size) MyAlloc(size)
-#define MidFree(address) MyFree(address)
-#define BigAlloc(size) MyAlloc(size)
-#define BigFree(address) MyFree(address)
-
-#endif
-
-extern const ISzAlloc g_Alloc;
-extern const ISzAlloc g_BigAlloc;
-extern const ISzAlloc g_MidAlloc;
-extern const ISzAlloc g_AlignedAlloc;
-
-
-typedef struct
-{
- ISzAlloc vt;
- ISzAllocPtr baseAlloc;
- unsigned numAlignBits; /* ((1 << numAlignBits) >= sizeof(void *)) */
- size_t offset; /* (offset == (k * sizeof(void *)) && offset < (1 << numAlignBits) */
-} CAlignOffsetAlloc;
-
-void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p);
-
-
-EXTERN_C_END
-
-#endif
+++ /dev/null
-/* Bra.h -- Branch converters for executables
-2013-01-18 : Igor Pavlov : Public domain */
-
-#ifndef __BRA_H
-#define __BRA_H
-
-#include "7zTypes.h"
-
-EXTERN_C_BEGIN
-
-/*
-These functions convert relative addresses to absolute addresses
-in CALL instructions to increase the compression ratio.
-
- In:
- data - data buffer
- size - size of data
- ip - current virtual Instruction Pinter (IP) value
- state - state variable for x86 converter
- encoding - 0 (for decoding), 1 (for encoding)
-
- Out:
- state - state variable for x86 converter
-
- Returns:
- The number of processed bytes. If you call these functions with multiple calls,
- you must start next call with first byte after block of processed bytes.
-
- Type Endian Alignment LookAhead
-
- x86 little 1 4
- ARMT little 2 2
- ARM little 4 0
- PPC big 4 0
- SPARC big 4 0
- IA64 little 16 0
-
- size must be >= Alignment + LookAhead, if it's not last block.
- If (size < Alignment + LookAhead), converter returns 0.
-
- Example:
-
- UInt32 ip = 0;
- for ()
- {
- ; size must be >= Alignment + LookAhead, if it's not last block
- SizeT processed = Convert(data, size, ip, 1);
- data += processed;
- size -= processed;
- ip += processed;
- }
-*/
-
-#define x86_Convert_Init(state) { state = 0; }
-SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);
-SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
-SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
-SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
-SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
-SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
-
-EXTERN_C_END
-
-#endif
+++ /dev/null
-/* Compiler.h
-2017-04-03 : Igor Pavlov : Public domain */
-
-#ifndef __7Z_COMPILER_H
-#define __7Z_COMPILER_H
-
-#ifdef _MSC_VER
-
- #ifdef UNDER_CE
- #define RPC_NO_WINDOWS_H
- /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
- #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
- #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
- #endif
-
- #if _MSC_VER >= 1300
- #pragma warning(disable : 4996) // This function or variable may be unsafe
- #else
- #pragma warning(disable : 4511) // copy constructor could not be generated
- #pragma warning(disable : 4512) // assignment operator could not be generated
- #pragma warning(disable : 4514) // unreferenced inline function has been removed
- #pragma warning(disable : 4702) // unreachable code
- #pragma warning(disable : 4710) // not inlined
- #pragma warning(disable : 4714) // function marked as __forceinline not inlined
- #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
- #endif
-
-#endif
-
-#define UNUSED_VAR(x) (void)x;
-/* #define UNUSED_VAR(x) x=x; */
-
-#endif
+++ /dev/null
-/* CpuArch.h -- CPU specific code
-2018-02-18 : Igor Pavlov : Public domain */
-
-#ifndef __CPU_ARCH_H
-#define __CPU_ARCH_H
-
-#include "7zTypes.h"
-
-EXTERN_C_BEGIN
-
-/*
-MY_CPU_LE means that CPU is LITTLE ENDIAN.
-MY_CPU_BE means that CPU is BIG ENDIAN.
-If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform.
-
-MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses.
-*/
-
-#if defined(_M_X64) \
- || defined(_M_AMD64) \
- || defined(__x86_64__) \
- || defined(__AMD64__) \
- || defined(__amd64__)
- #define MY_CPU_AMD64
- #ifdef __ILP32__
- #define MY_CPU_NAME "x32"
- #else
- #define MY_CPU_NAME "x64"
- #endif
- #define MY_CPU_64BIT
-#endif
-
-
-#if defined(_M_IX86) \
- || defined(__i386__)
- #define MY_CPU_X86
- #define MY_CPU_NAME "x86"
- #define MY_CPU_32BIT
-#endif
-
-
-#if defined(_M_ARM64) \
- || defined(__AARCH64EL__) \
- || defined(__AARCH64EB__) \
- || defined(__aarch64__)
- #define MY_CPU_ARM64
- #define MY_CPU_NAME "arm64"
- #define MY_CPU_64BIT
-#endif
-
-
-#if defined(_M_ARM) \
- || defined(_M_ARM_NT) \
- || defined(_M_ARMT) \
- || defined(__arm__) \
- || defined(__thumb__) \
- || defined(__ARMEL__) \
- || defined(__ARMEB__) \
- || defined(__THUMBEL__) \
- || defined(__THUMBEB__)
- #define MY_CPU_ARM
- #define MY_CPU_NAME "arm"
- #define MY_CPU_32BIT
-#endif
-
-
-#if defined(_M_IA64) \
- || defined(__ia64__)
- #define MY_CPU_IA64
- #define MY_CPU_NAME "ia64"
- #define MY_CPU_64BIT
-#endif
-
-
-#if defined(__mips64) \
- || defined(__mips64__) \
- || (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3))
- #define MY_CPU_NAME "mips64"
- #define MY_CPU_64BIT
-#elif defined(__mips__)
- #define MY_CPU_NAME "mips"
- /* #define MY_CPU_32BIT */
-#endif
-
-
-#if defined(__ppc64__) \
- || defined(__powerpc64__)
- #ifdef __ILP32__
- #define MY_CPU_NAME "ppc64-32"
- #else
- #define MY_CPU_NAME "ppc64"
- #endif
- #define MY_CPU_64BIT
-#elif defined(__ppc__) \
- || defined(__powerpc__)
- #define MY_CPU_NAME "ppc"
- #define MY_CPU_32BIT
-#endif
-
-
-#if defined(__sparc64__)
- #define MY_CPU_NAME "sparc64"
- #define MY_CPU_64BIT
-#elif defined(__sparc__)
- #define MY_CPU_NAME "sparc"
- /* #define MY_CPU_32BIT */
-#endif
-
-
-#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
-#define MY_CPU_X86_OR_AMD64
-#endif
-
-
-#ifdef _WIN32
-
- #ifdef MY_CPU_ARM
- #define MY_CPU_ARM_LE
- #endif
-
- #ifdef MY_CPU_ARM64
- #define MY_CPU_ARM64_LE
- #endif
-
- #ifdef _M_IA64
- #define MY_CPU_IA64_LE
- #endif
-
-#endif
-
-
-#if defined(MY_CPU_X86_OR_AMD64) \
- || defined(MY_CPU_ARM_LE) \
- || defined(MY_CPU_ARM64_LE) \
- || defined(MY_CPU_IA64_LE) \
- || defined(__LITTLE_ENDIAN__) \
- || defined(__ARMEL__) \
- || defined(__THUMBEL__) \
- || defined(__AARCH64EL__) \
- || defined(__MIPSEL__) \
- || defined(__MIPSEL) \
- || defined(_MIPSEL) \
- || defined(__BFIN__) \
- || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
- #define MY_CPU_LE
-#endif
-
-#if defined(__BIG_ENDIAN__) \
- || defined(__ARMEB__) \
- || defined(__THUMBEB__) \
- || defined(__AARCH64EB__) \
- || defined(__MIPSEB__) \
- || defined(__MIPSEB) \
- || defined(_MIPSEB) \
- || defined(__m68k__) \
- || defined(__s390__) \
- || defined(__s390x__) \
- || defined(__zarch__) \
- || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
- #define MY_CPU_BE
-#endif
-
-
-#if defined(MY_CPU_LE) && defined(MY_CPU_BE)
- #error Stop_Compiling_Bad_Endian
-#endif
-
-
-#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
- #error Stop_Compiling_Bad_32_64_BIT
-#endif
-
-
-#ifndef MY_CPU_NAME
- #ifdef MY_CPU_LE
- #define MY_CPU_NAME "LE"
- #elif defined(MY_CPU_BE)
- #define MY_CPU_NAME "BE"
- #else
- /*
- #define MY_CPU_NAME ""
- */
- #endif
-#endif
-
-
-
-
-
-#ifdef MY_CPU_LE
- #if defined(MY_CPU_X86_OR_AMD64) \
- || defined(MY_CPU_ARM64) \
- || defined(__ARM_FEATURE_UNALIGNED)
- #define MY_CPU_LE_UNALIGN
- #endif
-#endif
-
-
-#ifdef MY_CPU_LE_UNALIGN
-
-#define GetUi16(p) (*(const UInt16 *)(const void *)(p))
-#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
-#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
-
-#define SetUi16(p, v) { *(UInt16 *)(p) = (v); }
-#define SetUi32(p, v) { *(UInt32 *)(p) = (v); }
-#define SetUi64(p, v) { *(UInt64 *)(p) = (v); }
-
-#else
-
-#define GetUi16(p) ( (UInt16) ( \
- ((const Byte *)(p))[0] | \
- ((UInt16)((const Byte *)(p))[1] << 8) ))
-
-#define GetUi32(p) ( \
- ((const Byte *)(p))[0] | \
- ((UInt32)((const Byte *)(p))[1] << 8) | \
- ((UInt32)((const Byte *)(p))[2] << 16) | \
- ((UInt32)((const Byte *)(p))[3] << 24))
-
-#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
-
-#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
- _ppp_[0] = (Byte)_vvv_; \
- _ppp_[1] = (Byte)(_vvv_ >> 8); }
-
-#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
- _ppp_[0] = (Byte)_vvv_; \
- _ppp_[1] = (Byte)(_vvv_ >> 8); \
- _ppp_[2] = (Byte)(_vvv_ >> 16); \
- _ppp_[3] = (Byte)(_vvv_ >> 24); }
-
-#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
- SetUi32(_ppp2_ , (UInt32)_vvv2_); \
- SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }
-
-#endif
-
-#ifdef __has_builtin
- #define MY__has_builtin(x) __has_builtin(x)
-#else
- #define MY__has_builtin(x) 0
-#endif
-
-#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ (_MSC_VER >= 1300)
-
-/* Note: we use bswap instruction, that is unsupported in 386 cpu */
-
-#include <stdlib.h>
-
-#pragma intrinsic(_byteswap_ushort)
-#pragma intrinsic(_byteswap_ulong)
-#pragma intrinsic(_byteswap_uint64)
-
-/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
-#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p))
-#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p))
-
-#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
-
-#elif defined(MY_CPU_LE_UNALIGN) && ( \
- (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
- || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
-
-/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const Byte *)(p)) */
-#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const Byte *)(p))
-#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const Byte *)(p))
-
-#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
-
-#else
-
-#define GetBe32(p) ( \
- ((UInt32)((const Byte *)(p))[0] << 24) | \
- ((UInt32)((const Byte *)(p))[1] << 16) | \
- ((UInt32)((const Byte *)(p))[2] << 8) | \
- ((const Byte *)(p))[3] )
-
-#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
-
-#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
- _ppp_[0] = (Byte)(_vvv_ >> 24); \
- _ppp_[1] = (Byte)(_vvv_ >> 16); \
- _ppp_[2] = (Byte)(_vvv_ >> 8); \
- _ppp_[3] = (Byte)_vvv_; }
-
-#endif
-
-
-#ifndef GetBe16
-
-#define GetBe16(p) ( (UInt16) ( \
- ((UInt16)((const Byte *)(p))[0] << 8) | \
- ((const Byte *)(p))[1] ))
-
-#endif
-
-
-
-#ifdef MY_CPU_X86_OR_AMD64
-
-typedef struct
-{
- UInt32 maxFunc;
- UInt32 vendor[3];
- UInt32 ver;
- UInt32 b;
- UInt32 c;
- UInt32 d;
-} Cx86cpuid;
-
-enum
-{
- CPU_FIRM_INTEL,
- CPU_FIRM_AMD,
- CPU_FIRM_VIA
-};
-
-void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d);
-
-BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p);
-int x86cpuid_GetFirm(const Cx86cpuid *p);
-
-#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF))
-#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF))
-#define x86cpuid_GetStepping(ver) (ver & 0xF)
-
-BoolInt CPU_Is_InOrder();
-BoolInt CPU_Is_Aes_Supported();
-BoolInt CPU_IsSupported_PageGB();
-
-#endif
-
-EXTERN_C_END
-
-#endif
+++ /dev/null
-/* LzFind.h -- Match finder for LZ algorithms
-2017-06-10 : Igor Pavlov : Public domain */
-
-#ifndef __LZ_FIND_H
-#define __LZ_FIND_H
-
-#include "7zTypes.h"
-
-EXTERN_C_BEGIN
-
-typedef UInt32 CLzRef;
-
-typedef struct _CMatchFinder
-{
- Byte *buffer;
- UInt32 pos;
- UInt32 posLimit;
- UInt32 streamPos;
- UInt32 lenLimit;
-
- UInt32 cyclicBufferPos;
- UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
-
- Byte streamEndWasReached;
- Byte btMode;
- Byte bigHash;
- Byte directInput;
-
- UInt32 matchMaxLen;
- CLzRef *hash;
- CLzRef *son;
- UInt32 hashMask;
- UInt32 cutValue;
-
- Byte *bufferBase;
- ISeqInStream *stream;
-
- UInt32 blockSize;
- UInt32 keepSizeBefore;
- UInt32 keepSizeAfter;
-
- UInt32 numHashBytes;
- size_t directInputRem;
- UInt32 historySize;
- UInt32 fixedHashSize;
- UInt32 hashSizeSum;
- SRes result;
- UInt32 crc[256];
- size_t numRefs;
-
- UInt64 expectedDataSize;
-} CMatchFinder;
-
-#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
-
-#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
-
-#define Inline_MatchFinder_IsFinishedOK(p) \
- ((p)->streamEndWasReached \
- && (p)->streamPos == (p)->pos \
- && (!(p)->directInput || (p)->directInputRem == 0))
-
-int MatchFinder_NeedMove(CMatchFinder *p);
-Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
-void MatchFinder_MoveBlock(CMatchFinder *p);
-void MatchFinder_ReadIfRequired(CMatchFinder *p);
-
-void MatchFinder_Construct(CMatchFinder *p);
-
-/* Conditions:
- historySize <= 3 GB
- keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
-*/
-int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
- UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
- ISzAllocPtr alloc);
-void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
-void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
-void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
-
-UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
- UInt32 *distances, UInt32 maxLen);
-
-/*
-Conditions:
- Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
- Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
-*/
-
-typedef void (*Mf_Init_Func)(void *object);
-typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
-typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
-typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
-typedef void (*Mf_Skip_Func)(void *object, UInt32);
-
-typedef struct _IMatchFinder
-{
- Mf_Init_Func Init;
- Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
- Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
- Mf_GetMatches_Func GetMatches;
- Mf_Skip_Func Skip;
-} IMatchFinder;
-
-void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
-
-void MatchFinder_Init_LowHash(CMatchFinder *p);
-void MatchFinder_Init_HighHash(CMatchFinder *p);
-void MatchFinder_Init_3(CMatchFinder *p, int readData);
-void MatchFinder_Init(CMatchFinder *p);
-
-UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
-UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
-
-void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
-void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
-
-EXTERN_C_END
-
-#endif
+++ /dev/null
-/* LzHash.h -- HASH functions for LZ algorithms
-2015-04-12 : Igor Pavlov : Public domain */
-
-#ifndef __LZ_HASH_H
-#define __LZ_HASH_H
-
-#define kHash2Size (1 << 10)
-#define kHash3Size (1 << 16)
-#define kHash4Size (1 << 20)
-
-#define kFix3HashSize (kHash2Size)
-#define kFix4HashSize (kHash2Size + kHash3Size)
-#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
-
-#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8);
-
-#define HASH3_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
-
-#define HASH4_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- temp ^= ((UInt32)cur[2] << 8); \
- h3 = temp & (kHash3Size - 1); \
- hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
-
-#define HASH5_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- temp ^= ((UInt32)cur[2] << 8); \
- h3 = temp & (kHash3Size - 1); \
- temp ^= (p->crc[cur[3]] << 5); \
- h4 = temp & (kHash4Size - 1); \
- hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; }
-
-/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
-#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
-
-
-#define MT_HASH2_CALC \
- h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
-
-#define MT_HASH3_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
-
-#define MT_HASH4_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- temp ^= ((UInt32)cur[2] << 8); \
- h3 = temp & (kHash3Size - 1); \
- h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
-
-#endif
+++ /dev/null
-/* LzmaDec.h -- LZMA Decoder
-2018-04-21 : Igor Pavlov : Public domain */
-
-#ifndef __LZMA_DEC_H
-#define __LZMA_DEC_H
-
-#include "7zTypes.h"
-
-EXTERN_C_BEGIN
-
-/* #define _LZMA_PROB32 */
-/* _LZMA_PROB32 can increase the speed on some CPUs,
- but memory usage for CLzmaDec::probs will be doubled in that case */
-
-typedef
-#ifdef _LZMA_PROB32
- UInt32
-#else
- UInt16
-#endif
- CLzmaProb;
-
-
-/* ---------- LZMA Properties ---------- */
-
-#define LZMA_PROPS_SIZE 5
-
-typedef struct _CLzmaProps
-{
- Byte lc;
- Byte lp;
- Byte pb;
- Byte _pad_;
- UInt32 dicSize;
-} CLzmaProps;
-
-/* LzmaProps_Decode - decodes properties
-Returns:
- SZ_OK
- SZ_ERROR_UNSUPPORTED - Unsupported properties
-*/
-
-SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
-
-
-/* ---------- LZMA Decoder state ---------- */
-
-/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
- Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
-
-#define LZMA_REQUIRED_INPUT_MAX 20
-
-typedef struct
-{
- /* Don't change this structure. ASM code can use it. */
- CLzmaProps prop;
- CLzmaProb *probs;
- CLzmaProb *probs_1664;
- Byte *dic;
- SizeT dicBufSize;
- SizeT dicPos;
- const Byte *buf;
- UInt32 range;
- UInt32 code;
- UInt32 processedPos;
- UInt32 checkDicSize;
- UInt32 reps[4];
- UInt32 state;
- UInt32 remainLen;
-
- UInt32 numProbs;
- unsigned tempBufSize;
- Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
-} CLzmaDec;
-
-#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; }
-
-void LzmaDec_Init(CLzmaDec *p);
-
-/* There are two types of LZMA streams:
- - Stream with end mark. That end mark adds about 6 bytes to compressed size.
- - Stream without end mark. You must know exact uncompressed size to decompress such stream. */
-
-typedef enum
-{
- LZMA_FINISH_ANY, /* finish at any point */
- LZMA_FINISH_END /* block must be finished at the end */
-} ELzmaFinishMode;
-
-/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
-
- You must use LZMA_FINISH_END, when you know that current output buffer
- covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
-
- If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
- and output value of destLen will be less than output buffer size limit.
- You can check status result also.
-
- You can use multiple checks to test data integrity after full decompression:
- 1) Check Result and "status" variable.
- 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
- 3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
- You must use correct finish mode in that case. */
-
-typedef enum
-{
- LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */
- LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */
- LZMA_STATUS_NOT_FINISHED, /* stream was not finished */
- LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */
- LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */
-} ELzmaStatus;
-
-/* ELzmaStatus is used only as output value for function call */
-
-
-/* ---------- Interfaces ---------- */
-
-/* There are 3 levels of interfaces:
- 1) Dictionary Interface
- 2) Buffer Interface
- 3) One Call Interface
- You can select any of these interfaces, but don't mix functions from different
- groups for same object. */
-
-
-/* There are two variants to allocate state for Dictionary Interface:
- 1) LzmaDec_Allocate / LzmaDec_Free
- 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
- You can use variant 2, if you set dictionary buffer manually.
- For Buffer Interface you must always use variant 1.
-
-LzmaDec_Allocate* can return:
- SZ_OK
- SZ_ERROR_MEM - Memory allocation error
- SZ_ERROR_UNSUPPORTED - Unsupported properties
-*/
-
-SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
-void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc);
-
-SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
-void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc);
-
-/* ---------- Dictionary Interface ---------- */
-
-/* You can use it, if you want to eliminate the overhead for data copying from
- dictionary to some other external buffer.
- You must work with CLzmaDec variables directly in this interface.
-
- STEPS:
- LzmaDec_Construct()
- LzmaDec_Allocate()
- for (each new stream)
- {
- LzmaDec_Init()
- while (it needs more decompression)
- {
- LzmaDec_DecodeToDic()
- use data from CLzmaDec::dic and update CLzmaDec::dicPos
- }
- }
- LzmaDec_Free()
-*/
-
-/* LzmaDec_DecodeToDic
-
- The decoding to internal dictionary buffer (CLzmaDec::dic).
- You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
-
-finishMode:
- It has meaning only if the decoding reaches output limit (dicLimit).
- LZMA_FINISH_ANY - Decode just dicLimit bytes.
- LZMA_FINISH_END - Stream must be finished after dicLimit.
-
-Returns:
- SZ_OK
- status:
- LZMA_STATUS_FINISHED_WITH_MARK
- LZMA_STATUS_NOT_FINISHED
- LZMA_STATUS_NEEDS_MORE_INPUT
- LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
- SZ_ERROR_DATA - Data error
-*/
-
-SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
- const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
-
-
-/* ---------- Buffer Interface ---------- */
-
-/* It's zlib-like interface.
- See LzmaDec_DecodeToDic description for information about STEPS and return results,
- but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
- to work with CLzmaDec variables manually.
-
-finishMode:
- It has meaning only if the decoding reaches output limit (*destLen).
- LZMA_FINISH_ANY - Decode just destLen bytes.
- LZMA_FINISH_END - Stream must be finished after (*destLen).
-*/
-
-SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
- const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
-
-
-/* ---------- One Call Interface ---------- */
-
-/* LzmaDecode
-
-finishMode:
- It has meaning only if the decoding reaches output limit (*destLen).
- LZMA_FINISH_ANY - Decode just destLen bytes.
- LZMA_FINISH_END - Stream must be finished after (*destLen).
-
-Returns:
- SZ_OK
- status:
- LZMA_STATUS_FINISHED_WITH_MARK
- LZMA_STATUS_NOT_FINISHED
- LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
- SZ_ERROR_DATA - Data error
- SZ_ERROR_MEM - Memory allocation error
- SZ_ERROR_UNSUPPORTED - Unsupported properties
- SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
-*/
-
-SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
- const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
- ELzmaStatus *status, ISzAllocPtr alloc);
-
-EXTERN_C_END
-
-#endif
+++ /dev/null
-/* LzmaEnc.h -- LZMA Encoder
-2017-07-27 : Igor Pavlov : Public domain */
-
-#ifndef __LZMA_ENC_H
-#define __LZMA_ENC_H
-
-#include "7zTypes.h"
-
-EXTERN_C_BEGIN
-
-#define LZMA_PROPS_SIZE 5
-
-typedef struct _CLzmaEncProps
-{
- int level; /* 0 <= level <= 9 */
- UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
- (1 << 12) <= dictSize <= (3 << 29) for 64-bit version
- default = (1 << 24) */
- int lc; /* 0 <= lc <= 8, default = 3 */
- int lp; /* 0 <= lp <= 4, default = 0 */
- int pb; /* 0 <= pb <= 4, default = 2 */
- int algo; /* 0 - fast, 1 - normal, default = 1 */
- int fb; /* 5 <= fb <= 273, default = 32 */
- int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
- int numHashBytes; /* 2, 3 or 4, default = 4 */
- UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
- unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
- int numThreads; /* 1 or 2, default = 2 */
-
- UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
- Encoder uses this value to reduce dictionary size */
-} CLzmaEncProps;
-
-void LzmaEncProps_Init(CLzmaEncProps *p);
-void LzmaEncProps_Normalize(CLzmaEncProps *p);
-UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
-
-
-/* ---------- CLzmaEncHandle Interface ---------- */
-
-/* LzmaEnc* functions can return the following exit codes:
-SRes:
- SZ_OK - OK
- SZ_ERROR_MEM - Memory allocation error
- SZ_ERROR_PARAM - Incorrect paramater in props
- SZ_ERROR_WRITE - ISeqOutStream write callback error
- SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output
- SZ_ERROR_PROGRESS - some break from progress callback
- SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
-*/
-
-typedef void * CLzmaEncHandle;
-
-CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
-void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
-
-SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
-void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
-SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
-unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
-
-SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
- ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
-SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
- int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
-
-
-/* ---------- One Call Interface ---------- */
-
-SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
- const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
- ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
-
-EXTERN_C_END
-
-#endif
+++ /dev/null
-/* LzmaLib.h -- LZMA library interface
-2013-01-18 : Igor Pavlov : Public domain */
-
-#ifndef __LZMA_LIB_H
-#define __LZMA_LIB_H
-
-#include "7zTypes.h"
-
-EXTERN_C_BEGIN
-
-#define MY_STDAPI int MY_STD_CALL
-
-#define LZMA_PROPS_SIZE 5
-
-/*
-RAM requirements for LZMA:
- for compression: (dictSize * 11.5 + 6 MB) + state_size
- for decompression: dictSize + state_size
- state_size = (4 + (1.5 << (lc + lp))) KB
- by default (lc=3, lp=0), state_size = 16 KB.
-
-LZMA properties (5 bytes) format
- Offset Size Description
- 0 1 lc, lp and pb in encoded form.
- 1 4 dictSize (little endian).
-*/
-
-/*
-LzmaCompress
-------------
-
-outPropsSize -
- In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
- Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
-
- LZMA Encoder will use defult values for any parameter, if it is
- -1 for any from: level, loc, lp, pb, fb, numThreads
- 0 for dictSize
-
-level - compression level: 0 <= level <= 9;
-
- level dictSize algo fb
- 0: 16 KB 0 32
- 1: 64 KB 0 32
- 2: 256 KB 0 32
- 3: 1 MB 0 32
- 4: 4 MB 0 32
- 5: 16 MB 1 32
- 6: 32 MB 1 32
- 7+: 64 MB 1 64
-
- The default value for "level" is 5.
-
- algo = 0 means fast method
- algo = 1 means normal method
-
-dictSize - The dictionary size in bytes. The maximum value is
- 128 MB = (1 << 27) bytes for 32-bit version
- 1 GB = (1 << 30) bytes for 64-bit version
- The default value is 16 MB = (1 << 24) bytes.
- It's recommended to use the dictionary that is larger than 4 KB and
- that can be calculated as (1 << N) or (3 << N) sizes.
-
-lc - The number of literal context bits (high bits of previous literal).
- It can be in the range from 0 to 8. The default value is 3.
- Sometimes lc=4 gives the gain for big files.
-
-lp - The number of literal pos bits (low bits of current position for literals).
- It can be in the range from 0 to 4. The default value is 0.
- The lp switch is intended for periodical data when the period is equal to 2^lp.
- For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's
- better to set lc=0, if you change lp switch.
-
-pb - The number of pos bits (low bits of current position).
- It can be in the range from 0 to 4. The default value is 2.
- The pb switch is intended for periodical data when the period is equal 2^pb.
-
-fb - Word size (the number of fast bytes).
- It can be in the range from 5 to 273. The default value is 32.
- Usually, a big number gives a little bit better compression ratio and
- slower compression process.
-
-numThreads - The number of thereads. 1 or 2. The default value is 2.
- Fast mode (algo = 0) can use only 1 thread.
-
-Out:
- destLen - processed output size
-Returns:
- SZ_OK - OK
- SZ_ERROR_MEM - Memory allocation error
- SZ_ERROR_PARAM - Incorrect paramater
- SZ_ERROR_OUTPUT_EOF - output buffer overflow
- SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
-*/
-
-MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
- unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
- int level, /* 0 <= level <= 9, default = 5 */
- unsigned dictSize, /* default = (1 << 24) */
- int lc, /* 0 <= lc <= 8, default = 3 */
- int lp, /* 0 <= lp <= 4, default = 0 */
- int pb, /* 0 <= pb <= 4, default = 2 */
- int fb, /* 5 <= fb <= 273, default = 32 */
- int numThreads /* 1 or 2, default = 2 */
- );
-
-/*
-LzmaUncompress
---------------
-In:
- dest - output data
- destLen - output data size
- src - input data
- srcLen - input data size
-Out:
- destLen - processed output size
- srcLen - processed input size
-Returns:
- SZ_OK - OK
- SZ_ERROR_DATA - Data error
- SZ_ERROR_MEM - Memory allocation arror
- SZ_ERROR_UNSUPPORTED - Unsupported properties
- SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src)
-*/
-
-MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
- const unsigned char *props, size_t propsSize);
-
-EXTERN_C_END
-
-#endif
+++ /dev/null
-/* Precomp.h -- StdAfx
-2013-11-12 : Igor Pavlov : Public domain */
-
-#ifndef __7Z_PRECOMP_H
-#define __7Z_PRECOMP_H
-
-#include "Compiler.h"
-/* #include "7zTypes.h" */
-
-#endif
+++ /dev/null
-HISTORY of the LZMA SDK
------------------------
-
-19.00 2019-02-21
--------------------------
-- Encryption strength for 7z archives was increased:
- the size of random initialization vector was increased from 64-bit to 128-bit,
- and the pseudo-random number generator was improved.
-- The bug in 7zIn.c code was fixed.
-
-
-18.06 2018-12-30
--------------------------
-- The speed for LZMA/LZMA2 compressing was increased by 3-10%,
- and there are minor changes in compression ratio.
-- Some bugs were fixed.
-- The bug in 7-Zip 18.02-18.05 was fixed:
- There was memory leak in multithreading xz decoder - XzDecMt_Decode(),
- if xz stream contains only one block.
-- The changes for MSVS compiler makefiles:
- - the makefiles now use "PLATFORM" macroname with values (x64, x86, arm64)
- instead of "CPU" macroname with values (AMD64, ARM64).
- - the makefiles by default now use static version of the run-time library.
-
-
-18.05 2018-04-30
--------------------------
-- The speed for LZMA/LZMA2 compressing was increased
- by 8% for fastest/fast compression levels and
- by 3% for normal/maximum compression levels.
-- Previous versions of 7-Zip could work incorrectly in "Large memory pages" mode in
- Windows 10 because of some BUG with "Large Pages" in Windows 10.
- Now 7-Zip doesn't use "Large Pages" on Windows 10 up to revision 1709 (16299).
-- The BUG was fixed in Lzma2Enc.c
- Lzma2Enc_Encode2() function worked incorretly,
- if (inStream == NULL) and the number of block threads is more than 1.
-
-
-18.03 beta 2018-03-04
--------------------------
-- Asm\x86\LzmaDecOpt.asm: new optimized LZMA decoder written in asm
- for x64 with about 30% higher speed than main version of LZMA decoder written in C.
-- The speed for single-thread LZMA/LZMA2 decoder written in C was increased by 3%.
-- 7-Zip now can use multi-threading for 7z/LZMA2 decoding,
- if there are multiple independent data chunks in LZMA2 stream.
-- 7-Zip now can use multi-threading for xz decoding,
- if there are multiple blocks in xz stream.
-
-
-18.01 2019-01-28
--------------------------
-- The BUG in 17.01 - 18.00 beta was fixed:
- XzDec.c : random block unpacking and XzUnpacker_IsBlockFinished()
- didn't work correctly for xz archives without checksum (CRC).
-
-
-18.00 beta 2019-01-10
--------------------------
-- The BUG in xz encoder was fixed:
- There was memory leak of 16 KB for each file compressed with
- xz compression method, if additional filter was used.
-
-
-17.01 beta 2017-08-28
--------------------------
-- Minor speed optimization for LZMA2 (xz and 7z) multi-threading compression.
- 7-Zip now uses additional memory buffers for multi-block LZMA2 compression.
- CPU utilization was slightly improved.
-- 7-zip now creates multi-block xz archives by default. Block size can be
- specified with -ms[Size]{m|g} switch.
-- xz decoder now can unpack random block from multi-block xz archives.
-- 7-Zip command line: @listfile now doesn't work after -- switch.
- Use -i@listfile before -- switch instead.
-- The BUGs were fixed:
- 7-Zip 17.00 beta crashed for commands that write anti-item to 7z archive.
-
-
-17.00 beta 2017-04-29
--------------------------
-- NewHandler.h / NewHandler.cpp:
- now it redefines operator new() only for old MSVC compilers (_MSC_VER < 1900).
-- C/7zTypes.h : the names of variables in interface structures were changed (vt).
-- Some bugs were fixed. 7-Zip could crash in some cases.
-- Some internal changes in code.
-
-
-16.04 2016-10-04
--------------------------
-- The bug was fixed in DllSecur.c.
-
-
-16.03 2016-09-28
--------------------------
-- SFX modules now use some protection against DLL preloading attack.
-- Some bugs in 7z code were fixed.
-
-
-16.02 2016-05-21
--------------------------
-- The BUG in 16.00 - 16.01 was fixed:
- Split Handler (SplitHandler.cpp) returned incorrect
- total size value (kpidSize) for split archives.
-
-
-16.01 2016-05-19
--------------------------
-- Some internal changes to reduce the number of compiler warnings.
-
-
-16.00 2016-05-10
--------------------------
-- Some bugs were fixed.
-
-
-15.12 2015-11-19
--------------------------
-- The BUG in C version of 7z decoder was fixed:
- 7zDec.c : SzDecodeLzma2()
- 7z decoder could mistakenly report about decoding error for some 7z archives
- that use LZMA2 compression method.
- The probability to get that mistaken decoding error report was about
- one error per 16384 solid blocks for solid blocks larger than 16 KB (compressed size).
-- The BUG (in 9.26-15.11) in C version of 7z decoder was fixed:
- 7zArcIn.c : SzReadHeader2()
- 7z decoder worked incorrectly for 7z archives that contain
- empty solid blocks, that can be placed to 7z archive, if some file is
- unavailable for reading during archive creation.
-
-
-15.09 beta 2015-10-16
--------------------------
-- The BUG in LZMA / LZMA2 encoding code was fixed.
- The BUG in LzFind.c::MatchFinder_ReadBlock() function.
- If input data size is larger than (4 GiB - dictionary_size),
- the following code worked incorrectly:
- - LZMA : LzmaEnc_MemEncode(), LzmaEncode() : LZMA encoding functions
- for compressing from memory to memory.
- That BUG is not related to LZMA encoder version that works via streams.
- - LZMA2 : multi-threaded version of LZMA2 encoder worked incorrectly, if
- default value of chunk size (CLzma2EncProps::blockSize) is changed
- to value larger than (4 GiB - dictionary_size).
-
-
-9.38 beta 2015-01-03
--------------------------
-- The BUG in 9.31-9.37 was fixed:
- IArchiveGetRawProps interface was disabled for 7z archives.
-- The BUG in 9.26-9.36 was fixed:
- Some code in CPP\7zip\Archive\7z\ worked correctly only under Windows.
-
-
-9.36 beta 2014-12-26
--------------------------
-- The BUG in command line version was fixed:
- 7-Zip created temporary archive in current folder during update archive
- operation, if -w{Path} switch was not specified.
- The fixed 7-Zip creates temporary archive in folder that contains updated archive.
-- The BUG in 9.33-9.35 was fixed:
- 7-Zip silently ignored file reading errors during 7z or gz archive creation,
- and the created archive contained only part of file that was read before error.
- The fixed 7-Zip stops archive creation and it reports about error.
-
-
-9.35 beta 2014-12-07
--------------------------
-- 7zr.exe now support AES encryption.
-- SFX mudules were added to LZMA SDK
-- Some bugs were fixed.
-
-
-9.21 beta 2011-04-11
--------------------------
-- New class FString for file names at file systems.
-- Speed optimization in CRC code for big-endian CPUs.
-- The BUG in Lzma2Dec.c was fixed:
- Lzma2Decode function didn't work.
-
-
-9.18 beta 2010-11-02
--------------------------
-- New small SFX module for installers (SfxSetup).
-
-
-9.12 beta 2010-03-24
--------------------------
-- The BUG in LZMA SDK 9.* was fixed: LZMA2 codec didn't work,
- if more than 10 threads were used (or more than 20 threads in some modes).
-
-
-9.11 beta 2010-03-15
--------------------------
-- PPMd compression method support
-
-
-9.09 2009-12-12
--------------------------
-- The bug was fixed:
- Utf16_To_Utf8 funstions in UTFConvert.cpp and 7zMain.c
- incorrectly converted surrogate characters (the code >= 0x10000) to UTF-8.
-- Some bugs were fixed
-
-
-9.06 2009-08-17
--------------------------
-- Some changes in ANSI-C 7z Decoder interfaces.
-
-
-9.04 2009-05-30
--------------------------
-- LZMA2 compression method support
-- xz format support
-
-
-4.65 2009-02-03
--------------------------
-- Some minor fixes
-
-
-4.63 2008-12-31
--------------------------
-- Some minor fixes
-
-
-4.61 beta 2008-11-23
--------------------------
-- The bug in ANSI-C LZMA Decoder was fixed:
- If encoded stream was corrupted, decoder could access memory
- outside of allocated range.
-- Some changes in ANSI-C 7z Decoder interfaces.
-- LZMA SDK is placed in the public domain.
-
-
-4.60 beta 2008-08-19
--------------------------
-- Some minor fixes.
-
-
-4.59 beta 2008-08-13
--------------------------
-- The bug was fixed:
- LZMA Encoder in fast compression mode could access memory outside of
- allocated range in some rare cases.
-
-
-4.58 beta 2008-05-05
--------------------------
-- ANSI-C LZMA Decoder was rewritten for speed optimizations.
-- ANSI-C LZMA Encoder was included to LZMA SDK.
-- C++ LZMA code now is just wrapper over ANSI-C code.
-
-
-4.57 2007-12-12
--------------------------
-- Speed optimizations in Ñ++ LZMA Decoder.
-- Small changes for more compatibility with some C/C++ compilers.
-
-
-4.49 beta 2007-07-05
--------------------------
-- .7z ANSI-C Decoder:
- - now it supports BCJ and BCJ2 filters
- - now it supports files larger than 4 GB.
- - now it supports "Last Write Time" field for files.
-- C++ code for .7z archives compressing/decompressing from 7-zip
- was included to LZMA SDK.
-
-
-4.43 2006-06-04
--------------------------
-- Small changes for more compatibility with some C/C++ compilers.
-
-
-4.42 2006-05-15
--------------------------
-- Small changes in .h files in ANSI-C version.
-
-
-4.39 beta 2006-04-14
--------------------------
-- The bug in versions 4.33b:4.38b was fixed:
- C++ version of LZMA encoder could not correctly compress
- files larger than 2 GB with HC4 match finder (-mfhc4).
-
-
-4.37 beta 2005-04-06
--------------------------
-- Fixes in C++ code: code could no be compiled if _NO_EXCEPTIONS was defined.
-
-
-4.35 beta 2005-03-02
--------------------------
-- The bug was fixed in C++ version of LZMA Decoder:
- If encoded stream was corrupted, decoder could access memory
- outside of allocated range.
-
-
-4.34 beta 2006-02-27
--------------------------
-- Compressing speed and memory requirements for compressing were increased
-- LZMA now can use only these match finders: HC4, BT2, BT3, BT4
-
-
-4.32 2005-12-09
--------------------------
-- Java version of LZMA SDK was included
-
-
-4.30 2005-11-20
--------------------------
-- Compression ratio was improved in -a2 mode
-- Speed optimizations for compressing in -a2 mode
-- -fb switch now supports values up to 273
-- The bug in 7z_C (7zIn.c) was fixed:
- It used Alloc/Free functions from different memory pools.
- So if program used two memory pools, it worked incorrectly.
-- 7z_C: .7z format supporting was improved
-- LZMA# SDK (C#.NET version) was included
-
-
-4.27 (Updated) 2005-09-21
--------------------------
-- Some GUIDs/interfaces in C++ were changed.
- IStream.h:
- ISequentialInStream::Read now works as old ReadPart
- ISequentialOutStream::Write now works as old WritePart
-
-
-4.27 2005-08-07
--------------------------
-- The bug in LzmaDecodeSize.c was fixed:
- if _LZMA_IN_CB and _LZMA_OUT_READ were defined,
- decompressing worked incorrectly.
-
-
-4.26 2005-08-05
--------------------------
-- Fixes in 7z_C code and LzmaTest.c:
- previous versions could work incorrectly,
- if malloc(0) returns 0
-
-
-4.23 2005-06-29
--------------------------
-- Small fixes in C++ code
-
-
-4.22 2005-06-10
--------------------------
-- Small fixes
-
-
-4.21 2005-06-08
--------------------------
-- Interfaces for ANSI-C LZMA Decoder (LzmaDecode.c) were changed
-- New additional version of ANSI-C LZMA Decoder with zlib-like interface:
- - LzmaStateDecode.h
- - LzmaStateDecode.c
- - LzmaStateTest.c
-- ANSI-C LZMA Decoder now can decompress files larger than 4 GB
-
-
-4.17 2005-04-18
--------------------------
-- New example for RAM->RAM compressing/decompressing:
- LZMA + BCJ (filter for x86 code):
- - LzmaRam.h
- - LzmaRam.cpp
- - LzmaRamDecode.h
- - LzmaRamDecode.c
- - -f86 switch for lzma.exe
-
-
-4.16 2005-03-29
--------------------------
-- The bug was fixed in LzmaDecode.c (ANSI-C LZMA Decoder):
- If _LZMA_OUT_READ was defined, and if encoded stream was corrupted,
- decoder could access memory outside of allocated range.
-- Speed optimization of ANSI-C LZMA Decoder (now it's about 20% faster).
- Old version of LZMA Decoder now is in file LzmaDecodeSize.c.
- LzmaDecodeSize.c can provide slightly smaller code than LzmaDecode.c
-- Small speed optimization in LZMA C++ code
-- filter for SPARC's code was added
-- Simplified version of .7z ANSI-C Decoder was included
-
-
-4.06 2004-09-05
--------------------------
-- The bug in v4.05 was fixed:
- LZMA-Encoder didn't release output stream in some cases.
-
-
-4.05 2004-08-25
--------------------------
-- Source code of filters for x86, IA-64, ARM, ARM-Thumb
- and PowerPC code was included to SDK
-- Some internal minor changes
-
-
-4.04 2004-07-28
--------------------------
-- More compatibility with some C++ compilers
-
-
-4.03 2004-06-18
--------------------------
-- "Benchmark" command was added. It measures compressing
- and decompressing speed and shows rating values.
- Also it checks hardware errors.
-
-
-4.02 2004-06-10
--------------------------
-- C++ LZMA Encoder/Decoder code now is more portable
- and it can be compiled by GCC on Linux.
-
-
-4.01 2004-02-15
--------------------------
-- Some detection of data corruption was enabled.
- LzmaDecode.c / RangeDecoderReadByte
- .....
- {
- rd->ExtraBytes = 1;
- return 0xFF;
- }
-
-
-4.00 2004-02-13
--------------------------
-- Original version of LZMA SDK
-
-
-
-HISTORY of the LZMA
--------------------
- 2001-2008: Improvements to LZMA compressing/decompressing code,
- keeping compatibility with original LZMA format
- 1996-2001: Development of LZMA compression format
-
- Some milestones:
-
- 2001-08-30: LZMA compression was added to 7-Zip
- 1999-01-02: First version of 7-Zip was released
-
-
-End of document
+++ /dev/null
-LZMA compression
-----------------
-Version: 9.35
-
-This file describes LZMA encoding and decoding functions written in C language.
-
-LZMA is an improved version of famous LZ77 compression algorithm.
-It was improved in way of maximum increasing of compression ratio,
-keeping high decompression speed and low memory requirements for
-decompressing.
-
-Note: you can read also LZMA Specification (lzma-specification.txt from LZMA SDK)
-
-Also you can look source code for LZMA encoding and decoding:
- C/Util/Lzma/LzmaUtil.c
-
-
-LZMA compressed file format
----------------------------
-Offset Size Description
- 0 1 Special LZMA properties (lc,lp, pb in encoded form)
- 1 4 Dictionary size (little endian)
- 5 8 Uncompressed size (little endian). -1 means unknown size
- 13 Compressed data
-
-
-
-ANSI-C LZMA Decoder
-~~~~~~~~~~~~~~~~~~~
-
-Please note that interfaces for ANSI-C code were changed in LZMA SDK 4.58.
-If you want to use old interfaces you can download previous version of LZMA SDK
-from sourceforge.net site.
-
-To use ANSI-C LZMA Decoder you need the following files:
-1) LzmaDec.h + LzmaDec.c + 7zTypes.h + Precomp.h + Compiler.h
-
-Look example code:
- C/Util/Lzma/LzmaUtil.c
-
-
-Memory requirements for LZMA decoding
--------------------------------------
-
-Stack usage of LZMA decoding function for local variables is not
-larger than 200-400 bytes.
-
-LZMA Decoder uses dictionary buffer and internal state structure.
-Internal state structure consumes
- state_size = (4 + (1.5 << (lc + lp))) KB
-by default (lc=3, lp=0), state_size = 16 KB.
-
-
-How To decompress data
-----------------------
-
-LZMA Decoder (ANSI-C version) now supports 2 interfaces:
-1) Single-call Decompressing
-2) Multi-call State Decompressing (zlib-like interface)
-
-You must use external allocator:
-Example:
-void *SzAlloc(void *p, size_t size) { p = p; return malloc(size); }
-void SzFree(void *p, void *address) { p = p; free(address); }
-ISzAlloc alloc = { SzAlloc, SzFree };
-
-You can use p = p; operator to disable compiler warnings.
-
-
-Single-call Decompressing
--------------------------
-When to use: RAM->RAM decompressing
-Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h
-Compile defines: no defines
-Memory Requirements:
- - Input buffer: compressed size
- - Output buffer: uncompressed size
- - LZMA Internal Structures: state_size (16 KB for default settings)
-
-Interface:
- int LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
- const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
- ELzmaStatus *status, ISzAlloc *alloc);
- In:
- dest - output data
- destLen - output data size
- src - input data
- srcLen - input data size
- propData - LZMA properties (5 bytes)
- propSize - size of propData buffer (5 bytes)
- finishMode - It has meaning only if the decoding reaches output limit (*destLen).
- LZMA_FINISH_ANY - Decode just destLen bytes.
- LZMA_FINISH_END - Stream must be finished after (*destLen).
- You can use LZMA_FINISH_END, when you know that
- current output buffer covers last bytes of stream.
- alloc - Memory allocator.
-
- Out:
- destLen - processed output size
- srcLen - processed input size
-
- Output:
- SZ_OK
- status:
- LZMA_STATUS_FINISHED_WITH_MARK
- LZMA_STATUS_NOT_FINISHED
- LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
- SZ_ERROR_DATA - Data error
- SZ_ERROR_MEM - Memory allocation error
- SZ_ERROR_UNSUPPORTED - Unsupported properties
- SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
-
- If LZMA decoder sees end_marker before reaching output limit, it returns OK result,
- and output value of destLen will be less than output buffer size limit.
-
- You can use multiple checks to test data integrity after full decompression:
- 1) Check Result and "status" variable.
- 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
- 3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
- You must use correct finish mode in that case. */
-
-
-Multi-call State Decompressing (zlib-like interface)
-----------------------------------------------------
-
-When to use: file->file decompressing
-Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h
-
-Memory Requirements:
- - Buffer for input stream: any size (for example, 16 KB)
- - Buffer for output stream: any size (for example, 16 KB)
- - LZMA Internal Structures: state_size (16 KB for default settings)
- - LZMA dictionary (dictionary size is encoded in LZMA properties header)
-
-1) read LZMA properties (5 bytes) and uncompressed size (8 bytes, little-endian) to header:
- unsigned char header[LZMA_PROPS_SIZE + 8];
- ReadFile(inFile, header, sizeof(header)
-
-2) Allocate CLzmaDec structures (state + dictionary) using LZMA properties
-
- CLzmaDec state;
- LzmaDec_Constr(&state);
- res = LzmaDec_Allocate(&state, header, LZMA_PROPS_SIZE, &g_Alloc);
- if (res != SZ_OK)
- return res;
-
-3) Init LzmaDec structure before any new LZMA stream. And call LzmaDec_DecodeToBuf in loop
-
- LzmaDec_Init(&state);
- for (;;)
- {
- ...
- int res = LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
- const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode);
- ...
- }
-
-
-4) Free all allocated structures
- LzmaDec_Free(&state, &g_Alloc);
-
-Look example code:
- C/Util/Lzma/LzmaUtil.c
-
-
-How To compress data
---------------------
-
-Compile files:
- 7zTypes.h
- Threads.h
- LzmaEnc.h
- LzmaEnc.c
- LzFind.h
- LzFind.c
- LzFindMt.h
- LzFindMt.c
- LzHash.h
-
-Memory Requirements:
- - (dictSize * 11.5 + 6 MB) + state_size
-
-Lzma Encoder can use two memory allocators:
-1) alloc - for small arrays.
-2) allocBig - for big arrays.
-
-For example, you can use Large RAM Pages (2 MB) in allocBig allocator for
-better compression speed. Note that Windows has bad implementation for
-Large RAM Pages.
-It's OK to use same allocator for alloc and allocBig.
-
-
-Single-call Compression with callbacks
---------------------------------------
-
-Look example code:
- C/Util/Lzma/LzmaUtil.c
-
-When to use: file->file compressing
-
-1) you must implement callback structures for interfaces:
-ISeqInStream
-ISeqOutStream
-ICompressProgress
-ISzAlloc
-
-static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); }
-static void SzFree(void *p, void *address) { p = p; MyFree(address); }
-static ISzAlloc g_Alloc = { SzAlloc, SzFree };
-
- CFileSeqInStream inStream;
- CFileSeqOutStream outStream;
-
- inStream.funcTable.Read = MyRead;
- inStream.file = inFile;
- outStream.funcTable.Write = MyWrite;
- outStream.file = outFile;
-
-
-2) Create CLzmaEncHandle object;
-
- CLzmaEncHandle enc;
-
- enc = LzmaEnc_Create(&g_Alloc);
- if (enc == 0)
- return SZ_ERROR_MEM;
-
-
-3) initialize CLzmaEncProps properties;
-
- LzmaEncProps_Init(&props);
-
- Then you can change some properties in that structure.
-
-4) Send LZMA properties to LZMA Encoder
-
- res = LzmaEnc_SetProps(enc, &props);
-
-5) Write encoded properties to header
-
- Byte header[LZMA_PROPS_SIZE + 8];
- size_t headerSize = LZMA_PROPS_SIZE;
- UInt64 fileSize;
- int i;
-
- res = LzmaEnc_WriteProperties(enc, header, &headerSize);
- fileSize = MyGetFileLength(inFile);
- for (i = 0; i < 8; i++)
- header[headerSize++] = (Byte)(fileSize >> (8 * i));
- MyWriteFileAndCheck(outFile, header, headerSize)
-
-6) Call encoding function:
- res = LzmaEnc_Encode(enc, &outStream.funcTable, &inStream.funcTable,
- NULL, &g_Alloc, &g_Alloc);
-
-7) Destroy LZMA Encoder Object
- LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc);
-
-
-If callback function return some error code, LzmaEnc_Encode also returns that code
-or it can return the code like SZ_ERROR_READ, SZ_ERROR_WRITE or SZ_ERROR_PROGRESS.
-
-
-Single-call RAM->RAM Compression
---------------------------------
-
-Single-call RAM->RAM Compression is similar to Compression with callbacks,
-but you provide pointers to buffers instead of pointers to stream callbacks:
-
-SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
- const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
- ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
-
-Return code:
- SZ_OK - OK
- SZ_ERROR_MEM - Memory allocation error
- SZ_ERROR_PARAM - Incorrect paramater
- SZ_ERROR_OUTPUT_EOF - output buffer overflow
- SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
-
-
-
-Defines
--------
-
-_LZMA_SIZE_OPT - Enable some optimizations in LZMA Decoder to get smaller executable code.
-
-_LZMA_PROB32 - It can increase the speed on some 32-bit CPUs, but memory usage for
- some structures will be doubled in that case.
-
-_LZMA_UINT32_IS_ULONG - Define it if int is 16-bit on your compiler and long is 32-bit.
-
-_LZMA_NO_SYSTEM_SIZE_T - Define it if you don't want to use size_t type.
-
-
-_7ZIP_PPMD_SUPPPORT - Define it if you don't want to support PPMD method in AMSI-C .7z decoder.
-
-
-C++ LZMA Encoder/Decoder
-~~~~~~~~~~~~~~~~~~~~~~~~
-C++ LZMA code use COM-like interfaces. So if you want to use it,
-you can study basics of COM/OLE.
-C++ LZMA code is just wrapper over ANSI-C code.
-
-
-C++ Notes
-~~~~~~~~~~~~~~~~~~~~~~~~
-If you use some C++ code folders in 7-Zip (for example, C++ code for .7z handling),
-you must check that you correctly work with "new" operator.
-7-Zip can be compiled with MSVC 6.0 that doesn't throw "exception" from "new" operator.
-So 7-Zip uses "CPP\Common\NewHandler.cpp" that redefines "new" operator:
-operator new(size_t size)
-{
- void *p = ::malloc(size);
- if (p == 0)
- throw CNewException();
- return p;
-}
-If you use MSCV that throws exception for "new" operator, you can compile without
-"NewHandler.cpp". So standard exception will be used. Actually some code of
-7-Zip catches any exception in internal code and converts it to HRESULT code.
-So you don't need to catch CNewException, if you call COM interfaces of 7-Zip.
-
----
-
-http://www.7-zip.org
-http://www.7-zip.org/sdk.html
-http://www.7-zip.org/support.html
+++ /dev/null
-<?xml version="1.0" encoding="utf-8"?>\r
-<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">\r
- <ItemGroup Label="ProjectConfigurations">\r
- <ProjectConfiguration Include="DebugFast|ARM64">\r
- <Configuration>DebugFast</Configuration>\r
- <Platform>ARM64</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="DebugFast|Win32">\r
- <Configuration>DebugFast</Configuration>\r
- <Platform>Win32</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="DebugFast|x64">\r
- <Configuration>DebugFast</Configuration>\r
- <Platform>x64</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="Debug|ARM64">\r
- <Configuration>Debug</Configuration>\r
- <Platform>ARM64</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="Debug|Win32">\r
- <Configuration>Debug</Configuration>\r
- <Platform>Win32</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="Debug|x64">\r
- <Configuration>Debug</Configuration>\r
- <Platform>x64</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="ReleaseLTCG|ARM64">\r
- <Configuration>ReleaseLTCG</Configuration>\r
- <Platform>ARM64</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="ReleaseLTCG|Win32">\r
- <Configuration>ReleaseLTCG</Configuration>\r
- <Platform>Win32</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="ReleaseLTCG|x64">\r
- <Configuration>ReleaseLTCG</Configuration>\r
- <Platform>x64</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="Release|ARM64">\r
- <Configuration>Release</Configuration>\r
- <Platform>ARM64</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="Release|Win32">\r
- <Configuration>Release</Configuration>\r
- <Platform>Win32</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="Release|x64">\r
- <Configuration>Release</Configuration>\r
- <Platform>x64</Platform>\r
- </ProjectConfiguration>\r
- </ItemGroup>\r
- <ItemGroup>\r
- <ClCompile Include="src\Alloc.c" />\r
- <ClCompile Include="src\Bra86.c" />\r
- <ClCompile Include="src\BraIA64.c" />\r
- <ClCompile Include="src\CpuArch.c" />\r
- <ClCompile Include="src\Delta.c" />\r
- <ClCompile Include="src\LzFind.c" />\r
- <ClCompile Include="src\Lzma86Dec.c" />\r
- <ClCompile Include="src\Lzma86Enc.c" />\r
- <ClCompile Include="src\LzmaDec.c" />\r
- <ClCompile Include="src\LzmaEnc.c" />\r
- <ClCompile Include="src\LzmaLib.c" />\r
- <ClCompile Include="src\Sort.c" />\r
- </ItemGroup>\r
- <PropertyGroup Label="Globals">\r
- <ProjectGuid>{DD944834-7899-4C1C-A4C1-064B5009D239}</ProjectGuid>\r
- <Keyword>Win32Proj</Keyword>\r
- <RootNamespace>lzma</RootNamespace>\r
- <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>\r
- </PropertyGroup>\r
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>true</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <CharacterSet>NotSet</CharacterSet>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>true</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <CharacterSet>NotSet</CharacterSet>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>true</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <CharacterSet>NotSet</CharacterSet>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|Win32'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>true</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <CharacterSet>NotSet</CharacterSet>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|x64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>true</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <CharacterSet>NotSet</CharacterSet>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|ARM64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>true</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <CharacterSet>NotSet</CharacterSet>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>false</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <CharacterSet>NotSet</CharacterSet>\r
- <SpectreMitigation>false</SpectreMitigation>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|Win32'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>false</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <CharacterSet>NotSet</CharacterSet>\r
- <SpectreMitigation>false</SpectreMitigation>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>false</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <CharacterSet>NotSet</CharacterSet>\r
- <SpectreMitigation>false</SpectreMitigation>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>false</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <CharacterSet>NotSet</CharacterSet>\r
- <SpectreMitigation>false</SpectreMitigation>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|x64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>false</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <CharacterSet>NotSet</CharacterSet>\r
- <SpectreMitigation>false</SpectreMitigation>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|ARM64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>false</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <CharacterSet>NotSet</CharacterSet>\r
- <SpectreMitigation>false</SpectreMitigation>\r
- </PropertyGroup>\r
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />\r
- <ImportGroup Label="ExtensionSettings">\r
- </ImportGroup>\r
- <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|Win32'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|x64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|ARM64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|Win32'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|x64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|ARM64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <PropertyGroup Label="UserMacros" />\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">\r
- <LinkIncremental>true</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>true</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>true</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|Win32'">\r
- <LinkIncremental>true</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|x64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>true</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|ARM64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>true</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">\r
- <LinkIncremental>false</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|Win32'">\r
- <LinkIncremental>false</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>false</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>false</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|x64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>false</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|ARM64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>false</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">\r
- <ClCompile>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <WarningLevel>Level2</WarningLevel>\r
- <Optimization>Disabled</Optimization>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <SDLCheck>true</SDLCheck>\r
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <MinimalRebuild>false</MinimalRebuild>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">\r
- <ClCompile>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <WarningLevel>Level2</WarningLevel>\r
- <Optimization>Disabled</Optimization>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <SDLCheck>true</SDLCheck>\r
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <MinimalRebuild>false</MinimalRebuild>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">\r
- <ClCompile>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <WarningLevel>Level2</WarningLevel>\r
- <Optimization>Disabled</Optimization>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <SDLCheck>true</SDLCheck>\r
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <MinimalRebuild>false</MinimalRebuild>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|Win32'">\r
- <ClCompile>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <WarningLevel>Level2</WarningLevel>\r
- <Optimization>Disabled</Optimization>\r
- <PreprocessorDefinitions>_7ZIP_ST;_ITERATOR_DEBUG_LEVEL=1;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <SDLCheck>true</SDLCheck>\r
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <BasicRuntimeChecks>Default</BasicRuntimeChecks>\r
- <MinimalRebuild>false</MinimalRebuild>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <SupportJustMyCode>false</SupportJustMyCode>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|x64'">\r
- <ClCompile>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <WarningLevel>Level2</WarningLevel>\r
- <Optimization>Disabled</Optimization>\r
- <PreprocessorDefinitions>_7ZIP_ST;_ITERATOR_DEBUG_LEVEL=1;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <SDLCheck>true</SDLCheck>\r
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <BasicRuntimeChecks>Default</BasicRuntimeChecks>\r
- <MinimalRebuild>false</MinimalRebuild>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <SupportJustMyCode>false</SupportJustMyCode>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|ARM64'">\r
- <ClCompile>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <WarningLevel>Level2</WarningLevel>\r
- <Optimization>Disabled</Optimization>\r
- <PreprocessorDefinitions>_7ZIP_ST;_ITERATOR_DEBUG_LEVEL=1;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <SDLCheck>true</SDLCheck>\r
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <BasicRuntimeChecks>Default</BasicRuntimeChecks>\r
- <MinimalRebuild>false</MinimalRebuild>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <SupportJustMyCode>false</SupportJustMyCode>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">\r
- <ClCompile>\r
- <WarningLevel>Level2</WarningLevel>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <Optimization>MaxSpeed</Optimization>\r
- <IntrinsicFunctions>true</IntrinsicFunctions>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <WholeProgramOptimization>false</WholeProgramOptimization>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- <EnableCOMDATFolding>true</EnableCOMDATFolding>\r
- <OptimizeReferences>true</OptimizeReferences>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|Win32'">\r
- <ClCompile>\r
- <WarningLevel>Level2</WarningLevel>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <Optimization>MaxSpeed</Optimization>\r
- <IntrinsicFunctions>true</IntrinsicFunctions>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <OmitFramePointers>true</OmitFramePointers>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- <EnableCOMDATFolding>true</EnableCOMDATFolding>\r
- <OptimizeReferences>true</OptimizeReferences>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">\r
- <ClCompile>\r
- <WarningLevel>Level2</WarningLevel>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <Optimization>MaxSpeed</Optimization>\r
- <IntrinsicFunctions>true</IntrinsicFunctions>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <WholeProgramOptimization>false</WholeProgramOptimization>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- <EnableCOMDATFolding>true</EnableCOMDATFolding>\r
- <OptimizeReferences>true</OptimizeReferences>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">\r
- <ClCompile>\r
- <WarningLevel>Level2</WarningLevel>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <Optimization>MaxSpeed</Optimization>\r
- <IntrinsicFunctions>true</IntrinsicFunctions>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <WholeProgramOptimization>false</WholeProgramOptimization>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- <EnableCOMDATFolding>true</EnableCOMDATFolding>\r
- <OptimizeReferences>true</OptimizeReferences>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|x64'">\r
- <ClCompile>\r
- <WarningLevel>Level2</WarningLevel>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <Optimization>MaxSpeed</Optimization>\r
- <IntrinsicFunctions>true</IntrinsicFunctions>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <OmitFramePointers>true</OmitFramePointers>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- <EnableCOMDATFolding>true</EnableCOMDATFolding>\r
- <OptimizeReferences>true</OptimizeReferences>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|ARM64'">\r
- <ClCompile>\r
- <WarningLevel>Level2</WarningLevel>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <Optimization>MaxSpeed</Optimization>\r
- <IntrinsicFunctions>true</IntrinsicFunctions>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <OmitFramePointers>true</OmitFramePointers>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- <EnableCOMDATFolding>true</EnableCOMDATFolding>\r
- <OptimizeReferences>true</OptimizeReferences>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />\r
- <ImportGroup Label="ExtensionTargets">\r
- </ImportGroup>\r
-</Project>
\ No newline at end of file
+++ /dev/null
-<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <ItemGroup>
- <ClCompile Include="src\BraIA64.c" />
- <ClCompile Include="src\CpuArch.c" />
- <ClCompile Include="src\Delta.c" />
- <ClCompile Include="src\LzFind.c" />
- <ClCompile Include="src\Lzma86Dec.c" />
- <ClCompile Include="src\Lzma86Enc.c" />
- <ClCompile Include="src\LzmaDec.c" />
- <ClCompile Include="src\LzmaEnc.c" />
- <ClCompile Include="src\LzmaLib.c" />
- <ClCompile Include="src\Sort.c" />
- <ClCompile Include="src\Alloc.c" />
- <ClCompile Include="src\Bra86.c" />
- </ItemGroup>
-</Project>
\ No newline at end of file
+++ /dev/null
-/* Alloc.c -- Memory allocation functions
-2018-04-27 : Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
-#include <stdio.h>
-
-#ifdef _WIN32
-#include <windows.h>
-#endif
-#include <stdlib.h>
-
-#include "Alloc.h"
-
-/* #define _SZ_ALLOC_DEBUG */
-
-/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
-#ifdef _SZ_ALLOC_DEBUG
-
-#include <stdio.h>
-int g_allocCount = 0;
-int g_allocCountMid = 0;
-int g_allocCountBig = 0;
-
-
-#define CONVERT_INT_TO_STR(charType, tempSize) \
- unsigned char temp[tempSize]; unsigned i = 0; \
- while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \
- *s++ = (charType)('0' + (unsigned)val); \
- while (i != 0) { i--; *s++ = temp[i]; } \
- *s = 0;
-
-static void ConvertUInt64ToString(UInt64 val, char *s)
-{
- CONVERT_INT_TO_STR(char, 24);
-}
-
-#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
-
-static void ConvertUInt64ToHex(UInt64 val, char *s)
-{
- UInt64 v = val;
- unsigned i;
- for (i = 1;; i++)
- {
- v >>= 4;
- if (v == 0)
- break;
- }
- s[i] = 0;
- do
- {
- unsigned t = (unsigned)(val & 0xF);
- val >>= 4;
- s[--i] = GET_HEX_CHAR(t);
- }
- while (i);
-}
-
-#define DEBUG_OUT_STREAM stderr
-
-static void Print(const char *s)
-{
- fputs(s, DEBUG_OUT_STREAM);
-}
-
-static void PrintAligned(const char *s, size_t align)
-{
- size_t len = strlen(s);
- for(;;)
- {
- fputc(' ', DEBUG_OUT_STREAM);
- if (len >= align)
- break;
- ++len;
- }
- Print(s);
-}
-
-static void PrintLn()
-{
- Print("\n");
-}
-
-static void PrintHex(UInt64 v, size_t align)
-{
- char s[32];
- ConvertUInt64ToHex(v, s);
- PrintAligned(s, align);
-}
-
-static void PrintDec(UInt64 v, size_t align)
-{
- char s[32];
- ConvertUInt64ToString(v, s);
- PrintAligned(s, align);
-}
-
-static void PrintAddr(void *p)
-{
- PrintHex((UInt64)(size_t)(ptrdiff_t)p, 12);
-}
-
-
-#define PRINT_ALLOC(name, cnt, size, ptr) \
- Print(name " "); \
- PrintDec(cnt++, 10); \
- PrintHex(size, 10); \
- PrintAddr(ptr); \
- PrintLn();
-
-#define PRINT_FREE(name, cnt, ptr) if (ptr) { \
- Print(name " "); \
- PrintDec(--cnt, 10); \
- PrintAddr(ptr); \
- PrintLn(); }
-
-#else
-
-#define PRINT_ALLOC(name, cnt, size, ptr)
-#define PRINT_FREE(name, cnt, ptr)
-#define Print(s)
-#define PrintLn()
-#define PrintHex(v, align)
-#define PrintDec(v, align)
-#define PrintAddr(p)
-
-#endif
-
-
-
-void *MyAlloc(size_t size)
-{
- if (size == 0)
- return NULL;
- #ifdef _SZ_ALLOC_DEBUG
- {
- void *p = malloc(size);
- PRINT_ALLOC("Alloc ", g_allocCount, size, p);
- return p;
- }
- #else
- return malloc(size);
- #endif
-}
-
-void MyFree(void *address)
-{
- PRINT_FREE("Free ", g_allocCount, address);
-
- free(address);
-}
-
-#ifdef _WIN32
-
-void *MidAlloc(size_t size)
-{
- if (size == 0)
- return NULL;
-
- PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL);
-
- return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
-}
-
-void MidFree(void *address)
-{
- PRINT_FREE("Free-Mid", g_allocCountMid, address);
-
- if (!address)
- return;
- VirtualFree(address, 0, MEM_RELEASE);
-}
-
-#ifndef MEM_LARGE_PAGES
-#undef _7ZIP_LARGE_PAGES
-#endif
-
-#ifdef _7ZIP_LARGE_PAGES
-SIZE_T g_LargePageSize = 0;
-typedef SIZE_T (WINAPI *GetLargePageMinimumP)();
-#endif
-
-void SetLargePageSize()
-{
- #ifdef _7ZIP_LARGE_PAGES
- SIZE_T size;
- GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP)
- GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum");
- if (!largePageMinimum)
- return;
- size = largePageMinimum();
- if (size == 0 || (size & (size - 1)) != 0)
- return;
- g_LargePageSize = size;
- #endif
-}
-
-
-void *BigAlloc(size_t size)
-{
- if (size == 0)
- return NULL;
-
- PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL);
-
- #ifdef _7ZIP_LARGE_PAGES
- {
- SIZE_T ps = g_LargePageSize;
- if (ps != 0 && ps <= (1 << 30) && size > (ps / 2))
- {
- size_t size2;
- ps--;
- size2 = (size + ps) & ~ps;
- if (size2 >= size)
- {
- void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
- if (res)
- return res;
- }
- }
- }
- #endif
-
- return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
-}
-
-void BigFree(void *address)
-{
- PRINT_FREE("Free-Big", g_allocCountBig, address);
-
- if (!address)
- return;
- VirtualFree(address, 0, MEM_RELEASE);
-}
-
-#endif
-
-
-static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); }
-static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); }
-const ISzAlloc g_Alloc = { SzAlloc, SzFree };
-
-static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); }
-static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); }
-const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
-
-static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); }
-static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); }
-const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
-
-
-/*
- uintptr_t : <stdint.h> C99 (optional)
- : unsupported in VS6
-*/
-
-#ifdef _WIN32
- typedef UINT_PTR UIntPtr;
-#else
- /*
- typedef uintptr_t UIntPtr;
- */
- typedef ptrdiff_t UIntPtr;
-#endif
-
-
-#define ADJUST_ALLOC_SIZE 0
-/*
-#define ADJUST_ALLOC_SIZE (sizeof(void *) - 1)
-*/
-/*
- Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if
- MyAlloc() can return address that is NOT multiple of sizeof(void *).
-*/
-
-
-/*
-#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1))))
-*/
-#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1))))
-
-#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
-
-
-#if (_POSIX_C_SOURCE >= 200112L) && !defined(_WIN32)
- #define USE_posix_memalign
-#endif
-
-/*
- This posix_memalign() is for test purposes only.
- We also need special Free() function instead of free(),
- if this posix_memalign() is used.
-*/
-
-/*
-static int posix_memalign(void **ptr, size_t align, size_t size)
-{
- size_t newSize = size + align;
- void *p;
- void *pAligned;
- *ptr = NULL;
- if (newSize < size)
- return 12; // ENOMEM
- p = MyAlloc(newSize);
- if (!p)
- return 12; // ENOMEM
- pAligned = MY_ALIGN_PTR_UP_PLUS(p, align);
- ((void **)pAligned)[-1] = p;
- *ptr = pAligned;
- return 0;
-}
-*/
-
-/*
- ALLOC_ALIGN_SIZE >= sizeof(void *)
- ALLOC_ALIGN_SIZE >= cache_line_size
-*/
-
-#define ALLOC_ALIGN_SIZE ((size_t)1 << 7)
-
-static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
-{
- #ifndef USE_posix_memalign
-
- void *p;
- void *pAligned;
- size_t newSize;
- UNUSED_VAR(pp);
-
- /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
- block to prevent cache line sharing with another allocated blocks */
-
- newSize = size + ALLOC_ALIGN_SIZE * 1 + ADJUST_ALLOC_SIZE;
- if (newSize < size)
- return NULL;
-
- p = MyAlloc(newSize);
-
- if (!p)
- return NULL;
- pAligned = MY_ALIGN_PTR_UP_PLUS(p, ALLOC_ALIGN_SIZE);
-
- Print(" size="); PrintHex(size, 8);
- Print(" a_size="); PrintHex(newSize, 8);
- Print(" ptr="); PrintAddr(p);
- Print(" a_ptr="); PrintAddr(pAligned);
- PrintLn();
-
- ((void **)pAligned)[-1] = p;
-
- return pAligned;
-
- #else
-
- void *p;
- UNUSED_VAR(pp);
- if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size))
- return NULL;
-
- Print(" posix_memalign="); PrintAddr(p);
- PrintLn();
-
- return p;
-
- #endif
-}
-
-
-static void SzAlignedFree(ISzAllocPtr pp, void *address)
-{
- UNUSED_VAR(pp);
- #ifndef USE_posix_memalign
- if (address)
- MyFree(((void **)address)[-1]);
- #else
- free(address);
- #endif
-}
-
-
-const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };
-
-
-
-#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
-
-/* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */
-#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
-/*
-#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1]
-*/
-
-static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
-{
- CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);
- void *adr;
- void *pAligned;
- size_t newSize;
- size_t extra;
- size_t alignSize = (size_t)1 << p->numAlignBits;
-
- if (alignSize < sizeof(void *))
- alignSize = sizeof(void *);
-
- if (p->offset >= alignSize)
- return NULL;
-
- /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
- block to prevent cache line sharing with another allocated blocks */
- extra = p->offset & (sizeof(void *) - 1);
- newSize = size + alignSize + extra + ADJUST_ALLOC_SIZE;
- if (newSize < size)
- return NULL;
-
- adr = ISzAlloc_Alloc(p->baseAlloc, newSize);
-
- if (!adr)
- return NULL;
-
- pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr +
- alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset;
-
- PrintLn();
- Print("- Aligned: ");
- Print(" size="); PrintHex(size, 8);
- Print(" a_size="); PrintHex(newSize, 8);
- Print(" ptr="); PrintAddr(adr);
- Print(" a_ptr="); PrintAddr(pAligned);
- PrintLn();
-
- REAL_BLOCK_PTR_VAR(pAligned) = adr;
-
- return pAligned;
-}
-
-
-static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
-{
- if (address)
- {
- CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);
- PrintLn();
- Print("- Aligned Free: ");
- PrintLn();
- ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address));
- }
-}
-
-
-void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p)
-{
- p->vt.Alloc = AlignOffsetAlloc_Alloc;
- p->vt.Free = AlignOffsetAlloc_Free;
-}
+++ /dev/null
-/* Bra86.c -- Converter for x86 code (BCJ)
-2017-04-03 : Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
-#include "Bra.h"
-
-#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0)
-
-SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding)
-{
- SizeT pos = 0;
- UInt32 mask = *state & 7;
- if (size < 5)
- return 0;
- size -= 4;
- ip += 5;
-
- for (;;)
- {
- Byte *p = data + pos;
- const Byte *limit = data + size;
- for (; p < limit; p++)
- if ((*p & 0xFE) == 0xE8)
- break;
-
- {
- SizeT d = (SizeT)(p - data - pos);
- pos = (SizeT)(p - data);
- if (p >= limit)
- {
- *state = (d > 2 ? 0 : mask >> (unsigned)d);
- return pos;
- }
- if (d > 2)
- mask = 0;
- else
- {
- mask >>= (unsigned)d;
- if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1])))
- {
- mask = (mask >> 1) | 4;
- pos++;
- continue;
- }
- }
- }
-
- if (Test86MSByte(p[4]))
- {
- UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]);
- UInt32 cur = ip + (UInt32)pos;
- pos += 5;
- if (encoding)
- v += cur;
- else
- v -= cur;
- if (mask != 0)
- {
- unsigned sh = (mask & 6) << 2;
- if (Test86MSByte((Byte)(v >> sh)))
- {
- v ^= (((UInt32)0x100 << sh) - 1);
- if (encoding)
- v += cur;
- else
- v -= cur;
- }
- mask = 0;
- }
- p[1] = (Byte)v;
- p[2] = (Byte)(v >> 8);
- p[3] = (Byte)(v >> 16);
- p[4] = (Byte)(0 - ((v >> 24) & 1));
- }
- else
- {
- mask = (mask >> 1) | 4;
- pos++;
- }
- }
-}
+++ /dev/null
-/* BraIA64.c -- Converter for IA-64 code
-2017-01-26 : Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
-#include "CpuArch.h"
-#include "Bra.h"
-
-SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
-{
- SizeT i;
- if (size < 16)
- return 0;
- size -= 16;
- i = 0;
- do
- {
- unsigned m = ((UInt32)0x334B0000 >> (data[i] & 0x1E)) & 3;
- if (m)
- {
- m++;
- do
- {
- Byte *p = data + (i + (size_t)m * 5 - 8);
- if (((p[3] >> m) & 15) == 5
- && (((p[-1] | ((UInt32)p[0] << 8)) >> m) & 0x70) == 0)
- {
- unsigned raw = GetUi32(p);
- unsigned v = raw >> m;
- v = (v & 0xFFFFF) | ((v & (1 << 23)) >> 3);
-
- v <<= 4;
- if (encoding)
- v += ip + (UInt32)i;
- else
- v -= ip + (UInt32)i;
- v >>= 4;
-
- v &= 0x1FFFFF;
- v += 0x700000;
- v &= 0x8FFFFF;
- raw &= ~((UInt32)0x8FFFFF << m);
- raw |= (v << m);
- SetUi32(p, raw);
- }
- }
- while (++m <= 4);
- }
- i += 16;
- }
- while (i <= size);
- return i;
-}
+++ /dev/null
-/* CpuArch.c -- CPU specific code
-2018-02-18: Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
-#include "CpuArch.h"
-
-#ifdef MY_CPU_X86_OR_AMD64
-
-#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__)
-#define USE_ASM
-#endif
-
-#if !defined(USE_ASM) && _MSC_VER >= 1500
-#include <intrin.h>
-#endif
-
-#if defined(USE_ASM) && !defined(MY_CPU_AMD64)
-static UInt32 CheckFlag(UInt32 flag)
-{
- #ifdef _MSC_VER
- __asm pushfd;
- __asm pop EAX;
- __asm mov EDX, EAX;
- __asm xor EAX, flag;
- __asm push EAX;
- __asm popfd;
- __asm pushfd;
- __asm pop EAX;
- __asm xor EAX, EDX;
- __asm push EDX;
- __asm popfd;
- __asm and flag, EAX;
- #else
- __asm__ __volatile__ (
- "pushf\n\t"
- "pop %%EAX\n\t"
- "movl %%EAX,%%EDX\n\t"
- "xorl %0,%%EAX\n\t"
- "push %%EAX\n\t"
- "popf\n\t"
- "pushf\n\t"
- "pop %%EAX\n\t"
- "xorl %%EDX,%%EAX\n\t"
- "push %%EDX\n\t"
- "popf\n\t"
- "andl %%EAX, %0\n\t":
- "=c" (flag) : "c" (flag) :
- "%eax", "%edx");
- #endif
- return flag;
-}
-#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False;
-#else
-#define CHECK_CPUID_IS_SUPPORTED
-#endif
-
-void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
-{
- #ifdef USE_ASM
-
- #ifdef _MSC_VER
-
- UInt32 a2, b2, c2, d2;
- __asm xor EBX, EBX;
- __asm xor ECX, ECX;
- __asm xor EDX, EDX;
- __asm mov EAX, function;
- __asm cpuid;
- __asm mov a2, EAX;
- __asm mov b2, EBX;
- __asm mov c2, ECX;
- __asm mov d2, EDX;
-
- *a = a2;
- *b = b2;
- *c = c2;
- *d = d2;
-
- #else
-
- __asm__ __volatile__ (
- #if defined(MY_CPU_AMD64) && defined(__PIC__)
- "mov %%rbx, %%rdi;"
- "cpuid;"
- "xchg %%rbx, %%rdi;"
- : "=a" (*a) ,
- "=D" (*b) ,
- #elif defined(MY_CPU_X86) && defined(__PIC__)
- "mov %%ebx, %%edi;"
- "cpuid;"
- "xchgl %%ebx, %%edi;"
- : "=a" (*a) ,
- "=D" (*b) ,
- #else
- "cpuid"
- : "=a" (*a) ,
- "=b" (*b) ,
- #endif
- "=c" (*c) ,
- "=d" (*d)
- : "0" (function)) ;
-
- #endif
-
- #else
-
- int CPUInfo[4];
- __cpuid(CPUInfo, function);
- *a = CPUInfo[0];
- *b = CPUInfo[1];
- *c = CPUInfo[2];
- *d = CPUInfo[3];
-
- #endif
-}
-
-BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)
-{
- CHECK_CPUID_IS_SUPPORTED
- MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]);
- MyCPUID(1, &p->ver, &p->b, &p->c, &p->d);
- return True;
-}
-
-static const UInt32 kVendors[][3] =
-{
- { 0x756E6547, 0x49656E69, 0x6C65746E},
- { 0x68747541, 0x69746E65, 0x444D4163},
- { 0x746E6543, 0x48727561, 0x736C7561}
-};
-
-int x86cpuid_GetFirm(const Cx86cpuid *p)
-{
- unsigned i;
- for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++)
- {
- const UInt32 *v = kVendors[i];
- if (v[0] == p->vendor[0] &&
- v[1] == p->vendor[1] &&
- v[2] == p->vendor[2])
- return (int)i;
- }
- return -1;
-}
-
-BoolInt CPU_Is_InOrder()
-{
- Cx86cpuid p;
- int firm;
- UInt32 family, model;
- if (!x86cpuid_CheckAndRead(&p))
- return True;
-
- family = x86cpuid_GetFamily(p.ver);
- model = x86cpuid_GetModel(p.ver);
-
- firm = x86cpuid_GetFirm(&p);
-
- switch (firm)
- {
- case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
- /* In-Order Atom CPU */
- model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */
- || model == 0x26 /* 45 nm, Z6xx */
- || model == 0x27 /* 32 nm, Z2460 */
- || model == 0x35 /* 32 nm, Z2760 */
- || model == 0x36 /* 32 nm, N2xxx, D2xxx */
- )));
- case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
- case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
- }
- return True;
-}
-
-#if !defined(MY_CPU_AMD64) && defined(_WIN32)
-#include <windows.h>
-static BoolInt CPU_Sys_Is_SSE_Supported()
-{
- OSVERSIONINFO vi;
- vi.dwOSVersionInfoSize = sizeof(vi);
- if (!GetVersionEx(&vi))
- return False;
- return (vi.dwMajorVersion >= 5);
-}
-#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
-#else
-#define CHECK_SYS_SSE_SUPPORT
-#endif
-
-BoolInt CPU_Is_Aes_Supported()
-{
- Cx86cpuid p;
- CHECK_SYS_SSE_SUPPORT
- if (!x86cpuid_CheckAndRead(&p))
- return False;
- return (p.c >> 25) & 1;
-}
-
-BoolInt CPU_IsSupported_PageGB()
-{
- Cx86cpuid cpuid;
- if (!x86cpuid_CheckAndRead(&cpuid))
- return False;
- {
- UInt32 d[4] = { 0 };
- MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]);
- if (d[0] < 0x80000001)
- return False;
- }
- {
- UInt32 d[4] = { 0 };
- MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]);
- return (d[3] >> 26) & 1;
- }
-}
-
-#endif
+++ /dev/null
-/* Delta.c -- Delta converter
-2009-05-26 : Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
-#include "Delta.h"
-
-void Delta_Init(Byte *state)
-{
- unsigned i;
- for (i = 0; i < DELTA_STATE_SIZE; i++)
- state[i] = 0;
-}
-
-static void MyMemCpy(Byte *dest, const Byte *src, unsigned size)
-{
- unsigned i;
- for (i = 0; i < size; i++)
- dest[i] = src[i];
-}
-
-void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size)
-{
- Byte buf[DELTA_STATE_SIZE];
- unsigned j = 0;
- MyMemCpy(buf, state, delta);
- {
- SizeT i;
- for (i = 0; i < size;)
- {
- for (j = 0; j < delta && i < size; i++, j++)
- {
- Byte b = data[i];
- data[i] = (Byte)(b - buf[j]);
- buf[j] = b;
- }
- }
- }
- if (j == delta)
- j = 0;
- MyMemCpy(state, buf + j, delta - j);
- MyMemCpy(state + delta - j, buf, j);
-}
-
-void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size)
-{
- Byte buf[DELTA_STATE_SIZE];
- unsigned j = 0;
- MyMemCpy(buf, state, delta);
- {
- SizeT i;
- for (i = 0; i < size;)
- {
- for (j = 0; j < delta && i < size; i++, j++)
- {
- buf[j] = data[i] = (Byte)(buf[j] + data[i]);
- }
- }
- }
- if (j == delta)
- j = 0;
- MyMemCpy(state, buf + j, delta - j);
- MyMemCpy(state + delta - j, buf, j);
-}
+++ /dev/null
-/* LzFind.c -- Match finder for LZ algorithms
-2018-07-08 : Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
-#include <string.h>
-
-#include "LzFind.h"
-#include "LzHash.h"
-
-#define kEmptyHashValue 0
-#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
-#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
-#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1))
-#define kMaxHistorySize ((UInt32)7 << 29)
-
-#define kStartMaxLen 3
-
-static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
-{
- if (!p->directInput)
- {
- ISzAlloc_Free(alloc, p->bufferBase);
- p->bufferBase = NULL;
- }
-}
-
-/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
-
-static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc)
-{
- UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
- if (p->directInput)
- {
- p->blockSize = blockSize;
- return 1;
- }
- if (!p->bufferBase || p->blockSize != blockSize)
- {
- LzInWindow_Free(p, alloc);
- p->blockSize = blockSize;
- p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize);
- }
- return (p->bufferBase != NULL);
-}
-
-Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
-
-UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
-
-void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
-{
- p->posLimit -= subValue;
- p->pos -= subValue;
- p->streamPos -= subValue;
-}
-
-static void MatchFinder_ReadBlock(CMatchFinder *p)
-{
- if (p->streamEndWasReached || p->result != SZ_OK)
- return;
-
- /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */
-
- if (p->directInput)
- {
- UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos);
- if (curSize > p->directInputRem)
- curSize = (UInt32)p->directInputRem;
- p->directInputRem -= curSize;
- p->streamPos += curSize;
- if (p->directInputRem == 0)
- p->streamEndWasReached = 1;
- return;
- }
-
- for (;;)
- {
- Byte *dest = p->buffer + (p->streamPos - p->pos);
- size_t size = (p->bufferBase + p->blockSize - dest);
- if (size == 0)
- return;
-
- p->result = ISeqInStream_Read(p->stream, dest, &size);
- if (p->result != SZ_OK)
- return;
- if (size == 0)
- {
- p->streamEndWasReached = 1;
- return;
- }
- p->streamPos += (UInt32)size;
- if (p->streamPos - p->pos > p->keepSizeAfter)
- return;
- }
-}
-
-void MatchFinder_MoveBlock(CMatchFinder *p)
-{
- memmove(p->bufferBase,
- p->buffer - p->keepSizeBefore,
- (size_t)(p->streamPos - p->pos) + p->keepSizeBefore);
- p->buffer = p->bufferBase + p->keepSizeBefore;
-}
-
-int MatchFinder_NeedMove(CMatchFinder *p)
-{
- if (p->directInput)
- return 0;
- /* if (p->streamEndWasReached) return 0; */
- return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
-}
-
-void MatchFinder_ReadIfRequired(CMatchFinder *p)
-{
- if (p->streamEndWasReached)
- return;
- if (p->keepSizeAfter >= p->streamPos - p->pos)
- MatchFinder_ReadBlock(p);
-}
-
-static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
-{
- if (MatchFinder_NeedMove(p))
- MatchFinder_MoveBlock(p);
- MatchFinder_ReadBlock(p);
-}
-
-static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
-{
- p->cutValue = 32;
- p->btMode = 1;
- p->numHashBytes = 4;
- p->bigHash = 0;
-}
-
-#define kCrcPoly 0xEDB88320
-
-void MatchFinder_Construct(CMatchFinder *p)
-{
- unsigned i;
- p->bufferBase = NULL;
- p->directInput = 0;
- p->hash = NULL;
- p->expectedDataSize = (UInt64)(Int64)-1;
- MatchFinder_SetDefaultSettings(p);
-
- for (i = 0; i < 256; i++)
- {
- UInt32 r = (UInt32)i;
- unsigned j;
- for (j = 0; j < 8; j++)
- r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
- p->crc[i] = r;
- }
-}
-
-static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
-{
- ISzAlloc_Free(alloc, p->hash);
- p->hash = NULL;
-}
-
-void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
-{
- MatchFinder_FreeThisClassMemory(p, alloc);
- LzInWindow_Free(p, alloc);
-}
-
-static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
-{
- size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
- if (sizeInBytes / sizeof(CLzRef) != num)
- return NULL;
- return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
-}
-
-int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
- UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
- ISzAllocPtr alloc)
-{
- UInt32 sizeReserv;
-
- if (historySize > kMaxHistorySize)
- {
- MatchFinder_Free(p, alloc);
- return 0;
- }
-
- sizeReserv = historySize >> 1;
- if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3;
- else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2;
-
- sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
-
- p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
- p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
-
- /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
-
- if (LzInWindow_Create(p, sizeReserv, alloc))
- {
- UInt32 newCyclicBufferSize = historySize + 1;
- UInt32 hs;
- p->matchMaxLen = matchMaxLen;
- {
- p->fixedHashSize = 0;
- if (p->numHashBytes == 2)
- hs = (1 << 16) - 1;
- else
- {
- hs = historySize;
- if (hs > p->expectedDataSize)
- hs = (UInt32)p->expectedDataSize;
- if (hs != 0)
- hs--;
- hs |= (hs >> 1);
- hs |= (hs >> 2);
- hs |= (hs >> 4);
- hs |= (hs >> 8);
- hs >>= 1;
- hs |= 0xFFFF; /* don't change it! It's required for Deflate */
- if (hs > (1 << 24))
- {
- if (p->numHashBytes == 3)
- hs = (1 << 24) - 1;
- else
- hs >>= 1;
- /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
- }
- }
- p->hashMask = hs;
- hs++;
- if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
- if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
- if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
- hs += p->fixedHashSize;
- }
-
- {
- size_t newSize;
- size_t numSons;
- p->historySize = historySize;
- p->hashSizeSum = hs;
- p->cyclicBufferSize = newCyclicBufferSize;
-
- numSons = newCyclicBufferSize;
- if (p->btMode)
- numSons <<= 1;
- newSize = hs + numSons;
-
- if (p->hash && p->numRefs == newSize)
- return 1;
-
- MatchFinder_FreeThisClassMemory(p, alloc);
- p->numRefs = newSize;
- p->hash = AllocRefs(newSize, alloc);
-
- if (p->hash)
- {
- p->son = p->hash + p->hashSizeSum;
- return 1;
- }
- }
- }
-
- MatchFinder_Free(p, alloc);
- return 0;
-}
-
-static void MatchFinder_SetLimits(CMatchFinder *p)
-{
- UInt32 limit = kMaxValForNormalize - p->pos;
- UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
-
- if (limit2 < limit)
- limit = limit2;
- limit2 = p->streamPos - p->pos;
-
- if (limit2 <= p->keepSizeAfter)
- {
- if (limit2 > 0)
- limit2 = 1;
- }
- else
- limit2 -= p->keepSizeAfter;
-
- if (limit2 < limit)
- limit = limit2;
-
- {
- UInt32 lenLimit = p->streamPos - p->pos;
- if (lenLimit > p->matchMaxLen)
- lenLimit = p->matchMaxLen;
- p->lenLimit = lenLimit;
- }
- p->posLimit = p->pos + limit;
-}
-
-
-void MatchFinder_Init_LowHash(CMatchFinder *p)
-{
- size_t i;
- CLzRef *items = p->hash;
- size_t numItems = p->fixedHashSize;
- for (i = 0; i < numItems; i++)
- items[i] = kEmptyHashValue;
-}
-
-
-void MatchFinder_Init_HighHash(CMatchFinder *p)
-{
- size_t i;
- CLzRef *items = p->hash + p->fixedHashSize;
- size_t numItems = (size_t)p->hashMask + 1;
- for (i = 0; i < numItems; i++)
- items[i] = kEmptyHashValue;
-}
-
-
-void MatchFinder_Init_3(CMatchFinder *p, int readData)
-{
- p->cyclicBufferPos = 0;
- p->buffer = p->bufferBase;
- p->pos =
- p->streamPos = p->cyclicBufferSize;
- p->result = SZ_OK;
- p->streamEndWasReached = 0;
-
- if (readData)
- MatchFinder_ReadBlock(p);
-
- MatchFinder_SetLimits(p);
-}
-
-
-void MatchFinder_Init(CMatchFinder *p)
-{
- MatchFinder_Init_HighHash(p);
- MatchFinder_Init_LowHash(p);
- MatchFinder_Init_3(p, True);
-}
-
-
-static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
-{
- return (p->pos - p->historySize - 1) & kNormalizeMask;
-}
-
-void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
-{
- size_t i;
- for (i = 0; i < numItems; i++)
- {
- UInt32 value = items[i];
- if (value <= subValue)
- value = kEmptyHashValue;
- else
- value -= subValue;
- items[i] = value;
- }
-}
-
-static void MatchFinder_Normalize(CMatchFinder *p)
-{
- UInt32 subValue = MatchFinder_GetSubValue(p);
- MatchFinder_Normalize3(subValue, p->hash, p->numRefs);
- MatchFinder_ReduceOffsets(p, subValue);
-}
-
-
-MY_NO_INLINE
-static void MatchFinder_CheckLimits(CMatchFinder *p)
-{
- if (p->pos == kMaxValForNormalize)
- MatchFinder_Normalize(p);
- if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
- MatchFinder_CheckAndMoveAndRead(p);
- if (p->cyclicBufferPos == p->cyclicBufferSize)
- p->cyclicBufferPos = 0;
- MatchFinder_SetLimits(p);
-}
-
-
-/*
- (lenLimit > maxLen)
-*/
-MY_FORCE_INLINE
-static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
- UInt32 *distances, unsigned maxLen)
-{
- /*
- son[_cyclicBufferPos] = curMatch;
- for (;;)
- {
- UInt32 delta = pos - curMatch;
- if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- return distances;
- {
- const Byte *pb = cur - delta;
- curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
- if (pb[maxLen] == cur[maxLen] && *pb == *cur)
- {
- UInt32 len = 0;
- while (++len != lenLimit)
- if (pb[len] != cur[len])
- break;
- if (maxLen < len)
- {
- maxLen = len;
- *distances++ = len;
- *distances++ = delta - 1;
- if (len == lenLimit)
- return distances;
- }
- }
- }
- }
- */
-
- const Byte *lim = cur + lenLimit;
- son[_cyclicBufferPos] = curMatch;
- do
- {
- UInt32 delta = pos - curMatch;
- if (delta >= _cyclicBufferSize)
- break;
- {
- ptrdiff_t diff;
- curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
- diff = (ptrdiff_t)0 - delta;
- if (cur[maxLen] == cur[maxLen + diff])
- {
- const Byte *c = cur;
- while (*c == c[diff])
- {
- if (++c == lim)
- {
- distances[0] = (UInt32)(lim - cur);
- distances[1] = delta - 1;
- return distances + 2;
- }
- }
- {
- unsigned len = (unsigned)(c - cur);
- if (maxLen < len)
- {
- maxLen = len;
- distances[0] = (UInt32)len;
- distances[1] = delta - 1;
- distances += 2;
- }
- }
- }
- }
- }
- while (--cutValue);
-
- return distances;
-}
-
-
-MY_FORCE_INLINE
-UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
- UInt32 *distances, UInt32 maxLen)
-{
- CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
- unsigned len0 = 0, len1 = 0;
- for (;;)
- {
- UInt32 delta = pos - curMatch;
- if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- return distances;
- }
- {
- CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
- const Byte *pb = cur - delta;
- unsigned len = (len0 < len1 ? len0 : len1);
- UInt32 pair0 = pair[0];
- if (pb[len] == cur[len])
- {
- if (++len != lenLimit && pb[len] == cur[len])
- while (++len != lenLimit)
- if (pb[len] != cur[len])
- break;
- if (maxLen < len)
- {
- maxLen = (UInt32)len;
- *distances++ = (UInt32)len;
- *distances++ = delta - 1;
- if (len == lenLimit)
- {
- *ptr1 = pair0;
- *ptr0 = pair[1];
- return distances;
- }
- }
- }
- if (pb[len] < cur[len])
- {
- *ptr1 = curMatch;
- ptr1 = pair + 1;
- curMatch = *ptr1;
- len1 = len;
- }
- else
- {
- *ptr0 = curMatch;
- ptr0 = pair;
- curMatch = *ptr0;
- len0 = len;
- }
- }
- }
-}
-
-static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
-{
- CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
- unsigned len0 = 0, len1 = 0;
- for (;;)
- {
- UInt32 delta = pos - curMatch;
- if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- return;
- }
- {
- CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
- const Byte *pb = cur - delta;
- unsigned len = (len0 < len1 ? len0 : len1);
- if (pb[len] == cur[len])
- {
- while (++len != lenLimit)
- if (pb[len] != cur[len])
- break;
- {
- if (len == lenLimit)
- {
- *ptr1 = pair[0];
- *ptr0 = pair[1];
- return;
- }
- }
- }
- if (pb[len] < cur[len])
- {
- *ptr1 = curMatch;
- ptr1 = pair + 1;
- curMatch = *ptr1;
- len1 = len;
- }
- else
- {
- *ptr0 = curMatch;
- ptr0 = pair;
- curMatch = *ptr0;
- len0 = len;
- }
- }
- }
-}
-
-#define MOVE_POS \
- ++p->cyclicBufferPos; \
- p->buffer++; \
- if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
-
-#define MOVE_POS_RET MOVE_POS return (UInt32)offset;
-
-static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
-
-#define GET_MATCHES_HEADER2(minLen, ret_op) \
- unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
- lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
- cur = p->buffer;
-
-#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
-#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
-
-#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
-
-#define GET_MATCHES_FOOTER(offset, maxLen) \
- offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \
- distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET;
-
-#define SKIP_FOOTER \
- SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
-
-#define UPDATE_maxLen { \
- ptrdiff_t diff = (ptrdiff_t)0 - d2; \
- const Byte *c = cur + maxLen; \
- const Byte *lim = cur + lenLimit; \
- for (; c != lim; c++) if (*(c + diff) != *c) break; \
- maxLen = (unsigned)(c - cur); }
-
-static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
- unsigned offset;
- GET_MATCHES_HEADER(2)
- HASH2_CALC;
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- offset = 0;
- GET_MATCHES_FOOTER(offset, 1)
-}
-
-UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
- unsigned offset;
- GET_MATCHES_HEADER(3)
- HASH_ZIP_CALC;
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- offset = 0;
- GET_MATCHES_FOOTER(offset, 2)
-}
-
-static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
- UInt32 h2, d2, pos;
- unsigned maxLen, offset;
- UInt32 *hash;
- GET_MATCHES_HEADER(3)
-
- HASH3_CALC;
-
- hash = p->hash;
- pos = p->pos;
-
- d2 = pos - hash[h2];
-
- curMatch = (hash + kFix3HashSize)[hv];
-
- hash[h2] = pos;
- (hash + kFix3HashSize)[hv] = pos;
-
- maxLen = 2;
- offset = 0;
-
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
- {
- UPDATE_maxLen
- distances[0] = (UInt32)maxLen;
- distances[1] = d2 - 1;
- offset = 2;
- if (maxLen == lenLimit)
- {
- SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
- MOVE_POS_RET;
- }
- }
-
- GET_MATCHES_FOOTER(offset, maxLen)
-}
-
-static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
- UInt32 h2, h3, d2, d3, pos;
- unsigned maxLen, offset;
- UInt32 *hash;
- GET_MATCHES_HEADER(4)
-
- HASH4_CALC;
-
- hash = p->hash;
- pos = p->pos;
-
- d2 = pos - hash [h2];
- d3 = pos - (hash + kFix3HashSize)[h3];
-
- curMatch = (hash + kFix4HashSize)[hv];
-
- hash [h2] = pos;
- (hash + kFix3HashSize)[h3] = pos;
- (hash + kFix4HashSize)[hv] = pos;
-
- maxLen = 0;
- offset = 0;
-
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
- {
- maxLen = 2;
- distances[0] = 2;
- distances[1] = d2 - 1;
- offset = 2;
- }
-
- if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- maxLen = 3;
- distances[(size_t)offset + 1] = d3 - 1;
- offset += 2;
- d2 = d3;
- }
-
- if (offset != 0)
- {
- UPDATE_maxLen
- distances[(size_t)offset - 2] = (UInt32)maxLen;
- if (maxLen == lenLimit)
- {
- SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
- MOVE_POS_RET;
- }
- }
-
- if (maxLen < 3)
- maxLen = 3;
-
- GET_MATCHES_FOOTER(offset, maxLen)
-}
-
-/*
-static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
- UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos;
- UInt32 *hash;
- GET_MATCHES_HEADER(5)
-
- HASH5_CALC;
-
- hash = p->hash;
- pos = p->pos;
-
- d2 = pos - hash [h2];
- d3 = pos - (hash + kFix3HashSize)[h3];
- d4 = pos - (hash + kFix4HashSize)[h4];
-
- curMatch = (hash + kFix5HashSize)[hv];
-
- hash [h2] = pos;
- (hash + kFix3HashSize)[h3] = pos;
- (hash + kFix4HashSize)[h4] = pos;
- (hash + kFix5HashSize)[hv] = pos;
-
- maxLen = 0;
- offset = 0;
-
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
- {
- distances[0] = maxLen = 2;
- distances[1] = d2 - 1;
- offset = 2;
- if (*(cur - d2 + 2) == cur[2])
- distances[0] = maxLen = 3;
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- distances[2] = maxLen = 3;
- distances[3] = d3 - 1;
- offset = 4;
- d2 = d3;
- }
- }
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- distances[0] = maxLen = 3;
- distances[1] = d3 - 1;
- offset = 2;
- d2 = d3;
- }
-
- if (d2 != d4 && d4 < p->cyclicBufferSize
- && *(cur - d4) == *cur
- && *(cur - d4 + 3) == *(cur + 3))
- {
- maxLen = 4;
- distances[(size_t)offset + 1] = d4 - 1;
- offset += 2;
- d2 = d4;
- }
-
- if (offset != 0)
- {
- UPDATE_maxLen
- distances[(size_t)offset - 2] = maxLen;
- if (maxLen == lenLimit)
- {
- SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
- MOVE_POS_RET;
- }
- }
-
- if (maxLen < 4)
- maxLen = 4;
-
- GET_MATCHES_FOOTER(offset, maxLen)
-}
-*/
-
-static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
- UInt32 h2, h3, d2, d3, pos;
- unsigned maxLen, offset;
- UInt32 *hash;
- GET_MATCHES_HEADER(4)
-
- HASH4_CALC;
-
- hash = p->hash;
- pos = p->pos;
-
- d2 = pos - hash [h2];
- d3 = pos - (hash + kFix3HashSize)[h3];
- curMatch = (hash + kFix4HashSize)[hv];
-
- hash [h2] = pos;
- (hash + kFix3HashSize)[h3] = pos;
- (hash + kFix4HashSize)[hv] = pos;
-
- maxLen = 0;
- offset = 0;
-
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
- {
- maxLen = 2;
- distances[0] = 2;
- distances[1] = d2 - 1;
- offset = 2;
- }
-
- if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- maxLen = 3;
- distances[(size_t)offset + 1] = d3 - 1;
- offset += 2;
- d2 = d3;
- }
-
- if (offset != 0)
- {
- UPDATE_maxLen
- distances[(size_t)offset - 2] = (UInt32)maxLen;
- if (maxLen == lenLimit)
- {
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS_RET;
- }
- }
-
- if (maxLen < 3)
- maxLen = 3;
-
- offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
- distances + offset, maxLen) - (distances));
- MOVE_POS_RET
-}
-
-/*
-static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
- UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos
- UInt32 *hash;
- GET_MATCHES_HEADER(5)
-
- HASH5_CALC;
-
- hash = p->hash;
- pos = p->pos;
-
- d2 = pos - hash [h2];
- d3 = pos - (hash + kFix3HashSize)[h3];
- d4 = pos - (hash + kFix4HashSize)[h4];
-
- curMatch = (hash + kFix5HashSize)[hv];
-
- hash [h2] = pos;
- (hash + kFix3HashSize)[h3] = pos;
- (hash + kFix4HashSize)[h4] = pos;
- (hash + kFix5HashSize)[hv] = pos;
-
- maxLen = 0;
- offset = 0;
-
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
- {
- distances[0] = maxLen = 2;
- distances[1] = d2 - 1;
- offset = 2;
- if (*(cur - d2 + 2) == cur[2])
- distances[0] = maxLen = 3;
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- distances[2] = maxLen = 3;
- distances[3] = d3 - 1;
- offset = 4;
- d2 = d3;
- }
- }
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- distances[0] = maxLen = 3;
- distances[1] = d3 - 1;
- offset = 2;
- d2 = d3;
- }
-
- if (d2 != d4 && d4 < p->cyclicBufferSize
- && *(cur - d4) == *cur
- && *(cur - d4 + 3) == *(cur + 3))
- {
- maxLen = 4;
- distances[(size_t)offset + 1] = d4 - 1;
- offset += 2;
- d2 = d4;
- }
-
- if (offset != 0)
- {
- UPDATE_maxLen
- distances[(size_t)offset - 2] = maxLen;
- if (maxLen == lenLimit)
- {
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS_RET;
- }
- }
-
- if (maxLen < 4)
- maxLen = 4;
-
- offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
- distances + offset, maxLen) - (distances));
- MOVE_POS_RET
-}
-*/
-
-UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
- unsigned offset;
- GET_MATCHES_HEADER(3)
- HASH_ZIP_CALC;
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
- distances, 2) - (distances));
- MOVE_POS_RET
-}
-
-static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- SKIP_HEADER(2)
- HASH2_CALC;
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- SKIP_FOOTER
- }
- while (--num != 0);
-}
-
-void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- SKIP_HEADER(3)
- HASH_ZIP_CALC;
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- SKIP_FOOTER
- }
- while (--num != 0);
-}
-
-static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- UInt32 h2;
- UInt32 *hash;
- SKIP_HEADER(3)
- HASH3_CALC;
- hash = p->hash;
- curMatch = (hash + kFix3HashSize)[hv];
- hash[h2] =
- (hash + kFix3HashSize)[hv] = p->pos;
- SKIP_FOOTER
- }
- while (--num != 0);
-}
-
-static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- UInt32 h2, h3;
- UInt32 *hash;
- SKIP_HEADER(4)
- HASH4_CALC;
- hash = p->hash;
- curMatch = (hash + kFix4HashSize)[hv];
- hash [h2] =
- (hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[hv] = p->pos;
- SKIP_FOOTER
- }
- while (--num != 0);
-}
-
-/*
-static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- UInt32 h2, h3, h4;
- UInt32 *hash;
- SKIP_HEADER(5)
- HASH5_CALC;
- hash = p->hash;
- curMatch = (hash + kFix5HashSize)[hv];
- hash [h2] =
- (hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[h4] =
- (hash + kFix5HashSize)[hv] = p->pos;
- SKIP_FOOTER
- }
- while (--num != 0);
-}
-*/
-
-static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- UInt32 h2, h3;
- UInt32 *hash;
- SKIP_HEADER(4)
- HASH4_CALC;
- hash = p->hash;
- curMatch = (hash + kFix4HashSize)[hv];
- hash [h2] =
- (hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[hv] = p->pos;
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS
- }
- while (--num != 0);
-}
-
-/*
-static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- UInt32 h2, h3, h4;
- UInt32 *hash;
- SKIP_HEADER(5)
- HASH5_CALC;
- hash = p->hash;
- curMatch = hash + kFix5HashSize)[hv];
- hash [h2] =
- (hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[h4] =
- (hash + kFix5HashSize)[hv] = p->pos;
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS
- }
- while (--num != 0);
-}
-*/
-
-void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- SKIP_HEADER(3)
- HASH_ZIP_CALC;
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS
- }
- while (--num != 0);
-}
-
-void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
-{
- vTable->Init = (Mf_Init_Func)MatchFinder_Init;
- vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
- vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
- if (!p->btMode)
- {
- /* if (p->numHashBytes <= 4) */
- {
- vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
- vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
- }
- /*
- else
- {
- vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
- vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
- }
- */
- }
- else if (p->numHashBytes == 2)
- {
- vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
- vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
- }
- else if (p->numHashBytes == 3)
- {
- vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
- vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
- }
- else /* if (p->numHashBytes == 4) */
- {
- vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
- vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
- }
- /*
- else
- {
- vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
- vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
- }
- */
-}
+++ /dev/null
-/* LzmaDec.c -- LZMA Decoder
-2018-07-04 : Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
-#include <string.h>
-
-/* #include "CpuArch.h" */
-#include "LzmaDec.h"
-
-#define kNumTopBits 24
-#define kTopValue ((UInt32)1 << kNumTopBits)
-
-#define kNumBitModelTotalBits 11
-#define kBitModelTotal (1 << kNumBitModelTotalBits)
-#define kNumMoveBits 5
-
-#define RC_INIT_SIZE 5
-
-#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
-
-#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
-#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
-#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
-#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
- { UPDATE_0(p); i = (i + i); A0; } else \
- { UPDATE_1(p); i = (i + i) + 1; A1; }
-
-#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
-
-#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
- { UPDATE_0(p + i); A0; } else \
- { UPDATE_1(p + i); A1; }
-#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
-#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; )
-#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; )
-
-#define TREE_DECODE(probs, limit, i) \
- { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
-
-/* #define _LZMA_SIZE_OPT */
-
-#ifdef _LZMA_SIZE_OPT
-#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
-#else
-#define TREE_6_DECODE(probs, i) \
- { i = 1; \
- TREE_GET_BIT(probs, i); \
- TREE_GET_BIT(probs, i); \
- TREE_GET_BIT(probs, i); \
- TREE_GET_BIT(probs, i); \
- TREE_GET_BIT(probs, i); \
- TREE_GET_BIT(probs, i); \
- i -= 0x40; }
-#endif
-
-#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol)
-#define MATCHED_LITER_DEC \
- matchByte += matchByte; \
- bit = offs; \
- offs &= matchByte; \
- probLit = prob + (offs + bit + symbol); \
- GET_BIT2(probLit, symbol, offs ^= bit; , ;)
-
-
-
-#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
-
-#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
-#define UPDATE_0_CHECK range = bound;
-#define UPDATE_1_CHECK range -= bound; code -= bound;
-#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
- { UPDATE_0_CHECK; i = (i + i); A0; } else \
- { UPDATE_1_CHECK; i = (i + i) + 1; A1; }
-#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
-#define TREE_DECODE_CHECK(probs, limit, i) \
- { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
-
-
-#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
- { UPDATE_0_CHECK; i += m; m += m; } else \
- { UPDATE_1_CHECK; m += m; i += m; }
-
-
-#define kNumPosBitsMax 4
-#define kNumPosStatesMax (1 << kNumPosBitsMax)
-
-#define kLenNumLowBits 3
-#define kLenNumLowSymbols (1 << kLenNumLowBits)
-#define kLenNumHighBits 8
-#define kLenNumHighSymbols (1 << kLenNumHighBits)
-
-#define LenLow 0
-#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits))
-#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
-
-#define LenChoice LenLow
-#define LenChoice2 (LenLow + (1 << kLenNumLowBits))
-
-#define kNumStates 12
-#define kNumStates2 16
-#define kNumLitStates 7
-
-#define kStartPosModelIndex 4
-#define kEndPosModelIndex 14
-#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
-
-#define kNumPosSlotBits 6
-#define kNumLenToPosStates 4
-
-#define kNumAlignBits 4
-#define kAlignTableSize (1 << kNumAlignBits)
-
-#define kMatchMinLen 2
-#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
-
-/* External ASM code needs same CLzmaProb array layout. So don't change it. */
-
-/* (probs_1664) is faster and better for code size at some platforms */
-/*
-#ifdef MY_CPU_X86_OR_AMD64
-*/
-#define kStartOffset 1664
-#define GET_PROBS p->probs_1664
-/*
-#define GET_PROBS p->probs + kStartOffset
-#else
-#define kStartOffset 0
-#define GET_PROBS p->probs
-#endif
-*/
-
-#define SpecPos (-kStartOffset)
-#define IsRep0Long (SpecPos + kNumFullDistances)
-#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax))
-#define LenCoder (RepLenCoder + kNumLenProbs)
-#define IsMatch (LenCoder + kNumLenProbs)
-#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax))
-#define IsRep (Align + kAlignTableSize)
-#define IsRepG0 (IsRep + kNumStates)
-#define IsRepG1 (IsRepG0 + kNumStates)
-#define IsRepG2 (IsRepG1 + kNumStates)
-#define PosSlot (IsRepG2 + kNumStates)
-#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
-#define NUM_BASE_PROBS (Literal + kStartOffset)
-
-#if Align != 0 && kStartOffset != 0
- #error Stop_Compiling_Bad_LZMA_kAlign
-#endif
-
-#if NUM_BASE_PROBS != 1984
- #error Stop_Compiling_Bad_LZMA_PROBS
-#endif
-
-
-#define LZMA_LIT_SIZE 0x300
-
-#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
-
-
-#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4)
-#define COMBINED_PS_STATE (posState + state)
-#define GET_LEN_STATE (posState)
-
-#define LZMA_DIC_MIN (1 << 12)
-
-/*
-p->remainLen : shows status of LZMA decoder:
- < kMatchSpecLenStart : normal remain
- = kMatchSpecLenStart : finished
- = kMatchSpecLenStart + 1 : need init range coder
- = kMatchSpecLenStart + 2 : need init range coder and state
-*/
-
-/* ---------- LZMA_DECODE_REAL ---------- */
-/*
-LzmaDec_DecodeReal_3() can be implemented in external ASM file.
-3 - is the code compatibility version of that function for check at link time.
-*/
-
-#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3
-
-/*
-LZMA_DECODE_REAL()
-In:
- RangeCoder is normalized
- if (p->dicPos == limit)
- {
- LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases.
- So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol
- is not END_OF_PAYALOAD_MARKER, then function returns error code.
- }
-
-Processing:
- first LZMA symbol will be decoded in any case
- All checks for limits are at the end of main loop,
- It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
- RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked.
-
-Out:
- RangeCoder is normalized
- Result:
- SZ_OK - OK
- SZ_ERROR_DATA - Error
- p->remainLen:
- < kMatchSpecLenStart : normal remain
- = kMatchSpecLenStart : finished
-*/
-
-
-#ifdef _LZMA_DEC_OPT
-
-int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
-
-#else
-
-static
-int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
-{
- CLzmaProb *probs = GET_PROBS;
- unsigned state = (unsigned)p->state;
- UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
- unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
- unsigned lc = p->prop.lc;
- unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);
-
- Byte *dic = p->dic;
- SizeT dicBufSize = p->dicBufSize;
- SizeT dicPos = p->dicPos;
-
- UInt32 processedPos = p->processedPos;
- UInt32 checkDicSize = p->checkDicSize;
- unsigned len = 0;
-
- const Byte *buf = p->buf;
- UInt32 range = p->range;
- UInt32 code = p->code;
-
- do
- {
- CLzmaProb *prob;
- UInt32 bound;
- unsigned ttt;
- unsigned posState = CALC_POS_STATE(processedPos, pbMask);
-
- prob = probs + IsMatch + COMBINED_PS_STATE;
- IF_BIT_0(prob)
- {
- unsigned symbol;
- UPDATE_0(prob);
- prob = probs + Literal;
- if (processedPos != 0 || checkDicSize != 0)
- prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
- processedPos++;
-
- if (state < kNumLitStates)
- {
- state -= (state < 4) ? state : 3;
- symbol = 1;
- #ifdef _LZMA_SIZE_OPT
- do { NORMAL_LITER_DEC } while (symbol < 0x100);
- #else
- NORMAL_LITER_DEC
- NORMAL_LITER_DEC
- NORMAL_LITER_DEC
- NORMAL_LITER_DEC
- NORMAL_LITER_DEC
- NORMAL_LITER_DEC
- NORMAL_LITER_DEC
- NORMAL_LITER_DEC
- #endif
- }
- else
- {
- unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
- unsigned offs = 0x100;
- state -= (state < 10) ? 3 : 6;
- symbol = 1;
- #ifdef _LZMA_SIZE_OPT
- do
- {
- unsigned bit;
- CLzmaProb *probLit;
- MATCHED_LITER_DEC
- }
- while (symbol < 0x100);
- #else
- {
- unsigned bit;
- CLzmaProb *probLit;
- MATCHED_LITER_DEC
- MATCHED_LITER_DEC
- MATCHED_LITER_DEC
- MATCHED_LITER_DEC
- MATCHED_LITER_DEC
- MATCHED_LITER_DEC
- MATCHED_LITER_DEC
- MATCHED_LITER_DEC
- }
- #endif
- }
-
- dic[dicPos++] = (Byte)symbol;
- continue;
- }
-
- {
- UPDATE_1(prob);
- prob = probs + IsRep + state;
- IF_BIT_0(prob)
- {
- UPDATE_0(prob);
- state += kNumStates;
- prob = probs + LenCoder;
- }
- else
- {
- UPDATE_1(prob);
- /*
- // that case was checked before with kBadRepCode
- if (checkDicSize == 0 && processedPos == 0)
- return SZ_ERROR_DATA;
- */
- prob = probs + IsRepG0 + state;
- IF_BIT_0(prob)
- {
- UPDATE_0(prob);
- prob = probs + IsRep0Long + COMBINED_PS_STATE;
- IF_BIT_0(prob)
- {
- UPDATE_0(prob);
- dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
- dicPos++;
- processedPos++;
- state = state < kNumLitStates ? 9 : 11;
- continue;
- }
- UPDATE_1(prob);
- }
- else
- {
- UInt32 distance;
- UPDATE_1(prob);
- prob = probs + IsRepG1 + state;
- IF_BIT_0(prob)
- {
- UPDATE_0(prob);
- distance = rep1;
- }
- else
- {
- UPDATE_1(prob);
- prob = probs + IsRepG2 + state;
- IF_BIT_0(prob)
- {
- UPDATE_0(prob);
- distance = rep2;
- }
- else
- {
- UPDATE_1(prob);
- distance = rep3;
- rep3 = rep2;
- }
- rep2 = rep1;
- }
- rep1 = rep0;
- rep0 = distance;
- }
- state = state < kNumLitStates ? 8 : 11;
- prob = probs + RepLenCoder;
- }
-
- #ifdef _LZMA_SIZE_OPT
- {
- unsigned lim, offset;
- CLzmaProb *probLen = prob + LenChoice;
- IF_BIT_0(probLen)
- {
- UPDATE_0(probLen);
- probLen = prob + LenLow + GET_LEN_STATE;
- offset = 0;
- lim = (1 << kLenNumLowBits);
- }
- else
- {
- UPDATE_1(probLen);
- probLen = prob + LenChoice2;
- IF_BIT_0(probLen)
- {
- UPDATE_0(probLen);
- probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
- offset = kLenNumLowSymbols;
- lim = (1 << kLenNumLowBits);
- }
- else
- {
- UPDATE_1(probLen);
- probLen = prob + LenHigh;
- offset = kLenNumLowSymbols * 2;
- lim = (1 << kLenNumHighBits);
- }
- }
- TREE_DECODE(probLen, lim, len);
- len += offset;
- }
- #else
- {
- CLzmaProb *probLen = prob + LenChoice;
- IF_BIT_0(probLen)
- {
- UPDATE_0(probLen);
- probLen = prob + LenLow + GET_LEN_STATE;
- len = 1;
- TREE_GET_BIT(probLen, len);
- TREE_GET_BIT(probLen, len);
- TREE_GET_BIT(probLen, len);
- len -= 8;
- }
- else
- {
- UPDATE_1(probLen);
- probLen = prob + LenChoice2;
- IF_BIT_0(probLen)
- {
- UPDATE_0(probLen);
- probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
- len = 1;
- TREE_GET_BIT(probLen, len);
- TREE_GET_BIT(probLen, len);
- TREE_GET_BIT(probLen, len);
- }
- else
- {
- UPDATE_1(probLen);
- probLen = prob + LenHigh;
- TREE_DECODE(probLen, (1 << kLenNumHighBits), len);
- len += kLenNumLowSymbols * 2;
- }
- }
- }
- #endif
-
- if (state >= kNumStates)
- {
- UInt32 distance;
- prob = probs + PosSlot +
- ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
- TREE_6_DECODE(prob, distance);
- if (distance >= kStartPosModelIndex)
- {
- unsigned posSlot = (unsigned)distance;
- unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
- distance = (2 | (distance & 1));
- if (posSlot < kEndPosModelIndex)
- {
- distance <<= numDirectBits;
- prob = probs + SpecPos;
- {
- UInt32 m = 1;
- distance++;
- do
- {
- REV_BIT_VAR(prob, distance, m);
- }
- while (--numDirectBits);
- distance -= m;
- }
- }
- else
- {
- numDirectBits -= kNumAlignBits;
- do
- {
- NORMALIZE
- range >>= 1;
-
- {
- UInt32 t;
- code -= range;
- t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */
- distance = (distance << 1) + (t + 1);
- code += range & t;
- }
- /*
- distance <<= 1;
- if (code >= range)
- {
- code -= range;
- distance |= 1;
- }
- */
- }
- while (--numDirectBits);
- prob = probs + Align;
- distance <<= kNumAlignBits;
- {
- unsigned i = 1;
- REV_BIT_CONST(prob, i, 1);
- REV_BIT_CONST(prob, i, 2);
- REV_BIT_CONST(prob, i, 4);
- REV_BIT_LAST (prob, i, 8);
- distance |= i;
- }
- if (distance == (UInt32)0xFFFFFFFF)
- {
- len = kMatchSpecLenStart;
- state -= kNumStates;
- break;
- }
- }
- }
-
- rep3 = rep2;
- rep2 = rep1;
- rep1 = rep0;
- rep0 = distance + 1;
- state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
- if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
- {
- p->dicPos = dicPos;
- return SZ_ERROR_DATA;
- }
- }
-
- len += kMatchMinLen;
-
- {
- SizeT rem;
- unsigned curLen;
- SizeT pos;
-
- if ((rem = limit - dicPos) == 0)
- {
- p->dicPos = dicPos;
- return SZ_ERROR_DATA;
- }
-
- curLen = ((rem < len) ? (unsigned)rem : len);
- pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
-
- processedPos += (UInt32)curLen;
-
- len -= curLen;
- if (curLen <= dicBufSize - pos)
- {
- Byte *dest = dic + dicPos;
- ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
- const Byte *lim = dest + curLen;
- dicPos += (SizeT)curLen;
- do
- *(dest) = (Byte)*(dest + src);
- while (++dest != lim);
- }
- else
- {
- do
- {
- dic[dicPos++] = dic[pos];
- if (++pos == dicBufSize)
- pos = 0;
- }
- while (--curLen != 0);
- }
- }
- }
- }
- while (dicPos < limit && buf < bufLimit);
-
- NORMALIZE;
-
- p->buf = buf;
- p->range = range;
- p->code = code;
- p->remainLen = (UInt32)len;
- p->dicPos = dicPos;
- p->processedPos = processedPos;
- p->reps[0] = rep0;
- p->reps[1] = rep1;
- p->reps[2] = rep2;
- p->reps[3] = rep3;
- p->state = (UInt32)state;
-
- return SZ_OK;
-}
-#endif
-
-static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
-{
- if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
- {
- Byte *dic = p->dic;
- SizeT dicPos = p->dicPos;
- SizeT dicBufSize = p->dicBufSize;
- unsigned len = (unsigned)p->remainLen;
- SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
- SizeT rem = limit - dicPos;
- if (rem < len)
- len = (unsigned)(rem);
-
- if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
- p->checkDicSize = p->prop.dicSize;
-
- p->processedPos += (UInt32)len;
- p->remainLen -= (UInt32)len;
- while (len != 0)
- {
- len--;
- dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
- dicPos++;
- }
- p->dicPos = dicPos;
- }
-}
-
-
-#define kRange0 0xFFFFFFFF
-#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))
-#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)))
-#if kBadRepCode != (0xC0000000 - 0x400)
- #error Stop_Compiling_Bad_LZMA_Check
-#endif
-
-static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
-{
- do
- {
- SizeT limit2 = limit;
- if (p->checkDicSize == 0)
- {
- UInt32 rem = p->prop.dicSize - p->processedPos;
- if (limit - p->dicPos > rem)
- limit2 = p->dicPos + rem;
-
- if (p->processedPos == 0)
- if (p->code >= kBadRepCode)
- return SZ_ERROR_DATA;
- }
-
- RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit));
-
- if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
- p->checkDicSize = p->prop.dicSize;
-
- LzmaDec_WriteRem(p, limit);
- }
- while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
-
- return 0;
-}
-
-typedef enum
-{
- DUMMY_ERROR, /* unexpected end of input stream */
- DUMMY_LIT,
- DUMMY_MATCH,
- DUMMY_REP
-} ELzmaDummy;
-
-static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
-{
- UInt32 range = p->range;
- UInt32 code = p->code;
- const Byte *bufLimit = buf + inSize;
- const CLzmaProb *probs = GET_PROBS;
- unsigned state = (unsigned)p->state;
- ELzmaDummy res;
-
- {
- const CLzmaProb *prob;
- UInt32 bound;
- unsigned ttt;
- unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1);
-
- prob = probs + IsMatch + COMBINED_PS_STATE;
- IF_BIT_0_CHECK(prob)
- {
- UPDATE_0_CHECK
-
- /* if (bufLimit - buf >= 7) return DUMMY_LIT; */
-
- prob = probs + Literal;
- if (p->checkDicSize != 0 || p->processedPos != 0)
- prob += ((UInt32)LZMA_LIT_SIZE *
- ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
- (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
-
- if (state < kNumLitStates)
- {
- unsigned symbol = 1;
- do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
- }
- else
- {
- unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
- (p->dicPos < p->reps[0] ? p->dicBufSize : 0)];
- unsigned offs = 0x100;
- unsigned symbol = 1;
- do
- {
- unsigned bit;
- const CLzmaProb *probLit;
- matchByte += matchByte;
- bit = offs;
- offs &= matchByte;
- probLit = prob + (offs + bit + symbol);
- GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; )
- }
- while (symbol < 0x100);
- }
- res = DUMMY_LIT;
- }
- else
- {
- unsigned len;
- UPDATE_1_CHECK;
-
- prob = probs + IsRep + state;
- IF_BIT_0_CHECK(prob)
- {
- UPDATE_0_CHECK;
- state = 0;
- prob = probs + LenCoder;
- res = DUMMY_MATCH;
- }
- else
- {
- UPDATE_1_CHECK;
- res = DUMMY_REP;
- prob = probs + IsRepG0 + state;
- IF_BIT_0_CHECK(prob)
- {
- UPDATE_0_CHECK;
- prob = probs + IsRep0Long + COMBINED_PS_STATE;
- IF_BIT_0_CHECK(prob)
- {
- UPDATE_0_CHECK;
- NORMALIZE_CHECK;
- return DUMMY_REP;
- }
- else
- {
- UPDATE_1_CHECK;
- }
- }
- else
- {
- UPDATE_1_CHECK;
- prob = probs + IsRepG1 + state;
- IF_BIT_0_CHECK(prob)
- {
- UPDATE_0_CHECK;
- }
- else
- {
- UPDATE_1_CHECK;
- prob = probs + IsRepG2 + state;
- IF_BIT_0_CHECK(prob)
- {
- UPDATE_0_CHECK;
- }
- else
- {
- UPDATE_1_CHECK;
- }
- }
- }
- state = kNumStates;
- prob = probs + RepLenCoder;
- }
- {
- unsigned limit, offset;
- const CLzmaProb *probLen = prob + LenChoice;
- IF_BIT_0_CHECK(probLen)
- {
- UPDATE_0_CHECK;
- probLen = prob + LenLow + GET_LEN_STATE;
- offset = 0;
- limit = 1 << kLenNumLowBits;
- }
- else
- {
- UPDATE_1_CHECK;
- probLen = prob + LenChoice2;
- IF_BIT_0_CHECK(probLen)
- {
- UPDATE_0_CHECK;
- probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
- offset = kLenNumLowSymbols;
- limit = 1 << kLenNumLowBits;
- }
- else
- {
- UPDATE_1_CHECK;
- probLen = prob + LenHigh;
- offset = kLenNumLowSymbols * 2;
- limit = 1 << kLenNumHighBits;
- }
- }
- TREE_DECODE_CHECK(probLen, limit, len);
- len += offset;
- }
-
- if (state < 4)
- {
- unsigned posSlot;
- prob = probs + PosSlot +
- ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
- kNumPosSlotBits);
- TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
- if (posSlot >= kStartPosModelIndex)
- {
- unsigned numDirectBits = ((posSlot >> 1) - 1);
-
- /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
-
- if (posSlot < kEndPosModelIndex)
- {
- prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);
- }
- else
- {
- numDirectBits -= kNumAlignBits;
- do
- {
- NORMALIZE_CHECK
- range >>= 1;
- code -= range & (((code - range) >> 31) - 1);
- /* if (code >= range) code -= range; */
- }
- while (--numDirectBits);
- prob = probs + Align;
- numDirectBits = kNumAlignBits;
- }
- {
- unsigned i = 1;
- unsigned m = 1;
- do
- {
- REV_BIT_CHECK(prob, i, m);
- }
- while (--numDirectBits);
- }
- }
- }
- }
- }
- NORMALIZE_CHECK;
- return res;
-}
-
-
-void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
-{
- p->remainLen = kMatchSpecLenStart + 1;
- p->tempBufSize = 0;
-
- if (initDic)
- {
- p->processedPos = 0;
- p->checkDicSize = 0;
- p->remainLen = kMatchSpecLenStart + 2;
- }
- if (initState)
- p->remainLen = kMatchSpecLenStart + 2;
-}
-
-void LzmaDec_Init(CLzmaDec *p)
-{
- p->dicPos = 0;
- LzmaDec_InitDicAndState(p, True, True);
-}
-
-
-SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
- ELzmaFinishMode finishMode, ELzmaStatus *status)
-{
- SizeT inSize = *srcLen;
- (*srcLen) = 0;
-
- *status = LZMA_STATUS_NOT_SPECIFIED;
-
- if (p->remainLen > kMatchSpecLenStart)
- {
- for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
- p->tempBuf[p->tempBufSize++] = *src++;
- if (p->tempBufSize != 0 && p->tempBuf[0] != 0)
- return SZ_ERROR_DATA;
- if (p->tempBufSize < RC_INIT_SIZE)
- {
- *status = LZMA_STATUS_NEEDS_MORE_INPUT;
- return SZ_OK;
- }
- p->code =
- ((UInt32)p->tempBuf[1] << 24)
- | ((UInt32)p->tempBuf[2] << 16)
- | ((UInt32)p->tempBuf[3] << 8)
- | ((UInt32)p->tempBuf[4]);
- p->range = 0xFFFFFFFF;
- p->tempBufSize = 0;
-
- if (p->remainLen > kMatchSpecLenStart + 1)
- {
- SizeT numProbs = LzmaProps_GetNumProbs(&p->prop);
- SizeT i;
- CLzmaProb *probs = p->probs;
- for (i = 0; i < numProbs; i++)
- probs[i] = kBitModelTotal >> 1;
- p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
- p->state = 0;
- }
-
- p->remainLen = 0;
- }
-
- LzmaDec_WriteRem(p, dicLimit);
-
- while (p->remainLen != kMatchSpecLenStart)
- {
- int checkEndMarkNow = 0;
-
- if (p->dicPos >= dicLimit)
- {
- if (p->remainLen == 0 && p->code == 0)
- {
- *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
- return SZ_OK;
- }
- if (finishMode == LZMA_FINISH_ANY)
- {
- *status = LZMA_STATUS_NOT_FINISHED;
- return SZ_OK;
- }
- if (p->remainLen != 0)
- {
- *status = LZMA_STATUS_NOT_FINISHED;
- return SZ_ERROR_DATA;
- }
- checkEndMarkNow = 1;
- }
-
- if (p->tempBufSize == 0)
- {
- SizeT processed;
- const Byte *bufLimit;
- if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
- {
- int dummyRes = LzmaDec_TryDummy(p, src, inSize);
- if (dummyRes == DUMMY_ERROR)
- {
- memcpy(p->tempBuf, src, inSize);
- p->tempBufSize = (unsigned)inSize;
- (*srcLen) += inSize;
- *status = LZMA_STATUS_NEEDS_MORE_INPUT;
- return SZ_OK;
- }
- if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
- {
- *status = LZMA_STATUS_NOT_FINISHED;
- return SZ_ERROR_DATA;
- }
- bufLimit = src;
- }
- else
- bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
- p->buf = src;
- if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
- return SZ_ERROR_DATA;
- processed = (SizeT)(p->buf - src);
- (*srcLen) += processed;
- src += processed;
- inSize -= processed;
- }
- else
- {
- unsigned rem = p->tempBufSize, lookAhead = 0;
- while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
- p->tempBuf[rem++] = src[lookAhead++];
- p->tempBufSize = rem;
- if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
- {
- int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem);
- if (dummyRes == DUMMY_ERROR)
- {
- (*srcLen) += (SizeT)lookAhead;
- *status = LZMA_STATUS_NEEDS_MORE_INPUT;
- return SZ_OK;
- }
- if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
- {
- *status = LZMA_STATUS_NOT_FINISHED;
- return SZ_ERROR_DATA;
- }
- }
- p->buf = p->tempBuf;
- if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
- return SZ_ERROR_DATA;
-
- {
- unsigned kkk = (unsigned)(p->buf - p->tempBuf);
- if (rem < kkk)
- return SZ_ERROR_FAIL; /* some internal error */
- rem -= kkk;
- if (lookAhead < rem)
- return SZ_ERROR_FAIL; /* some internal error */
- lookAhead -= rem;
- }
- (*srcLen) += (SizeT)lookAhead;
- src += lookAhead;
- inSize -= (SizeT)lookAhead;
- p->tempBufSize = 0;
- }
- }
-
- if (p->code != 0)
- return SZ_ERROR_DATA;
- *status = LZMA_STATUS_FINISHED_WITH_MARK;
- return SZ_OK;
-}
-
-
-SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
-{
- SizeT outSize = *destLen;
- SizeT inSize = *srcLen;
- *srcLen = *destLen = 0;
- for (;;)
- {
- SizeT inSizeCur = inSize, outSizeCur, dicPos;
- ELzmaFinishMode curFinishMode;
- SRes res;
- if (p->dicPos == p->dicBufSize)
- p->dicPos = 0;
- dicPos = p->dicPos;
- if (outSize > p->dicBufSize - dicPos)
- {
- outSizeCur = p->dicBufSize;
- curFinishMode = LZMA_FINISH_ANY;
- }
- else
- {
- outSizeCur = dicPos + outSize;
- curFinishMode = finishMode;
- }
-
- res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status);
- src += inSizeCur;
- inSize -= inSizeCur;
- *srcLen += inSizeCur;
- outSizeCur = p->dicPos - dicPos;
- memcpy(dest, p->dic + dicPos, outSizeCur);
- dest += outSizeCur;
- outSize -= outSizeCur;
- *destLen += outSizeCur;
- if (res != 0)
- return res;
- if (outSizeCur == 0 || outSize == 0)
- return SZ_OK;
- }
-}
-
-void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc)
-{
- ISzAlloc_Free(alloc, p->probs);
- p->probs = NULL;
-}
-
-static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc)
-{
- ISzAlloc_Free(alloc, p->dic);
- p->dic = NULL;
-}
-
-void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc)
-{
- LzmaDec_FreeProbs(p, alloc);
- LzmaDec_FreeDict(p, alloc);
-}
-
-SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
-{
- UInt32 dicSize;
- Byte d;
-
- if (size < LZMA_PROPS_SIZE)
- return SZ_ERROR_UNSUPPORTED;
- else
- dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24);
-
- if (dicSize < LZMA_DIC_MIN)
- dicSize = LZMA_DIC_MIN;
- p->dicSize = dicSize;
-
- d = data[0];
- if (d >= (9 * 5 * 5))
- return SZ_ERROR_UNSUPPORTED;
-
- p->lc = (Byte)(d % 9);
- d /= 9;
- p->pb = (Byte)(d / 5);
- p->lp = (Byte)(d % 5);
-
- return SZ_OK;
-}
-
-static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc)
-{
- UInt32 numProbs = LzmaProps_GetNumProbs(propNew);
- if (!p->probs || numProbs != p->numProbs)
- {
- LzmaDec_FreeProbs(p, alloc);
- p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb));
- if (!p->probs)
- return SZ_ERROR_MEM;
- p->probs_1664 = p->probs + 1664;
- p->numProbs = numProbs;
- }
- return SZ_OK;
-}
-
-SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
-{
- CLzmaProps propNew;
- RINOK(LzmaProps_Decode(&propNew, props, propsSize));
- RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
- p->prop = propNew;
- return SZ_OK;
-}
-
-SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
-{
- CLzmaProps propNew;
- SizeT dicBufSize;
- RINOK(LzmaProps_Decode(&propNew, props, propsSize));
- RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
-
- {
- UInt32 dictSize = propNew.dicSize;
- SizeT mask = ((UInt32)1 << 12) - 1;
- if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
- else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;;
- dicBufSize = ((SizeT)dictSize + mask) & ~mask;
- if (dicBufSize < dictSize)
- dicBufSize = dictSize;
- }
-
- if (!p->dic || dicBufSize != p->dicBufSize)
- {
- LzmaDec_FreeDict(p, alloc);
- p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize);
- if (!p->dic)
- {
- LzmaDec_FreeProbs(p, alloc);
- return SZ_ERROR_MEM;
- }
- }
- p->dicBufSize = dicBufSize;
- p->prop = propNew;
- return SZ_OK;
-}
-
-SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
- const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
- ELzmaStatus *status, ISzAllocPtr alloc)
-{
- CLzmaDec p;
- SRes res;
- SizeT outSize = *destLen, inSize = *srcLen;
- *destLen = *srcLen = 0;
- *status = LZMA_STATUS_NOT_SPECIFIED;
- if (inSize < RC_INIT_SIZE)
- return SZ_ERROR_INPUT_EOF;
- LzmaDec_Construct(&p);
- RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc));
- p.dic = dest;
- p.dicBufSize = outSize;
- LzmaDec_Init(&p);
- *srcLen = inSize;
- res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
- *destLen = p.dicPos;
- if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
- res = SZ_ERROR_INPUT_EOF;
- LzmaDec_FreeProbs(&p, alloc);
- return res;
-}
+++ /dev/null
-/* LzmaEnc.c -- LZMA Encoder
-2019-01-10: Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
-#include <string.h>
-
-/* #define SHOW_STAT */
-/* #define SHOW_STAT2 */
-
-#if defined(SHOW_STAT) || defined(SHOW_STAT2)
-#include <stdio.h>
-#endif
-
-#include "LzmaEnc.h"
-
-#include "LzFind.h"
-#ifndef _7ZIP_ST
-#include "LzFindMt.h"
-#endif
-
-#ifdef SHOW_STAT
-static unsigned g_STAT_OFFSET = 0;
-#endif
-
-#define kLzmaMaxHistorySize ((UInt32)3 << 29)
-/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */
-
-#define kNumTopBits 24
-#define kTopValue ((UInt32)1 << kNumTopBits)
-
-#define kNumBitModelTotalBits 11
-#define kBitModelTotal (1 << kNumBitModelTotalBits)
-#define kNumMoveBits 5
-#define kProbInitValue (kBitModelTotal >> 1)
-
-#define kNumMoveReducingBits 4
-#define kNumBitPriceShiftBits 4
-#define kBitPrice (1 << kNumBitPriceShiftBits)
-
-#define REP_LEN_COUNT 64
-
-void LzmaEncProps_Init(CLzmaEncProps *p)
-{
- p->level = 5;
- p->dictSize = p->mc = 0;
- p->reduceSize = (UInt64)(Int64)-1;
- p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
- p->writeEndMark = 0;
-}
-
-void LzmaEncProps_Normalize(CLzmaEncProps *p)
-{
- int level = p->level;
- if (level < 0) level = 5;
- p->level = level;
-
- if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26)));
- if (p->dictSize > p->reduceSize)
- {
- unsigned i;
- UInt32 reduceSize = (UInt32)p->reduceSize;
- for (i = 11; i <= 30; i++)
- {
- if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; }
- if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; }
- }
- }
-
- if (p->lc < 0) p->lc = 3;
- if (p->lp < 0) p->lp = 0;
- if (p->pb < 0) p->pb = 2;
-
- if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
- if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
- if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
- if (p->numHashBytes < 0) p->numHashBytes = 4;
- if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
-
- if (p->numThreads < 0)
- p->numThreads =
- #ifndef _7ZIP_ST
- ((p->btMode && p->algo) ? 2 : 1);
- #else
- 1;
- #endif
-}
-
-UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
-{
- CLzmaEncProps props = *props2;
- LzmaEncProps_Normalize(&props);
- return props.dictSize;
-}
-
-#if (_MSC_VER >= 1400)
-/* BSR code is fast for some new CPUs */
-/* #define LZMA_LOG_BSR */
-#endif
-
-#ifdef LZMA_LOG_BSR
-
-#define kDicLogSizeMaxCompress 32
-
-#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); }
-
-static unsigned GetPosSlot1(UInt32 pos)
-{
- unsigned res;
- BSR2_RET(pos, res);
- return res;
-}
-#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
-#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
-
-#else
-
-#define kNumLogBits (9 + sizeof(size_t) / 2)
-/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */
-
-#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
-
-static void LzmaEnc_FastPosInit(Byte *g_FastPos)
-{
- unsigned slot;
- g_FastPos[0] = 0;
- g_FastPos[1] = 1;
- g_FastPos += 2;
-
- for (slot = 2; slot < kNumLogBits * 2; slot++)
- {
- size_t k = ((size_t)1 << ((slot >> 1) - 1));
- size_t j;
- for (j = 0; j < k; j++)
- g_FastPos[j] = (Byte)slot;
- g_FastPos += k;
- }
-}
-
-/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */
-/*
-#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
- (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \
- res = p->g_FastPos[pos >> zz] + (zz * 2); }
-*/
-
-/*
-#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
- (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \
- res = p->g_FastPos[pos >> zz] + (zz * 2); }
-*/
-
-#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \
- res = p->g_FastPos[pos >> zz] + (zz * 2); }
-
-/*
-#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \
- p->g_FastPos[pos >> 6] + 12 : \
- p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; }
-*/
-
-#define GetPosSlot1(pos) p->g_FastPos[pos]
-#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
-#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); }
-
-#endif
-
-
-#define LZMA_NUM_REPS 4
-
-typedef UInt16 CState;
-typedef UInt16 CExtra;
-
-typedef struct
-{
- UInt32 price;
- CState state;
- CExtra extra;
- // 0 : normal
- // 1 : LIT : MATCH
- // > 1 : MATCH (extra-1) : LIT : REP0 (len)
- UInt32 len;
- UInt32 dist;
- UInt32 reps[LZMA_NUM_REPS];
-} COptimal;
-
-
-// 18.06
-#define kNumOpts (1 << 11)
-#define kPackReserve (kNumOpts * 8)
-// #define kNumOpts (1 << 12)
-// #define kPackReserve (1 + kNumOpts * 2)
-
-#define kNumLenToPosStates 4
-#define kNumPosSlotBits 6
-#define kDicLogSizeMin 0
-#define kDicLogSizeMax 32
-#define kDistTableSizeMax (kDicLogSizeMax * 2)
-
-#define kNumAlignBits 4
-#define kAlignTableSize (1 << kNumAlignBits)
-#define kAlignMask (kAlignTableSize - 1)
-
-#define kStartPosModelIndex 4
-#define kEndPosModelIndex 14
-#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
-
-typedef
-#ifdef _LZMA_PROB32
- UInt32
-#else
- UInt16
-#endif
- CLzmaProb;
-
-#define LZMA_PB_MAX 4
-#define LZMA_LC_MAX 8
-#define LZMA_LP_MAX 4
-
-#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)
-
-#define kLenNumLowBits 3
-#define kLenNumLowSymbols (1 << kLenNumLowBits)
-#define kLenNumHighBits 8
-#define kLenNumHighSymbols (1 << kLenNumHighBits)
-#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols)
-
-#define LZMA_MATCH_LEN_MIN 2
-#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1)
-
-#define kNumStates 12
-
-
-typedef struct
-{
- CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)];
- CLzmaProb high[kLenNumHighSymbols];
-} CLenEnc;
-
-
-typedef struct
-{
- unsigned tableSize;
- UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];
- // UInt32 prices1[LZMA_NUM_PB_STATES_MAX][kLenNumLowSymbols * 2];
- // UInt32 prices2[kLenNumSymbolsTotal];
-} CLenPriceEnc;
-
-#define GET_PRICE_LEN(p, posState, len) \
- ((p)->prices[posState][(size_t)(len) - LZMA_MATCH_LEN_MIN])
-
-/*
-#define GET_PRICE_LEN(p, posState, len) \
- ((p)->prices2[(size_t)(len) - 2] + ((p)->prices1[posState][((len) - 2) & (kLenNumLowSymbols * 2 - 1)] & (((len) - 2 - kLenNumLowSymbols * 2) >> 9)))
-*/
-
-typedef struct
-{
- UInt32 range;
- unsigned cache;
- UInt64 low;
- UInt64 cacheSize;
- Byte *buf;
- Byte *bufLim;
- Byte *bufBase;
- ISeqOutStream *outStream;
- UInt64 processed;
- SRes res;
-} CRangeEnc;
-
-
-typedef struct
-{
- CLzmaProb *litProbs;
-
- unsigned state;
- UInt32 reps[LZMA_NUM_REPS];
-
- CLzmaProb posAlignEncoder[1 << kNumAlignBits];
- CLzmaProb isRep[kNumStates];
- CLzmaProb isRepG0[kNumStates];
- CLzmaProb isRepG1[kNumStates];
- CLzmaProb isRepG2[kNumStates];
- CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
- CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
-
- CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
- CLzmaProb posEncoders[kNumFullDistances];
-
- CLenEnc lenProbs;
- CLenEnc repLenProbs;
-
-} CSaveState;
-
-
-typedef UInt32 CProbPrice;
-
-
-typedef struct
-{
- void *matchFinderObj;
- IMatchFinder matchFinder;
-
- unsigned optCur;
- unsigned optEnd;
-
- unsigned longestMatchLen;
- unsigned numPairs;
- UInt32 numAvail;
-
- unsigned state;
- unsigned numFastBytes;
- unsigned additionalOffset;
- UInt32 reps[LZMA_NUM_REPS];
- unsigned lpMask, pbMask;
- CLzmaProb *litProbs;
- CRangeEnc rc;
-
- UInt32 backRes;
-
- unsigned lc, lp, pb;
- unsigned lclp;
-
- BoolInt fastMode;
- BoolInt writeEndMark;
- BoolInt finished;
- BoolInt multiThread;
- BoolInt needInit;
- // BoolInt _maxMode;
-
- UInt64 nowPos64;
-
- unsigned matchPriceCount;
- // unsigned alignPriceCount;
- int repLenEncCounter;
-
- unsigned distTableSize;
-
- UInt32 dictSize;
- SRes result;
-
- #ifndef _7ZIP_ST
- BoolInt mtMode;
- // begin of CMatchFinderMt is used in LZ thread
- CMatchFinderMt matchFinderMt;
- // end of CMatchFinderMt is used in BT and HASH threads
- #endif
-
- CMatchFinder matchFinderBase;
-
- #ifndef _7ZIP_ST
- Byte pad[128];
- #endif
-
- // LZ thread
- CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
-
- UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
-
- UInt32 alignPrices[kAlignTableSize];
- UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
- UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
-
- CLzmaProb posAlignEncoder[1 << kNumAlignBits];
- CLzmaProb isRep[kNumStates];
- CLzmaProb isRepG0[kNumStates];
- CLzmaProb isRepG1[kNumStates];
- CLzmaProb isRepG2[kNumStates];
- CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
- CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
- CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
- CLzmaProb posEncoders[kNumFullDistances];
-
- CLenEnc lenProbs;
- CLenEnc repLenProbs;
-
- #ifndef LZMA_LOG_BSR
- Byte g_FastPos[1 << kNumLogBits];
- #endif
-
- CLenPriceEnc lenEnc;
- CLenPriceEnc repLenEnc;
-
- COptimal opt[kNumOpts];
-
- CSaveState saveState;
-
- #ifndef _7ZIP_ST
- Byte pad2[128];
- #endif
-} CLzmaEnc;
-
-
-SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
-{
- CLzmaEnc *p = (CLzmaEnc *)pp;
- CLzmaEncProps props = *props2;
- LzmaEncProps_Normalize(&props);
-
- if (props.lc > LZMA_LC_MAX
- || props.lp > LZMA_LP_MAX
- || props.pb > LZMA_PB_MAX
- || props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress)
- || props.dictSize > kLzmaMaxHistorySize)
- return SZ_ERROR_PARAM;
-
- p->dictSize = props.dictSize;
- {
- unsigned fb = props.fb;
- if (fb < 5)
- fb = 5;
- if (fb > LZMA_MATCH_LEN_MAX)
- fb = LZMA_MATCH_LEN_MAX;
- p->numFastBytes = fb;
- }
- p->lc = props.lc;
- p->lp = props.lp;
- p->pb = props.pb;
- p->fastMode = (props.algo == 0);
- // p->_maxMode = True;
- p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0);
- {
- unsigned numHashBytes = 4;
- if (props.btMode)
- {
- if (props.numHashBytes < 2)
- numHashBytes = 2;
- else if (props.numHashBytes < 4)
- numHashBytes = props.numHashBytes;
- }
- p->matchFinderBase.numHashBytes = numHashBytes;
- }
-
- p->matchFinderBase.cutValue = props.mc;
-
- p->writeEndMark = props.writeEndMark;
-
- #ifndef _7ZIP_ST
- /*
- if (newMultiThread != _multiThread)
- {
- ReleaseMatchFinder();
- _multiThread = newMultiThread;
- }
- */
- p->multiThread = (props.numThreads > 1);
- #endif
-
- return SZ_OK;
-}
-
-
-void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)
-{
- CLzmaEnc *p = (CLzmaEnc *)pp;
- p->matchFinderBase.expectedDataSize = expectedDataSiize;
-}
-
-
-#define kState_Start 0
-#define kState_LitAfterMatch 4
-#define kState_LitAfterRep 5
-#define kState_MatchAfterLit 7
-#define kState_RepAfterLit 8
-
-static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5};
-static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
-static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
-static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
-
-#define IsLitState(s) ((s) < 7)
-#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1)
-#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1)
-
-#define kInfinityPrice (1 << 30)
-
-static void RangeEnc_Construct(CRangeEnc *p)
-{
- p->outStream = NULL;
- p->bufBase = NULL;
-}
-
-#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
-#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
-
-#define RC_BUF_SIZE (1 << 16)
-
-static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc)
-{
- if (!p->bufBase)
- {
- p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE);
- if (!p->bufBase)
- return 0;
- p->bufLim = p->bufBase + RC_BUF_SIZE;
- }
- return 1;
-}
-
-static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc)
-{
- ISzAlloc_Free(alloc, p->bufBase);
- p->bufBase = 0;
-}
-
-static void RangeEnc_Init(CRangeEnc *p)
-{
- /* Stream.Init(); */
- p->range = 0xFFFFFFFF;
- p->cache = 0;
- p->low = 0;
- p->cacheSize = 0;
-
- p->buf = p->bufBase;
-
- p->processed = 0;
- p->res = SZ_OK;
-}
-
-MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
-{
- size_t num;
- if (p->res != SZ_OK)
- return;
- num = p->buf - p->bufBase;
- if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
- p->res = SZ_ERROR_WRITE;
- p->processed += num;
- p->buf = p->bufBase;
-}
-
-MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p)
-{
- UInt32 low = (UInt32)p->low;
- unsigned high = (unsigned)(p->low >> 32);
- p->low = (UInt32)(low << 8);
- if (low < (UInt32)0xFF000000 || high != 0)
- {
- {
- Byte *buf = p->buf;
- *buf++ = (Byte)(p->cache + high);
- p->cache = (unsigned)(low >> 24);
- p->buf = buf;
- if (buf == p->bufLim)
- RangeEnc_FlushStream(p);
- if (p->cacheSize == 0)
- return;
- }
- high += 0xFF;
- for (;;)
- {
- Byte *buf = p->buf;
- *buf++ = (Byte)(high);
- p->buf = buf;
- if (buf == p->bufLim)
- RangeEnc_FlushStream(p);
- if (--p->cacheSize == 0)
- return;
- }
- }
- p->cacheSize++;
-}
-
-static void RangeEnc_FlushData(CRangeEnc *p)
-{
- int i;
- for (i = 0; i < 5; i++)
- RangeEnc_ShiftLow(p);
-}
-
-#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); }
-
-#define RC_BIT_PRE(p, prob) \
- ttt = *(prob); \
- newBound = (range >> kNumBitModelTotalBits) * ttt;
-
-// #define _LZMA_ENC_USE_BRANCH
-
-#ifdef _LZMA_ENC_USE_BRANCH
-
-#define RC_BIT(p, prob, bit) { \
- RC_BIT_PRE(p, prob) \
- if (bit == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \
- else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \
- *(prob) = (CLzmaProb)ttt; \
- RC_NORM(p) \
- }
-
-#else
-
-#define RC_BIT(p, prob, bit) { \
- UInt32 mask; \
- RC_BIT_PRE(p, prob) \
- mask = 0 - (UInt32)bit; \
- range &= mask; \
- mask &= newBound; \
- range -= mask; \
- (p)->low += mask; \
- mask = (UInt32)bit - 1; \
- range += newBound & mask; \
- mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \
- mask += ((1 << kNumMoveBits) - 1); \
- ttt += (Int32)(mask - ttt) >> kNumMoveBits; \
- *(prob) = (CLzmaProb)ttt; \
- RC_NORM(p) \
- }
-
-#endif
-
-
-
-
-#define RC_BIT_0_BASE(p, prob) \
- range = newBound; *(prob) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
-
-#define RC_BIT_1_BASE(p, prob) \
- range -= newBound; (p)->low += newBound; *(prob) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); \
-
-#define RC_BIT_0(p, prob) \
- RC_BIT_0_BASE(p, prob) \
- RC_NORM(p)
-
-#define RC_BIT_1(p, prob) \
- RC_BIT_1_BASE(p, prob) \
- RC_NORM(p)
-
-static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob)
-{
- UInt32 range, ttt, newBound;
- range = p->range;
- RC_BIT_PRE(p, prob)
- RC_BIT_0(p, prob)
- p->range = range;
-}
-
-static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym)
-{
- UInt32 range = p->range;
- sym |= 0x100;
- do
- {
- UInt32 ttt, newBound;
- // RangeEnc_EncodeBit(p, probs + (sym >> 8), (sym >> 7) & 1);
- CLzmaProb *prob = probs + (sym >> 8);
- UInt32 bit = (sym >> 7) & 1;
- sym <<= 1;
- RC_BIT(p, prob, bit);
- }
- while (sym < 0x10000);
- p->range = range;
-}
-
-static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
-{
- UInt32 i;
- for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++)
- {
- const unsigned kCyclesBits = kNumBitPriceShiftBits;
- UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1));
- unsigned bitCount = 0;
- unsigned j;
- for (j = 0; j < kCyclesBits; j++)
- {
- w = w * w;
- bitCount <<= 1;
- while (w >= ((UInt32)1 << 16))
- {
- w >>= 1;
- bitCount++;
- }
- }
- ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
- // printf("\n%3d: %5d", i, ProbPrices[i]);
- }
-}
-
-
-#define GET_PRICE(prob, bit) \
- p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
-
-#define GET_PRICEa(prob, bit) \
- ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
-
-#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
-#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
-
-#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits]
-#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
-
-
-static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 sym, const CProbPrice *ProbPrices)
-{
- UInt32 price = 0;
- sym |= 0x100;
- do
- {
- unsigned bit = sym & 1;
- sym >>= 1;
- price += GET_PRICEa(probs[sym], bit);
- }
- while (sym >= 2);
- return price;
-}
-
-
-static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 sym, UInt32 matchByte, const CProbPrice *ProbPrices)
-{
- UInt32 price = 0;
- UInt32 offs = 0x100;
- sym |= 0x100;
- do
- {
- matchByte <<= 1;
- price += GET_PRICEa(probs[offs + (matchByte & offs) + (sym >> 8)], (sym >> 7) & 1);
- sym <<= 1;
- offs &= ~(matchByte ^ sym);
- }
- while (sym < 0x10000);
- return price;
-}
-
-
-
-static void LenEnc_Init(CLenEnc *p)
-{
- unsigned i;
- for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++)
- p->low[i] = kProbInitValue;
- for (i = 0; i < kLenNumHighSymbols; i++)
- p->high[i] = kProbInitValue;
-}
-
-static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posState)
-{
- UInt32 range, ttt, newBound;
- CLzmaProb *probs = p->low;
- range = rc->range;
- RC_BIT_PRE(rc, probs);
- if (sym >= kLenNumLowSymbols)
- {
- RC_BIT_1(rc, probs);
- probs += kLenNumLowSymbols;
- RC_BIT_PRE(rc, probs);
- if (sym >= kLenNumLowSymbols * 2)
- {
- RC_BIT_1(rc, probs);
- rc->range = range;
- // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2);
- LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2);
- return;
- }
- sym -= kLenNumLowSymbols;
- }
-
- // RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, sym);
- {
- unsigned m;
- unsigned bit;
- RC_BIT_0(rc, probs);
- probs += (posState << (1 + kLenNumLowBits));
- bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit;
- bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit;
- bit = sym & 1; RC_BIT(rc, probs + m, bit);
- rc->range = range;
- }
-}
-
-static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices)
-{
- unsigned i;
- for (i = 0; i < 8; i += 2)
- {
- UInt32 price = startPrice;
- UInt32 prob;
- price += GET_PRICEa(probs[1 ], (i >> 2));
- price += GET_PRICEa(probs[2 + (i >> 2)], (i >> 1) & 1);
- prob = probs[4 + (i >> 1)];
- prices[i ] = price + GET_PRICEa_0(prob);
- prices[i + 1] = price + GET_PRICEa_1(prob);
- }
-}
-
-
-MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(
- CLenPriceEnc *p,
- unsigned numPosStates,
- const CLenEnc *enc,
- const CProbPrice *ProbPrices)
-{
- UInt32 b;
-
- {
- unsigned prob = enc->low[0];
- UInt32 a, c;
- unsigned posState;
- b = GET_PRICEa_1(prob);
- a = GET_PRICEa_0(prob);
- c = b + GET_PRICEa_0(enc->low[kLenNumLowSymbols]);
- for (posState = 0; posState < numPosStates; posState++)
- {
- UInt32 *prices = p->prices[posState];
- const CLzmaProb *probs = enc->low + (posState << (1 + kLenNumLowBits));
- SetPrices_3(probs, a, prices, ProbPrices);
- SetPrices_3(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols, ProbPrices);
- }
- }
-
- /*
- {
- unsigned i;
- UInt32 b;
- a = GET_PRICEa_0(enc->low[0]);
- for (i = 0; i < kLenNumLowSymbols; i++)
- p->prices2[i] = a;
- a = GET_PRICEa_1(enc->low[0]);
- b = a + GET_PRICEa_0(enc->low[kLenNumLowSymbols]);
- for (i = kLenNumLowSymbols; i < kLenNumLowSymbols * 2; i++)
- p->prices2[i] = b;
- a += GET_PRICEa_1(enc->low[kLenNumLowSymbols]);
- }
- */
-
- // p->counter = numSymbols;
- // p->counter = 64;
-
- {
- unsigned i = p->tableSize;
-
- if (i > kLenNumLowSymbols * 2)
- {
- const CLzmaProb *probs = enc->high;
- UInt32 *prices = p->prices[0] + kLenNumLowSymbols * 2;
- i -= kLenNumLowSymbols * 2 - 1;
- i >>= 1;
- b += GET_PRICEa_1(enc->low[kLenNumLowSymbols]);
- do
- {
- /*
- p->prices2[i] = a +
- // RcTree_GetPrice(enc->high, kLenNumHighBits, i - kLenNumLowSymbols * 2, ProbPrices);
- LitEnc_GetPrice(probs, i - kLenNumLowSymbols * 2, ProbPrices);
- */
- // UInt32 price = a + RcTree_GetPrice(probs, kLenNumHighBits - 1, sym, ProbPrices);
- unsigned sym = --i + (1 << (kLenNumHighBits - 1));
- UInt32 price = b;
- do
- {
- unsigned bit = sym & 1;
- sym >>= 1;
- price += GET_PRICEa(probs[sym], bit);
- }
- while (sym >= 2);
-
- {
- unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];
- prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob);
- prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob);
- }
- }
- while (i);
-
- {
- unsigned posState;
- size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);
- for (posState = 1; posState < numPosStates; posState++)
- memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num);
- }
- }
- }
-}
-
-/*
- #ifdef SHOW_STAT
- g_STAT_OFFSET += num;
- printf("\n MovePos %u", num);
- #endif
-*/
-
-#define MOVE_POS(p, num) { \
- p->additionalOffset += (num); \
- p->matchFinder.Skip(p->matchFinderObj, (UInt32)(num)); }
-
-
-#define MARK_LIT ((UInt32)(Int32)-1)
-
-#define MakeAs_Lit(p) { (p)->dist = MARK_LIT; (p)->extra = 0; }
-#define MakeAs_ShortRep(p) { (p)->dist = 0; (p)->extra = 0; }
-#define IsShortRep(p) ((p)->dist == 0)
-
-
-#define GetPrice_ShortRep(p, state, posState) \
- ( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState]))
-
-#define GetPrice_Rep_0(p, state, posState) ( \
- GET_PRICE_1(p->isMatch[state][posState]) \
- + GET_PRICE_1(p->isRep0Long[state][posState])) \
- + GET_PRICE_1(p->isRep[state]) \
- + GET_PRICE_0(p->isRepG0[state])
-
-MY_FORCE_INLINE
-static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState)
-{
- UInt32 price;
- UInt32 prob = p->isRepG0[state];
- if (repIndex == 0)
- {
- price = GET_PRICE_0(prob);
- price += GET_PRICE_1(p->isRep0Long[state][posState]);
- }
- else
- {
- price = GET_PRICE_1(prob);
- prob = p->isRepG1[state];
- if (repIndex == 1)
- price += GET_PRICE_0(prob);
- else
- {
- price += GET_PRICE_1(prob);
- price += GET_PRICE(p->isRepG2[state], repIndex - 2);
- }
- }
- return price;
-}
-
-
-static SRes CheckErrors(CLzmaEnc *p)
-{
- if (p->result != SZ_OK)
- return p->result;
- if (p->rc.res != SZ_OK)
- p->result = SZ_ERROR_WRITE;
- if (p->matchFinderBase.result != SZ_OK)
- p->result = SZ_ERROR_READ;
- if (p->result != SZ_OK)
- p->finished = True;
- return p->result;
-}
-
-
-MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
-{
- unsigned i;
- const CProbPrice *ProbPrices = p->ProbPrices;
- const CLzmaProb *probs = p->posAlignEncoder;
- // p->alignPriceCount = 0;
- for (i = 0; i < kAlignTableSize / 2; i++)
- {
- UInt32 price = 0;
- unsigned sym = i;
- unsigned m = 1;
- unsigned bit;
- UInt32 prob;
- bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
- bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
- bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
- prob = probs[m];
- p->alignPrices[i ] = price + GET_PRICEa_0(prob);
- p->alignPrices[i + 8] = price + GET_PRICEa_1(prob);
- // p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices);
- }
-}
-
-
-MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
-{
- // int y; for (y = 0; y < 100; y++) {
-
- UInt32 tempPrices[kNumFullDistances];
- unsigned i, lps;
-
- const CProbPrice *ProbPrices = p->ProbPrices;
- p->matchPriceCount = 0;
-
- for (i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++)
- {
- unsigned posSlot = GetPosSlot1(i);
- unsigned footerBits = (posSlot >> 1) - 1;
- unsigned base = ((2 | (posSlot & 1)) << footerBits);
- const CLzmaProb *probs = p->posEncoders + (size_t)base * 2;
- // tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base, footerBits, i - base, p->ProbPrices);
- UInt32 price = 0;
- unsigned m = 1;
- unsigned sym = i;
- unsigned offset = (unsigned)1 << footerBits;
- base += i;
-
- if (footerBits)
- do
- {
- unsigned bit = sym & 1;
- sym >>= 1;
- price += GET_PRICEa(probs[m], bit);
- m = (m << 1) + bit;
- }
- while (--footerBits);
-
- {
- unsigned prob = probs[m];
- tempPrices[base ] = price + GET_PRICEa_0(prob);
- tempPrices[base + offset] = price + GET_PRICEa_1(prob);
- }
- }
-
- for (lps = 0; lps < kNumLenToPosStates; lps++)
- {
- unsigned slot;
- unsigned distTableSize2 = (p->distTableSize + 1) >> 1;
- UInt32 *posSlotPrices = p->posSlotPrices[lps];
- const CLzmaProb *probs = p->posSlotEncoder[lps];
-
- for (slot = 0; slot < distTableSize2; slot++)
- {
- // posSlotPrices[slot] = RcTree_GetPrice(encoder, kNumPosSlotBits, slot, p->ProbPrices);
- UInt32 price;
- unsigned bit;
- unsigned sym = slot + (1 << (kNumPosSlotBits - 1));
- unsigned prob;
- bit = sym & 1; sym >>= 1; price = GET_PRICEa(probs[sym], bit);
- bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
- bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
- bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
- bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
- prob = probs[(size_t)slot + (1 << (kNumPosSlotBits - 1))];
- posSlotPrices[(size_t)slot * 2 ] = price + GET_PRICEa_0(prob);
- posSlotPrices[(size_t)slot * 2 + 1] = price + GET_PRICEa_1(prob);
- }
-
- {
- UInt32 delta = ((UInt32)((kEndPosModelIndex / 2 - 1) - kNumAlignBits) << kNumBitPriceShiftBits);
- for (slot = kEndPosModelIndex / 2; slot < distTableSize2; slot++)
- {
- posSlotPrices[(size_t)slot * 2 ] += delta;
- posSlotPrices[(size_t)slot * 2 + 1] += delta;
- delta += ((UInt32)1 << kNumBitPriceShiftBits);
- }
- }
-
- {
- UInt32 *dp = p->distancesPrices[lps];
-
- dp[0] = posSlotPrices[0];
- dp[1] = posSlotPrices[1];
- dp[2] = posSlotPrices[2];
- dp[3] = posSlotPrices[3];
-
- for (i = 4; i < kNumFullDistances; i += 2)
- {
- UInt32 slotPrice = posSlotPrices[GetPosSlot1(i)];
- dp[i ] = slotPrice + tempPrices[i];
- dp[i + 1] = slotPrice + tempPrices[i + 1];
- }
- }
- }
- // }
-}
-
-
-
-void LzmaEnc_Construct(CLzmaEnc *p)
-{
- RangeEnc_Construct(&p->rc);
- MatchFinder_Construct(&p->matchFinderBase);
-
- #ifndef _7ZIP_ST
- MatchFinderMt_Construct(&p->matchFinderMt);
- p->matchFinderMt.MatchFinder = &p->matchFinderBase;
- #endif
-
- {
- CLzmaEncProps props;
- LzmaEncProps_Init(&props);
- LzmaEnc_SetProps(p, &props);
- }
-
- #ifndef LZMA_LOG_BSR
- LzmaEnc_FastPosInit(p->g_FastPos);
- #endif
-
- LzmaEnc_InitPriceTables(p->ProbPrices);
- p->litProbs = NULL;
- p->saveState.litProbs = NULL;
-
-}
-
-CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
-{
- void *p;
- p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc));
- if (p)
- LzmaEnc_Construct((CLzmaEnc *)p);
- return p;
-}
-
-void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
-{
- ISzAlloc_Free(alloc, p->litProbs);
- ISzAlloc_Free(alloc, p->saveState.litProbs);
- p->litProbs = NULL;
- p->saveState.litProbs = NULL;
-}
-
-void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
-{
- #ifndef _7ZIP_ST
- MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
- #endif
-
- MatchFinder_Free(&p->matchFinderBase, allocBig);
- LzmaEnc_FreeLits(p, alloc);
- RangeEnc_Free(&p->rc, alloc);
-}
-
-void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
-{
- LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig);
- ISzAlloc_Free(alloc, p);
-}
-
-
-#define kBigHashDicLimit ((UInt32)1 << 24)
-
-static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
-{
- UInt32 beforeSize = kNumOpts;
- if (!RangeEnc_Alloc(&p->rc, alloc))
- return SZ_ERROR_MEM;
-
- #ifndef _7ZIP_ST
- p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0));
- #endif
-
- {
- unsigned lclp = p->lc + p->lp;
- if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp)
- {
- LzmaEnc_FreeLits(p, alloc);
- p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
- p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
- if (!p->litProbs || !p->saveState.litProbs)
- {
- LzmaEnc_FreeLits(p, alloc);
- return SZ_ERROR_MEM;
- }
- p->lclp = lclp;
- }
- }
-
- p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
-
- if (beforeSize + p->dictSize < keepWindowSize)
- beforeSize = keepWindowSize - p->dictSize;
-
- #ifndef _7ZIP_ST
- if (p->mtMode)
- {
- RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes,
- LZMA_MATCH_LEN_MAX
- + 1 /* 18.04 */
- , allocBig));
- p->matchFinderObj = &p->matchFinderMt;
- p->matchFinderBase.bigHash = (Byte)(
- (p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0);
- MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
- }
- else
- #endif
- {
- if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
- return SZ_ERROR_MEM;
- p->matchFinderObj = &p->matchFinderBase;
- MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
- }
-
- return SZ_OK;
-}
-
-void LzmaEnc_Init(CLzmaEnc *p)
-{
- unsigned i;
- p->state = 0;
- p->reps[0] =
- p->reps[1] =
- p->reps[2] =
- p->reps[3] = 1;
-
- RangeEnc_Init(&p->rc);
-
- for (i = 0; i < (1 << kNumAlignBits); i++)
- p->posAlignEncoder[i] = kProbInitValue;
-
- for (i = 0; i < kNumStates; i++)
- {
- unsigned j;
- for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++)
- {
- p->isMatch[i][j] = kProbInitValue;
- p->isRep0Long[i][j] = kProbInitValue;
- }
- p->isRep[i] = kProbInitValue;
- p->isRepG0[i] = kProbInitValue;
- p->isRepG1[i] = kProbInitValue;
- p->isRepG2[i] = kProbInitValue;
- }
-
- {
- for (i = 0; i < kNumLenToPosStates; i++)
- {
- CLzmaProb *probs = p->posSlotEncoder[i];
- unsigned j;
- for (j = 0; j < (1 << kNumPosSlotBits); j++)
- probs[j] = kProbInitValue;
- }
- }
- {
- for (i = 0; i < kNumFullDistances; i++)
- p->posEncoders[i] = kProbInitValue;
- }
-
- {
- UInt32 num = (UInt32)0x300 << (p->lp + p->lc);
- UInt32 k;
- CLzmaProb *probs = p->litProbs;
- for (k = 0; k < num; k++)
- probs[k] = kProbInitValue;
- }
-
-
- LenEnc_Init(&p->lenProbs);
- LenEnc_Init(&p->repLenProbs);
-
- p->optEnd = 0;
- p->optCur = 0;
-
- {
- for (i = 0; i < kNumOpts; i++)
- p->opt[i].price = kInfinityPrice;
- }
-
- p->additionalOffset = 0;
-
- p->pbMask = (1 << p->pb) - 1;
- p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);
-}
-
-
-void LzmaEnc_InitPrices(CLzmaEnc *p)
-{
- if (!p->fastMode)
- {
- FillDistancesPrices(p);
- FillAlignPrices(p);
- }
-
- p->lenEnc.tableSize =
- p->repLenEnc.tableSize =
- p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN;
-
- p->repLenEncCounter = REP_LEN_COUNT;
-
- LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
- LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
-}
-
-typedef struct
-{
- ISeqOutStream vt;
- Byte *data;
- SizeT rem;
- BoolInt overflow;
-} CLzmaEnc_SeqOutStreamBuf;
-
-static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size)
-{
- CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt);
- if (p->rem < size)
- {
- size = p->rem;
- p->overflow = True;
- }
- memcpy(p->data, data, size);
- p->rem -= size;
- p->data += size;
- return size;
-}
-
-
-UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
-{
- const CLzmaEnc *p = (CLzmaEnc *)pp;
- return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
-}
-
-
-const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
-{
- const CLzmaEnc *p = (CLzmaEnc *)pp;
- return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
-}
-
-
-SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
-{
- CLzmaEnc *p = (CLzmaEnc *)pp;
- unsigned i;
- UInt32 dictSize = p->dictSize;
- if (*size < LZMA_PROPS_SIZE)
- return SZ_ERROR_PARAM;
- *size = LZMA_PROPS_SIZE;
- props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
-
- if (dictSize >= ((UInt32)1 << 22))
- {
- UInt32 kDictMask = ((UInt32)1 << 20) - 1;
- if (dictSize < (UInt32)0xFFFFFFFF - kDictMask)
- dictSize = (dictSize + kDictMask) & ~kDictMask;
- }
- else for (i = 11; i <= 30; i++)
- {
- if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; }
- if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; }
- }
-
- for (i = 0; i < 4; i++)
- props[1 + i] = (Byte)(dictSize >> (8 * i));
- return SZ_OK;
-}
-
-
-
-
+++ /dev/null
-7z ANSI-C Decoder 9.35\r
-----------------------\r
-\r
-7z ANSI-C provides 7z/LZMA decoding.\r
-7z ANSI-C version is simplified version ported from C++ code.\r
-\r
-LZMA is default and general compression method of 7z format\r
-in 7-Zip compression program (www.7-zip.org). LZMA provides high \r
-compression ratio and very fast decompression.\r
-\r
-\r
-LICENSE\r
--------\r
-\r
-7z ANSI-C Decoder is part of the LZMA SDK.\r
-LZMA SDK is written and placed in the public domain by Igor Pavlov.\r
-\r
-Files\r
----------------------\r
-\r
-7zDecode.* - Low level 7z decoding\r
-7zExtract.* - High level 7z decoding\r
-7zHeader.* - .7z format constants\r
-7zIn.* - .7z archive opening\r
-7zItem.* - .7z structures\r
-7zMain.c - Test application\r
-\r
-\r
-How To Use\r
-----------\r
-\r
-You can create .7z archive with 7z.exe, 7za.exe or 7zr.exe:\r
-\r
- 7z.exe a archive.7z *.htm -r -mx -m0fb=255\r
-\r
-If you have big number of files in archive, and you need fast extracting, \r
-you can use partly-solid archives:\r
- \r
- 7za.exe a archive.7z *.htm -ms=512K -r -mx -m0fb=255 -m0d=512K\r
-\r
-In that example 7-Zip will use 512KB solid blocks. So it needs to decompress only \r
-512KB for extracting one file from such archive.\r
-\r
-\r
-Limitations of current version of 7z ANSI-C Decoder\r
----------------------------------------------------\r
-\r
- - It reads only "FileName", "Size", "LastWriteTime" and "CRC" information for each file in archive.\r
- - It supports only LZMA and Copy (no compression) methods with BCJ or BCJ2 filters.\r
- - It converts original UTF-16 Unicode file names to UTF-8 Unicode file names.\r
- \r
-These limitations will be fixed in future versions.\r
-\r
-\r
-Using 7z ANSI-C Decoder Test application:\r
------------------------------------------\r
-\r
-Usage: 7zDec <command> <archive_name>\r
-\r
-<Command>:\r
- e: Extract files from archive\r
- l: List contents of archive\r
- t: Test integrity of archive\r
-\r
-Example: \r
-\r
- 7zDec l archive.7z\r
-\r
-lists contents of archive.7z\r
-\r
- 7zDec e archive.7z\r
-\r
-extracts files from archive.7z to current folder.\r
-\r
-\r
-How to use .7z Decoder\r
-----------------------\r
-\r
-Memory allocation\r
-~~~~~~~~~~~~~~~~~\r
-\r
-7z Decoder uses two memory pools:\r
-1) Temporary pool\r
-2) Main pool\r
-Such scheme can allow you to avoid fragmentation of allocated blocks.\r
-\r
-\r
-Steps for using 7z decoder\r
---------------------------\r
-\r
-Use code at 7zMain.c as example.\r
-\r
-1) Declare variables:\r
- inStream /* implements ILookInStream interface */\r
- CSzArEx db; /* 7z archive database structure */\r
- ISzAlloc allocImp; /* memory functions for main pool */\r
- ISzAlloc allocTempImp; /* memory functions for temporary pool */\r
-\r
-2) call CrcGenerateTable(); function to initialize CRC structures.\r
-\r
-3) call SzArEx_Init(&db); function to initialize db structures.\r
-\r
-4) call SzArEx_Open(&db, inStream, &allocMain, &allocTemp) to open archive\r
-\r
-This function opens archive "inStream" and reads headers to "db".\r
-All items in "db" will be allocated with "allocMain" functions.\r
-SzArEx_Open function allocates and frees temporary structures by "allocTemp" functions.\r
-\r
-5) List items or Extract items\r
-\r
- Listing code:\r
- ~~~~~~~~~~~~~\r
-\r
- Use SzArEx_GetFileNameUtf16 function. Look example code in C\Util\7z\7zMain.c file. \r
- \r
-\r
- Extracting code:\r
- ~~~~~~~~~~~~~~~~\r
-\r
- SZ_RESULT SzAr_Extract(\r
- CArchiveDatabaseEx *db,\r
- ILookInStream *inStream, \r
- UInt32 fileIndex, /* index of file */\r
- UInt32 *blockIndex, /* index of solid block */\r
- Byte **outBuffer, /* pointer to pointer to output buffer (allocated with allocMain) */\r
- size_t *outBufferSize, /* buffer size for output buffer */\r
- size_t *offset, /* offset of stream for required file in *outBuffer */\r
- size_t *outSizeProcessed, /* size of file in *outBuffer */\r
- ISzAlloc *allocMain,\r
- ISzAlloc *allocTemp);\r
-\r
- If you need to decompress more than one file, you can send these values from previous call:\r
- blockIndex, \r
- outBuffer, \r
- outBufferSize,\r
- You can consider "outBuffer" as cache of solid block. If your archive is solid, \r
- it will increase decompression speed.\r
-\r
- After decompressing you must free "outBuffer":\r
- allocImp.Free(outBuffer);\r
-\r
-6) call SzArEx_Free(&db, allocImp.Free) to free allocated items in "db".\r
-\r
-\r
-\r
-\r
-Memory requirements for .7z decoding \r
-------------------------------------\r
-\r
-Memory usage for Archive opening:\r
- - Temporary pool:\r
- - Memory for uncompressed .7z headers\r
- - some other temporary blocks\r
- - Main pool:\r
- - Memory for database: \r
- Estimated size of one file structures in solid archive:\r
- - Size (4 or 8 Bytes)\r
- - CRC32 (4 bytes)\r
- - LastWriteTime (8 bytes)\r
- - Some file information (4 bytes)\r
- - File Name (variable length) + pointer + allocation structures\r
-\r
-Memory usage for archive Decompressing:\r
- - Temporary pool:\r
- - Memory for LZMA decompressing structures\r
- - Main pool:\r
- - Memory for decompressed solid block\r
- - Memory for temprorary buffers, if BCJ2 fileter is used. Usually these \r
- temprorary buffers can be about 15% of solid block size. \r
- \r
-\r
-7z Decoder doesn't allocate memory for compressed blocks. \r
-Instead of this, you must allocate buffer with desired \r
-size before calling 7z Decoder. Use 7zMain.c as example.\r
-\r
-\r
-Defines\r
--------\r
-\r
-_SZ_ALLOC_DEBUG - define it if you want to debug alloc/free operations to stderr.\r
-\r
-\r
----\r
-\r
-http://www.7-zip.org\r
-http://www.7-zip.org/sdk.html\r
-http://www.7-zip.org/support.html\r
+++ /dev/null
-7z Format description (18.06)\r
-----------------------------\r
-\r
-This file contains description of 7z archive format. \r
-7z archive can contain files compressed with any method.\r
-See "Methods.txt" for description for defined compressing methods.\r
-\r
-\r
-Format structure Overview\r
--------------------------\r
-\r
-Some fields can be optional.\r
-\r
-Archive structure\r
-~~~~~~~~~~~~~~~~~ \r
-SignatureHeader\r
-[PackedStreams]\r
-[PackedStreamsForHeaders]\r
-[\r
- Header \r
- or \r
- {\r
- Packed Header\r
- HeaderInfo\r
- }\r
-]\r
-\r
-\r
-\r
-Header structure\r
-~~~~~~~~~~~~~~~~ \r
-{\r
- ArchiveProperties\r
- AdditionalStreams\r
- {\r
- PackInfo\r
- {\r
- PackPos\r
- NumPackStreams\r
- Sizes[NumPackStreams]\r
- CRCs[NumPackStreams]\r
- }\r
- CodersInfo\r
- {\r
- NumFolders\r
- Folders[NumFolders]\r
- {\r
- NumCoders\r
- CodersInfo[NumCoders]\r
- {\r
- ID\r
- NumInStreams;\r
- NumOutStreams;\r
- PropertiesSize\r
- Properties[PropertiesSize]\r
- }\r
- NumBindPairs\r
- BindPairsInfo[NumBindPairs]\r
- {\r
- InIndex;\r
- OutIndex;\r
- }\r
- PackedIndices\r
- }\r
- UnPackSize[Folders][Folders.NumOutstreams]\r
- CRCs[NumFolders]\r
- }\r
- SubStreamsInfo\r
- {\r
- NumUnPackStreamsInFolders[NumFolders];\r
- UnPackSizes[]\r
- CRCs[]\r
- }\r
- }\r
- MainStreamsInfo\r
- {\r
- (Same as in AdditionalStreams)\r
- }\r
- FilesInfo\r
- {\r
- NumFiles\r
- Properties[]\r
- {\r
- ID\r
- Size\r
- Data\r
- }\r
- }\r
-}\r
-\r
-HeaderInfo structure\r
-~~~~~~~~~~~~~~~~~~~~\r
-{\r
- (Same as in AdditionalStreams)\r
-}\r
-\r
-\r
-\r
-Notes about Notation and encoding\r
----------------------------------\r
-\r
-7z uses little endian encoding.\r
-\r
-7z archive format has optional headers that are marked as\r
-[]\r
-Header\r
-[]\r
-\r
-REAL_UINT64 means real UINT64.\r
-\r
-UINT64 means real UINT64 encoded with the following scheme:\r
-\r
- Size of encoding sequence depends from first byte:\r
- First_Byte Extra_Bytes Value\r
- (binary) \r
- 0xxxxxxx : ( xxxxxxx )\r
- 10xxxxxx BYTE y[1] : ( xxxxxx << (8 * 1)) + y\r
- 110xxxxx BYTE y[2] : ( xxxxx << (8 * 2)) + y\r
- ...\r
- 1111110x BYTE y[6] : ( x << (8 * 6)) + y\r
- 11111110 BYTE y[7] : y\r
- 11111111 BYTE y[8] : y\r
-\r
-\r
-\r
-Property IDs\r
-------------\r
-\r
-0x00 = kEnd\r
-\r
-0x01 = kHeader\r
-\r
-0x02 = kArchiveProperties\r
- \r
-0x03 = kAdditionalStreamsInfo\r
-0x04 = kMainStreamsInfo\r
-0x05 = kFilesInfo\r
- \r
-0x06 = kPackInfo\r
-0x07 = kUnPackInfo\r
-0x08 = kSubStreamsInfo\r
-\r
-0x09 = kSize\r
-0x0A = kCRC\r
-\r
-0x0B = kFolder\r
-\r
-0x0C = kCodersUnPackSize\r
-0x0D = kNumUnPackStream\r
-\r
-0x0E = kEmptyStream\r
-0x0F = kEmptyFile\r
-0x10 = kAnti\r
-\r
-0x11 = kName\r
-0x12 = kCTime\r
-0x13 = kATime\r
-0x14 = kMTime\r
-0x15 = kWinAttributes\r
-0x16 = kComment\r
-\r
-0x17 = kEncodedHeader\r
-\r
-0x18 = kStartPos\r
-0x19 = kDummy\r
-\r
-\r
-7z format headers\r
------------------\r
-\r
-SignatureHeader\r
-~~~~~~~~~~~~~~~\r
- BYTE kSignature[6] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};\r
-\r
- ArchiveVersion\r
- {\r
- BYTE Major; // now = 0\r
- BYTE Minor; // now = 4\r
- };\r
-\r
- UINT32 StartHeaderCRC;\r
-\r
- StartHeader\r
- {\r
- REAL_UINT64 NextHeaderOffset\r
- REAL_UINT64 NextHeaderSize\r
- UINT32 NextHeaderCRC\r
- }\r
-\r
-\r
-...........................\r
-\r
-\r
-ArchiveProperties\r
-~~~~~~~~~~~~~~~~~\r
-BYTE NID::kArchiveProperties (0x02)\r
-for (;;)\r
-{\r
- BYTE PropertyType;\r
- if (aType == 0)\r
- break;\r
- UINT64 PropertySize;\r
- BYTE PropertyData[PropertySize];\r
-}\r
-\r
-\r
-Digests (NumStreams)\r
-~~~~~~~~~~~~~~~~~~~~~\r
- BYTE AllAreDefined\r
- if (AllAreDefined == 0)\r
- {\r
- for(NumStreams)\r
- BIT Defined\r
- }\r
- UINT32 CRCs[NumDefined]\r
-\r
-\r
-PackInfo\r
-~~~~~~~~~~~~\r
- BYTE NID::kPackInfo (0x06)\r
- UINT64 PackPos\r
- UINT64 NumPackStreams\r
-\r
- []\r
- BYTE NID::kSize (0x09)\r
- UINT64 PackSizes[NumPackStreams]\r
- []\r
-\r
- []\r
- BYTE NID::kCRC (0x0A)\r
- PackStreamDigests[NumPackStreams]\r
- []\r
-\r
- BYTE NID::kEnd\r
-\r
-\r
-Folder\r
-~~~~~~\r
- UINT64 NumCoders;\r
- for (NumCoders)\r
- {\r
- BYTE \r
- {\r
- 0:3 CodecIdSize\r
- 4: Is Complex Coder\r
- 5: There Are Attributes\r
- 6: Reserved\r
- 7: There are more alternative methods. (Not used anymore, must be 0).\r
- } \r
- BYTE CodecId[CodecIdSize]\r
- if (Is Complex Coder)\r
- {\r
- UINT64 NumInStreams;\r
- UINT64 NumOutStreams;\r
- }\r
- if (There Are Attributes)\r
- {\r
- UINT64 PropertiesSize\r
- BYTE Properties[PropertiesSize]\r
- }\r
- }\r
- \r
- NumBindPairs = NumOutStreamsTotal - 1;\r
-\r
- for (NumBindPairs)\r
- {\r
- UINT64 InIndex;\r
- UINT64 OutIndex;\r
- }\r
-\r
- NumPackedStreams = NumInStreamsTotal - NumBindPairs;\r
- if (NumPackedStreams > 1)\r
- for(NumPackedStreams)\r
- {\r
- UINT64 Index;\r
- };\r
-\r
-\r
-\r
-\r
-Coders Info\r
-~~~~~~~~~~~\r
-\r
- BYTE NID::kUnPackInfo (0x07)\r
-\r
-\r
- BYTE NID::kFolder (0x0B)\r
- UINT64 NumFolders\r
- BYTE External\r
- switch(External)\r
- {\r
- case 0:\r
- Folders[NumFolders]\r
- case 1:\r
- UINT64 DataStreamIndex\r
- }\r
-\r
-\r
- BYTE ID::kCodersUnPackSize (0x0C)\r
- for(Folders)\r
- for(Folder.NumOutStreams)\r
- UINT64 UnPackSize;\r
-\r
-\r
- []\r
- BYTE NID::kCRC (0x0A)\r
- UnPackDigests[NumFolders]\r
- []\r
-\r
- \r
-\r
- BYTE NID::kEnd\r
-\r
-\r
-\r
-SubStreams Info\r
-~~~~~~~~~~~~~~\r
- BYTE NID::kSubStreamsInfo; (0x08)\r
-\r
- []\r
- BYTE NID::kNumUnPackStream; (0x0D)\r
- UINT64 NumUnPackStreamsInFolders[NumFolders];\r
- []\r
-\r
-\r
- []\r
- BYTE NID::kSize (0x09)\r
- UINT64 UnPackSizes[]\r
- []\r
-\r
-\r
- []\r
- BYTE NID::kCRC (0x0A)\r
- Digests[Number of streams with unknown CRC]\r
- []\r
-\r
- \r
- BYTE NID::kEnd\r
-\r
-\r
-Streams Info\r
-~~~~~~~~~~~~\r
-\r
- []\r
- PackInfo\r
- []\r
-\r
-\r
- []\r
- CodersInfo\r
- []\r
-\r
-\r
- []\r
- SubStreamsInfo\r
- []\r
-\r
- BYTE NID::kEnd\r
-\r
-\r
-FilesInfo\r
-~~~~~~~~~\r
- BYTE NID::kFilesInfo; (0x05)\r
- UINT64 NumFiles\r
-\r
- for (;;)\r
- {\r
- BYTE PropertyType;\r
- if (aType == 0)\r
- break;\r
-\r
- UINT64 Size;\r
-\r
- switch(PropertyType)\r
- {\r
- kEmptyStream: (0x0E)\r
- for(NumFiles)\r
- BIT IsEmptyStream\r
-\r
- kEmptyFile: (0x0F)\r
- for(EmptyStreams)\r
- BIT IsEmptyFile\r
-\r
- kAnti: (0x10)\r
- for(EmptyStreams)\r
- BIT IsAntiFile\r
- \r
- case kCTime: (0x12)\r
- case kATime: (0x13)\r
- case kMTime: (0x14)\r
- BYTE AllAreDefined\r
- if (AllAreDefined == 0)\r
- {\r
- for(NumFiles)\r
- BIT TimeDefined\r
- }\r
- BYTE External;\r
- if(External != 0)\r
- UINT64 DataIndex\r
- []\r
- for(Definded Items)\r
- REAL_UINT64 Time\r
- []\r
- \r
- kNames: (0x11)\r
- BYTE External;\r
- if(External != 0)\r
- UINT64 DataIndex\r
- []\r
- for(Files)\r
- {\r
- wchar_t Names[NameSize];\r
- wchar_t 0;\r
- }\r
- []\r
-\r
- kAttributes: (0x15)\r
- BYTE AllAreDefined\r
- if (AllAreDefined == 0)\r
- {\r
- for(NumFiles)\r
- BIT AttributesAreDefined\r
- }\r
- BYTE External;\r
- if(External != 0)\r
- UINT64 DataIndex\r
- []\r
- for(Definded Attributes)\r
- UINT32 Attributes\r
- []\r
- }\r
- }\r
-\r
-\r
-Header\r
-~~~~~~\r
- BYTE NID::kHeader (0x01)\r
-\r
- []\r
- ArchiveProperties\r
- []\r
-\r
- []\r
- BYTE NID::kAdditionalStreamsInfo; (0x03)\r
- StreamsInfo\r
- []\r
-\r
- []\r
- BYTE NID::kMainStreamsInfo; (0x04)\r
- StreamsInfo\r
- []\r
-\r
- []\r
- FilesInfo\r
- []\r
-\r
- BYTE NID::kEnd\r
-\r
-\r
-HeaderInfo\r
-~~~~~~~~~~\r
- []\r
- BYTE NID::kEncodedHeader; (0x17)\r
- StreamsInfo for Encoded Header\r
- []\r
-\r
-\r
----\r
-End of document\r
+++ /dev/null
-add_library(lzma STATIC
- include/7zTypes.h
- include/Alloc.h
- include/Bra.h
- include/Compiler.h
- include/CpuArch.h
- include/Delta.h
- include/LzFind.h
- include/LzHash.h
- include/Lzma86.h
- include/LzmaDec.h
- include/LzmaEnc.h
- include/LzmaLib.h
- include/Precomp.h
- include/Sort.h
- src/Alloc.c
- src/Bra86.c
- src/BraIA64.c
- src/CpuArch.c
- src/Delta.c
- src/LzFind.c
- src/Lzma86Dec.c
- src/LzmaDec.c
- src/LzmaEnc.c
- src/Sort.c
-)
-
-target_compile_definitions(lzma PRIVATE _7ZIP_ST)
-
-target_include_directories(lzma PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include")
-target_include_directories(lzma INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/include")
-
-set_target_properties(lzma PROPERTIES POSITION_INDEPENDENT_CODE ON)
+++ /dev/null
-LZMA SDK is placed in the public domain.
-
-Anyone is free to copy, modify, publish, use, compile, sell, or distribute the original LZMA SDK code, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means.
\ No newline at end of file
+++ /dev/null
-7-Zip method IDs for 7z and xz archives\r
----------------------------------------\r
-\r
-Version: 18.06\r
-Date: 2018-06-30\r
-\r
-Each compression or crypto method in 7z is associated with unique binary value (ID).\r
-The length of ID in bytes is arbitrary but it can not exceed 63 bits (8 bytes).\r
-\r
-xz and 7z formats use same ID map.\r
-\r
-If you want to add some new ID, you have two ways:\r
- 1) Write request for allocating IDs to 7-Zip developers.\r
- 2) Generate 8-bytes ID:\r
-\r
- 3F ZZ ZZ ZZ ZZ ZZ MM MM \r
-\r
- 3F - Prefix for random IDs (1 byte)\r
- ZZ ZZ ZZ ZZ ZZ - Developer ID (5 bytes). Use real random bytes. \r
- \r
- MM MM - Method ID (2 bytes)\r
-\r
- You can notify 7-Zip developers about your Developer ID / Method ID.\r
-\r
- Note: Use new ID, if old codec can not decode data encoded with new version.\r
-\r
-\r
-List of defined IDs\r
--------------------\r
- \r
-00 - Copy\r
-\r
-03 - Delta\r
-04 - BCJ (x86)\r
-05 - PPC (big-endian)\r
-06 - IA64\r
-07 - ARM (little-endian)\r
-08 - ARMT (little-endian)\r
-09 - SPARC\r
-\r
-21 - LZMA2\r
- \r
-02.. - Common\r
- 03 [Swap]\r
- - 2 Swap2\r
- - 4 Swap4\r
-\r
-03.. - 7z\r
- 01 - \r
- 01 - LZMA\r
- \r
- 03 - [Branch Codecs]\r
- 01 - [x86 Codecs]\r
- 03 - BCJ\r
- 1B - BCJ2 (4 packed streams)\r
- 02 - \r
- 05 - PPC (big-endian)\r
- 03 - \r
- 01 - Alpha\r
- 04 - \r
- 01 - IA64\r
- 05 - \r
- 01 - ARM (little-endian)\r
- 06 - \r
- 05 - M68 (big-endian)\r
- 07 - \r
- 01 - ARMT (little-endian)\r
- 08 - \r
- 05 - SPARC\r
-\r
- 04 - \r
- 01 - PPMD\r
-\r
- 7F -\r
- 01 - experimental method.\r
-\r
-\r
-04.. - Misc codecs\r
-\r
- 00 - Reserved\r
-\r
- 01 - [Zip]\r
- 00 - Copy (not used. Use {00} instead)\r
- 01 - Shrink\r
- 06 - Implode\r
- 08 - Deflate\r
- 09 - Deflate64\r
- 0A - Imploding\r
- 0C - BZip2 (not used. Use {040202} instead)\r
- 0E - LZMA (LZMA-zip)\r
- 5F - xz\r
- 60 - Jpeg\r
- 61 - WavPack\r
- 62 - PPMd (PPMd-zip)\r
- 63 - wzAES\r
-\r
- 02 - \r
- 02 - BZip2\r
-\r
- 03 - [Rar]\r
- 01 - Rar1\r
- 02 - Rar2\r
- 03 - Rar3\r
- 05 - Rar5\r
-\r
- 04 - [Arj]\r
- 01 - Arj(1,2,3)\r
- 02 - Arj4\r
-\r
- 05 - [Z]\r
-\r
- 06 - [Lzh]\r
-\r
- 07 - Reserved for 7z\r
-\r
- 08 - [Cab]\r
-\r
- 09 - [NSIS]\r
- 01 - DeflateNSIS\r
- 02 - BZip2NSIS\r
-\r
- F7 - External codecs (that are not included to 7-Zip)\r
-\r
- 0x xx - reserved\r
-\r
- 10 xx - reserved (LZHAM)\r
- 01 - LZHAM\r
-\r
- 11 xx - reserved (Tino Reichardt)\r
- 01 - ZSTD\r
- 02 - BROTLI\r
- 04 - LZ4\r
- 05 - LZ5\r
- 06 - LIZARD\r
-\r
- 12 xx - reserverd (Denis Anisimov)\r
- \r
- 01 - WavPack2\r
- FE - eSplitter \r
- FF - RawSplitter\r
- \r
-\r
-06.. - Crypto \r
-\r
- F0 - Ciphers without hashing algo\r
-\r
- 01 - [AES]\r
- 0x - AES-128\r
- 4x - AES-192\r
- 8x - AES-256\r
- Cx - AES\r
-\r
- x0 - ECB\r
- x1 - CBC\r
- x2 - CFB\r
- x3 - OFB\r
- x4 - CTR\r
-\r
- F1 - Combine Ciphers\r
-\r
- 01 - [Zip]\r
- 01 - ZipCrypto (Main Zip crypto algo)\r
-\r
- 03 - [RAR]\r
- 02 - \r
- 03 - Rar29AES (AES-128 + modified SHA-1)\r
-\r
- 07 - [7z]\r
- 01 - 7zAES (AES-256 + SHA-256)\r
-\r
-\r
----\r
-End of document\r
+++ /dev/null
-/* 7z.h -- 7z interface\r
-2018-07-02 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __7Z_H\r
-#define __7Z_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-#define k7zStartHeaderSize 0x20\r
-#define k7zSignatureSize 6\r
-\r
-extern const Byte k7zSignature[k7zSignatureSize];\r
-\r
-typedef struct\r
-{\r
- const Byte *Data;\r
- size_t Size;\r
-} CSzData;\r
-\r
-/* CSzCoderInfo & CSzFolder support only default methods */\r
-\r
-typedef struct\r
-{\r
- size_t PropsOffset;\r
- UInt32 MethodID;\r
- Byte NumStreams;\r
- Byte PropsSize;\r
-} CSzCoderInfo;\r
-\r
-typedef struct\r
-{\r
- UInt32 InIndex;\r
- UInt32 OutIndex;\r
-} CSzBond;\r
-\r
-#define SZ_NUM_CODERS_IN_FOLDER_MAX 4\r
-#define SZ_NUM_BONDS_IN_FOLDER_MAX 3\r
-#define SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX 4\r
-\r
-typedef struct\r
-{\r
- UInt32 NumCoders;\r
- UInt32 NumBonds;\r
- UInt32 NumPackStreams;\r
- UInt32 UnpackStream;\r
- UInt32 PackStreams[SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX];\r
- CSzBond Bonds[SZ_NUM_BONDS_IN_FOLDER_MAX];\r
- CSzCoderInfo Coders[SZ_NUM_CODERS_IN_FOLDER_MAX];\r
-} CSzFolder;\r
-\r
-\r
-SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd);\r
-\r
-typedef struct\r
-{\r
- UInt32 Low;\r
- UInt32 High;\r
-} CNtfsFileTime;\r
-\r
-typedef struct\r
-{\r
- Byte *Defs; /* MSB 0 bit numbering */\r
- UInt32 *Vals;\r
-} CSzBitUi32s;\r
-\r
-typedef struct\r
-{\r
- Byte *Defs; /* MSB 0 bit numbering */\r
- // UInt64 *Vals;\r
- CNtfsFileTime *Vals;\r
-} CSzBitUi64s;\r
-\r
-#define SzBitArray_Check(p, i) (((p)[(i) >> 3] & (0x80 >> ((i) & 7))) != 0)\r
-\r
-#define SzBitWithVals_Check(p, i) ((p)->Defs && ((p)->Defs[(i) >> 3] & (0x80 >> ((i) & 7))) != 0)\r
-\r
-typedef struct\r
-{\r
- UInt32 NumPackStreams;\r
- UInt32 NumFolders;\r
-\r
- UInt64 *PackPositions; // NumPackStreams + 1\r
- CSzBitUi32s FolderCRCs; // NumFolders\r
-\r
- size_t *FoCodersOffsets; // NumFolders + 1\r
- UInt32 *FoStartPackStreamIndex; // NumFolders + 1\r
- UInt32 *FoToCoderUnpackSizes; // NumFolders + 1\r
- Byte *FoToMainUnpackSizeIndex; // NumFolders\r
- UInt64 *CoderUnpackSizes; // for all coders in all folders\r
-\r
- Byte *CodersData;\r
-\r
- UInt64 RangeLimit;\r
-} CSzAr;\r
-\r
-UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex);\r
-\r
-SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,\r
- ILookInStream *stream, UInt64 startPos,\r
- Byte *outBuffer, size_t outSize,\r
- ISzAllocPtr allocMain);\r
-\r
-typedef struct\r
-{\r
- CSzAr db;\r
-\r
- UInt64 startPosAfterHeader;\r
- UInt64 dataPos;\r
- \r
- UInt32 NumFiles;\r
-\r
- UInt64 *UnpackPositions; // NumFiles + 1\r
- // Byte *IsEmptyFiles;\r
- Byte *IsDirs;\r
- CSzBitUi32s CRCs;\r
-\r
- CSzBitUi32s Attribs;\r
- // CSzBitUi32s Parents;\r
- CSzBitUi64s MTime;\r
- CSzBitUi64s CTime;\r
-\r
- UInt32 *FolderToFile; // NumFolders + 1\r
- UInt32 *FileToFolder; // NumFiles\r
-\r
- size_t *FileNameOffsets; /* in 2-byte steps */\r
- Byte *FileNames; /* UTF-16-LE */\r
-} CSzArEx;\r
-\r
-#define SzArEx_IsDir(p, i) (SzBitArray_Check((p)->IsDirs, i))\r
-\r
-#define SzArEx_GetFileSize(p, i) ((p)->UnpackPositions[(i) + 1] - (p)->UnpackPositions[i])\r
-\r
-void SzArEx_Init(CSzArEx *p);\r
-void SzArEx_Free(CSzArEx *p, ISzAllocPtr alloc);\r
-UInt64 SzArEx_GetFolderStreamPos(const CSzArEx *p, UInt32 folderIndex, UInt32 indexInFolder);\r
-int SzArEx_GetFolderFullPackSize(const CSzArEx *p, UInt32 folderIndex, UInt64 *resSize);\r
-\r
-/*\r
-if dest == NULL, the return value specifies the required size of the buffer,\r
- in 16-bit characters, including the null-terminating character.\r
-if dest != NULL, the return value specifies the number of 16-bit characters that\r
- are written to the dest, including the null-terminating character. */\r
-\r
-size_t SzArEx_GetFileNameUtf16(const CSzArEx *p, size_t fileIndex, UInt16 *dest);\r
-\r
-/*\r
-size_t SzArEx_GetFullNameLen(const CSzArEx *p, size_t fileIndex);\r
-UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16 *dest);\r
-*/\r
-\r
-\r
-\r
-/*\r
- SzArEx_Extract extracts file from archive\r
-\r
- *outBuffer must be 0 before first call for each new archive.\r
-\r
- Extracting cache:\r
- If you need to decompress more than one file, you can send\r
- these values from previous call:\r
- *blockIndex,\r
- *outBuffer,\r
- *outBufferSize\r
- You can consider "*outBuffer" as cache of solid block. If your archive is solid,\r
- it will increase decompression speed.\r
- \r
- If you use external function, you can declare these 3 cache variables\r
- (blockIndex, outBuffer, outBufferSize) as static in that external function.\r
- \r
- Free *outBuffer and set *outBuffer to 0, if you want to flush cache.\r
-*/\r
-\r
-SRes SzArEx_Extract(\r
- const CSzArEx *db,\r
- ILookInStream *inStream,\r
- UInt32 fileIndex, /* index of file */\r
- UInt32 *blockIndex, /* index of solid block */\r
- Byte **outBuffer, /* pointer to pointer to output buffer (allocated with allocMain) */\r
- size_t *outBufferSize, /* buffer size for output buffer */\r
- size_t *offset, /* offset of stream for required file in *outBuffer */\r
- size_t *outSizeProcessed, /* size of file in *outBuffer */\r
- ISzAllocPtr allocMain,\r
- ISzAllocPtr allocTemp);\r
-\r
-\r
-/*\r
-SzArEx_Open Errors:\r
-SZ_ERROR_NO_ARCHIVE\r
-SZ_ERROR_ARCHIVE\r
-SZ_ERROR_UNSUPPORTED\r
-SZ_ERROR_MEM\r
-SZ_ERROR_CRC\r
-SZ_ERROR_INPUT_EOF\r
-SZ_ERROR_FAIL\r
-*/\r
-\r
-SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream,\r
- ISzAllocPtr allocMain, ISzAllocPtr allocTemp);\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* 7zAlloc.h -- Allocation functions\r
-2017-04-03 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __7Z_ALLOC_H\r
-#define __7Z_ALLOC_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-void *SzAlloc(ISzAllocPtr p, size_t size);\r
-void SzFree(ISzAllocPtr p, void *address);\r
-\r
-void *SzAllocTemp(ISzAllocPtr p, size_t size);\r
-void SzFreeTemp(ISzAllocPtr p, void *address);\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* 7zBuf.h -- Byte Buffer\r
-2017-04-03 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __7Z_BUF_H\r
-#define __7Z_BUF_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-typedef struct\r
-{\r
- Byte *data;\r
- size_t size;\r
-} CBuf;\r
-\r
-void Buf_Init(CBuf *p);\r
-int Buf_Create(CBuf *p, size_t size, ISzAllocPtr alloc);\r
-void Buf_Free(CBuf *p, ISzAllocPtr alloc);\r
-\r
-typedef struct\r
-{\r
- Byte *data;\r
- size_t size;\r
- size_t pos;\r
-} CDynBuf;\r
-\r
-void DynBuf_Construct(CDynBuf *p);\r
-void DynBuf_SeekToBeg(CDynBuf *p);\r
-int DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAllocPtr alloc);\r
-void DynBuf_Free(CDynBuf *p, ISzAllocPtr alloc);\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* 7zCrc.h -- CRC32 calculation\r
-2013-01-18 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __7Z_CRC_H\r
-#define __7Z_CRC_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-extern UInt32 g_CrcTable[];\r
-\r
-/* Call CrcGenerateTable one time before other CRC functions */\r
-void MY_FAST_CALL CrcGenerateTable(void);\r
-\r
-#define CRC_INIT_VAL 0xFFFFFFFF\r
-#define CRC_GET_DIGEST(crc) ((crc) ^ CRC_INIT_VAL)\r
-#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))\r
-\r
-UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void *data, size_t size);\r
-UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size);\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* 7zFile.h -- File IO\r
-2021-02-15 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __7Z_FILE_H\r
-#define __7Z_FILE_H\r
-\r
-#ifdef _WIN32\r
-#define USE_WINDOWS_FILE\r
-// #include <windows.h>\r
-#endif\r
-\r
-#ifdef USE_WINDOWS_FILE\r
-#include <windows.h>\r
-#else\r
-// note: USE_FOPEN mode is limited to 32-bit file size\r
-// #define USE_FOPEN\r
-// #include <stdio.h>\r
-#endif\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-/* ---------- File ---------- */\r
-\r
-typedef struct\r
-{\r
- #ifdef USE_WINDOWS_FILE\r
- HANDLE handle;\r
- #elif defined(USE_FOPEN)\r
- FILE *file;\r
- #else\r
- int fd;\r
- #endif\r
-} CSzFile;\r
-\r
-void File_Construct(CSzFile *p);\r
-#if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE)\r
-WRes InFile_Open(CSzFile *p, const char *name);\r
-WRes OutFile_Open(CSzFile *p, const char *name);\r
-#endif\r
-#ifdef USE_WINDOWS_FILE\r
-WRes InFile_OpenW(CSzFile *p, const WCHAR *name);\r
-WRes OutFile_OpenW(CSzFile *p, const WCHAR *name);\r
-#endif\r
-WRes File_Close(CSzFile *p);\r
-\r
-/* reads max(*size, remain file's size) bytes */\r
-WRes File_Read(CSzFile *p, void *data, size_t *size);\r
-\r
-/* writes *size bytes */\r
-WRes File_Write(CSzFile *p, const void *data, size_t *size);\r
-\r
-WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin);\r
-WRes File_GetLength(CSzFile *p, UInt64 *length);\r
-\r
-\r
-/* ---------- FileInStream ---------- */\r
-\r
-typedef struct\r
-{\r
- ISeqInStream vt;\r
- CSzFile file;\r
- WRes wres;\r
-} CFileSeqInStream;\r
-\r
-void FileSeqInStream_CreateVTable(CFileSeqInStream *p);\r
-\r
-\r
-typedef struct\r
-{\r
- ISeekInStream vt;\r
- CSzFile file;\r
- WRes wres;\r
-} CFileInStream;\r
-\r
-void FileInStream_CreateVTable(CFileInStream *p);\r
-\r
-\r
-typedef struct\r
-{\r
- ISeqOutStream vt;\r
- CSzFile file;\r
- WRes wres;\r
-} CFileOutStream;\r
-\r
-void FileOutStream_CreateVTable(CFileOutStream *p);\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-#define MY_VER_MAJOR 22\r
-#define MY_VER_MINOR 01\r
-#define MY_VER_BUILD 0\r
-#define MY_VERSION_NUMBERS "22.01"\r
-#define MY_VERSION MY_VERSION_NUMBERS\r
-\r
-#ifdef MY_CPU_NAME\r
- #define MY_VERSION_CPU MY_VERSION " (" MY_CPU_NAME ")"\r
-#else\r
- #define MY_VERSION_CPU MY_VERSION\r
-#endif\r
-\r
-#define MY_DATE "2022-07-15"\r
-#undef MY_COPYRIGHT\r
-#undef MY_VERSION_COPYRIGHT_DATE\r
-#define MY_AUTHOR_NAME "Igor Pavlov"\r
-#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"\r
-#define MY_COPYRIGHT_CR "Copyright (c) 1999-2022 Igor Pavlov"\r
-\r
-#ifdef USE_COPYRIGHT_CR\r
- #define MY_COPYRIGHT MY_COPYRIGHT_CR\r
-#else\r
- #define MY_COPYRIGHT MY_COPYRIGHT_PD\r
-#endif\r
-\r
-#define MY_COPYRIGHT_DATE MY_COPYRIGHT " : " MY_DATE\r
-#define MY_VERSION_COPYRIGHT_DATE MY_VERSION_CPU " : " MY_COPYRIGHT " : " MY_DATE\r
+++ /dev/null
-/* Aes.h -- AES encryption / decryption\r
-2018-04-28 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __AES_H\r
-#define __AES_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-#define AES_BLOCK_SIZE 16\r
-\r
-/* Call AesGenTables one time before other AES functions */\r
-void AesGenTables(void);\r
-\r
-/* UInt32 pointers must be 16-byte aligned */\r
-\r
-/* 16-byte (4 * 32-bit words) blocks: 1 (IV) + 1 (keyMode) + 15 (AES-256 roundKeys) */\r
-#define AES_NUM_IVMRK_WORDS ((1 + 1 + 15) * 4)\r
-\r
-/* aes - 16-byte aligned pointer to keyMode+roundKeys sequence */\r
-/* keySize = 16 or 24 or 32 (bytes) */\r
-typedef void (MY_FAST_CALL *AES_SET_KEY_FUNC)(UInt32 *aes, const Byte *key, unsigned keySize);\r
-void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *aes, const Byte *key, unsigned keySize);\r
-void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize);\r
-\r
-/* ivAes - 16-byte aligned pointer to iv+keyMode+roundKeys sequence: UInt32[AES_NUM_IVMRK_WORDS] */\r
-void AesCbc_Init(UInt32 *ivAes, const Byte *iv); /* iv size is AES_BLOCK_SIZE */\r
-\r
-/* data - 16-byte aligned pointer to data */\r
-/* numBlocks - the number of 16-byte blocks in data array */\r
-typedef void (MY_FAST_CALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks);\r
-\r
-extern AES_CODE_FUNC g_AesCbc_Decode;\r
-#ifndef _SFX\r
-extern AES_CODE_FUNC g_AesCbc_Encode;\r
-extern AES_CODE_FUNC g_AesCtr_Code;\r
-#define k_Aes_SupportedFunctions_HW (1 << 2)\r
-#define k_Aes_SupportedFunctions_HW_256 (1 << 3)\r
-extern UInt32 g_Aes_SupportedFunctions_Flags;\r
-#endif\r
-\r
-\r
-#define DECLARE__AES_CODE_FUNC(funcName) \\r
- void MY_FAST_CALL funcName(UInt32 *ivAes, Byte *data, size_t numBlocks);\r
-\r
-DECLARE__AES_CODE_FUNC (AesCbc_Encode)\r
-DECLARE__AES_CODE_FUNC (AesCbc_Decode)\r
-DECLARE__AES_CODE_FUNC (AesCtr_Code)\r
-\r
-DECLARE__AES_CODE_FUNC (AesCbc_Encode_HW)\r
-DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW)\r
-DECLARE__AES_CODE_FUNC (AesCtr_Code_HW)\r
-\r
-DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW_256)\r
-DECLARE__AES_CODE_FUNC (AesCtr_Code_HW_256)\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* Alloc.h -- Memory allocation functions\r
-2021-07-13 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __COMMON_ALLOC_H\r
-#define __COMMON_ALLOC_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-void *MyAlloc(size_t size);\r
-void MyFree(void *address);\r
-\r
-#ifdef _WIN32\r
-\r
-void SetLargePageSize(void);\r
-\r
-void *MidAlloc(size_t size);\r
-void MidFree(void *address);\r
-void *BigAlloc(size_t size);\r
-void BigFree(void *address);\r
-\r
-#else\r
-\r
-#define MidAlloc(size) MyAlloc(size)\r
-#define MidFree(address) MyFree(address)\r
-#define BigAlloc(size) MyAlloc(size)\r
-#define BigFree(address) MyFree(address)\r
-\r
-#endif\r
-\r
-extern const ISzAlloc g_Alloc;\r
-\r
-#ifdef _WIN32\r
-extern const ISzAlloc g_BigAlloc;\r
-extern const ISzAlloc g_MidAlloc;\r
-#else\r
-#define g_BigAlloc g_AlignedAlloc\r
-#define g_MidAlloc g_AlignedAlloc\r
-#endif\r
-\r
-extern const ISzAlloc g_AlignedAlloc;\r
-\r
-\r
-typedef struct\r
-{\r
- ISzAlloc vt;\r
- ISzAllocPtr baseAlloc;\r
- unsigned numAlignBits; /* ((1 << numAlignBits) >= sizeof(void *)) */\r
- size_t offset; /* (offset == (k * sizeof(void *)) && offset < (1 << numAlignBits) */\r
-} CAlignOffsetAlloc;\r
-\r
-void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p);\r
-\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* Bcj2.h -- BCJ2 Converter for x86 code\r
-2014-11-10 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __BCJ2_H\r
-#define __BCJ2_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-#define BCJ2_NUM_STREAMS 4\r
-\r
-enum\r
-{\r
- BCJ2_STREAM_MAIN,\r
- BCJ2_STREAM_CALL,\r
- BCJ2_STREAM_JUMP,\r
- BCJ2_STREAM_RC\r
-};\r
-\r
-enum\r
-{\r
- BCJ2_DEC_STATE_ORIG_0 = BCJ2_NUM_STREAMS,\r
- BCJ2_DEC_STATE_ORIG_1,\r
- BCJ2_DEC_STATE_ORIG_2,\r
- BCJ2_DEC_STATE_ORIG_3,\r
- \r
- BCJ2_DEC_STATE_ORIG,\r
- BCJ2_DEC_STATE_OK\r
-};\r
-\r
-enum\r
-{\r
- BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS,\r
- BCJ2_ENC_STATE_OK\r
-};\r
-\r
-\r
-#define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP)\r
-\r
-/*\r
-CBcj2Dec / CBcj2Enc\r
-bufs sizes:\r
- BUF_SIZE(n) = lims[n] - bufs[n]\r
-bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be mutliply of 4:\r
- (BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0\r
- (BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0\r
-*/\r
-\r
-/*\r
-CBcj2Dec:\r
-dest is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions:\r
- bufs[BCJ2_STREAM_MAIN] >= dest &&\r
- bufs[BCJ2_STREAM_MAIN] - dest >= tempReserv +\r
- BUF_SIZE(BCJ2_STREAM_CALL) +\r
- BUF_SIZE(BCJ2_STREAM_JUMP)\r
- tempReserv = 0 : for first call of Bcj2Dec_Decode\r
- tempReserv = 4 : for any other calls of Bcj2Dec_Decode\r
- overlap with offset = 1 is not allowed\r
-*/\r
-\r
-typedef struct\r
-{\r
- const Byte *bufs[BCJ2_NUM_STREAMS];\r
- const Byte *lims[BCJ2_NUM_STREAMS];\r
- Byte *dest;\r
- const Byte *destLim;\r
-\r
- unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */\r
-\r
- UInt32 ip;\r
- Byte temp[4];\r
- UInt32 range;\r
- UInt32 code;\r
- UInt16 probs[2 + 256];\r
-} CBcj2Dec;\r
-\r
-void Bcj2Dec_Init(CBcj2Dec *p);\r
-\r
-/* Returns: SZ_OK or SZ_ERROR_DATA */\r
-SRes Bcj2Dec_Decode(CBcj2Dec *p);\r
-\r
-#define Bcj2Dec_IsFinished(_p_) ((_p_)->code == 0)\r
-\r
-\r
-\r
-typedef enum\r
-{\r
- BCJ2_ENC_FINISH_MODE_CONTINUE,\r
- BCJ2_ENC_FINISH_MODE_END_BLOCK,\r
- BCJ2_ENC_FINISH_MODE_END_STREAM\r
-} EBcj2Enc_FinishMode;\r
-\r
-typedef struct\r
-{\r
- Byte *bufs[BCJ2_NUM_STREAMS];\r
- const Byte *lims[BCJ2_NUM_STREAMS];\r
- const Byte *src;\r
- const Byte *srcLim;\r
-\r
- unsigned state;\r
- EBcj2Enc_FinishMode finishMode;\r
-\r
- Byte prevByte;\r
-\r
- Byte cache;\r
- UInt32 range;\r
- UInt64 low;\r
- UInt64 cacheSize;\r
-\r
- UInt32 ip;\r
-\r
- /* 32-bit ralative offset in JUMP/CALL commands is\r
- - (mod 4 GB) in 32-bit mode\r
- - signed Int32 in 64-bit mode\r
- We use (mod 4 GB) check for fileSize.\r
- Use fileSize up to 2 GB, if you want to support 32-bit and 64-bit code conversion. */\r
- UInt32 fileIp;\r
- UInt32 fileSize; /* (fileSize <= ((UInt32)1 << 31)), 0 means no_limit */\r
- UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)), 0 means desable_conversion */\r
-\r
- UInt32 tempTarget;\r
- unsigned tempPos;\r
- Byte temp[4 * 2];\r
-\r
- unsigned flushPos;\r
- \r
- UInt16 probs[2 + 256];\r
-} CBcj2Enc;\r
-\r
-void Bcj2Enc_Init(CBcj2Enc *p);\r
-void Bcj2Enc_Encode(CBcj2Enc *p);\r
-\r
-#define Bcj2Enc_Get_InputData_Size(p) ((SizeT)((p)->srcLim - (p)->src) + (p)->tempPos)\r
-#define Bcj2Enc_IsFinished(p) ((p)->flushPos == 5)\r
-\r
-\r
-#define BCJ2_RELAT_LIMIT_NUM_BITS 26\r
-#define BCJ2_RELAT_LIMIT ((UInt32)1 << BCJ2_RELAT_LIMIT_NUM_BITS)\r
-\r
-/* limit for CBcj2Enc::fileSize variable */\r
-#define BCJ2_FileSize_MAX ((UInt32)1 << 31)\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* Bra.h -- Branch converters for executables\r
-2013-01-18 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __BRA_H\r
-#define __BRA_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-/*\r
-These functions convert relative addresses to absolute addresses\r
-in CALL instructions to increase the compression ratio.\r
- \r
- In:\r
- data - data buffer\r
- size - size of data\r
- ip - current virtual Instruction Pinter (IP) value\r
- state - state variable for x86 converter\r
- encoding - 0 (for decoding), 1 (for encoding)\r
- \r
- Out:\r
- state - state variable for x86 converter\r
-\r
- Returns:\r
- The number of processed bytes. If you call these functions with multiple calls,\r
- you must start next call with first byte after block of processed bytes.\r
- \r
- Type Endian Alignment LookAhead\r
- \r
- x86 little 1 4\r
- ARMT little 2 2\r
- ARM little 4 0\r
- PPC big 4 0\r
- SPARC big 4 0\r
- IA64 little 16 0\r
-\r
- size must be >= Alignment + LookAhead, if it's not last block.\r
- If (size < Alignment + LookAhead), converter returns 0.\r
-\r
- Example:\r
-\r
- UInt32 ip = 0;\r
- for ()\r
- {\r
- ; size must be >= Alignment + LookAhead, if it's not last block\r
- SizeT processed = Convert(data, size, ip, 1);\r
- data += processed;\r
- size -= processed;\r
- ip += processed;\r
- }\r
-*/\r
-\r
-#define x86_Convert_Init(state) { state = 0; }\r
-SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);\r
-SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);\r
-SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);\r
-SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);\r
-SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);\r
-SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* Compiler.h\r
-2021-01-05 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __7Z_COMPILER_H\r
-#define __7Z_COMPILER_H\r
-\r
- #ifdef __clang__\r
- #pragma clang diagnostic ignored "-Wunused-private-field"\r
- #endif\r
-\r
-#ifdef _MSC_VER\r
-\r
- #ifdef UNDER_CE\r
- #define RPC_NO_WINDOWS_H\r
- /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */\r
- #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union\r
- #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int\r
- #endif\r
-\r
- #if _MSC_VER >= 1300\r
- #pragma warning(disable : 4996) // This function or variable may be unsafe\r
- #else\r
- #pragma warning(disable : 4511) // copy constructor could not be generated\r
- #pragma warning(disable : 4512) // assignment operator could not be generated\r
- #pragma warning(disable : 4514) // unreferenced inline function has been removed\r
- #pragma warning(disable : 4702) // unreachable code\r
- #pragma warning(disable : 4710) // not inlined\r
- #pragma warning(disable : 4714) // function marked as __forceinline not inlined\r
- #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information\r
- #endif\r
-\r
- #ifdef __clang__\r
- #pragma clang diagnostic ignored "-Wdeprecated-declarations"\r
- #pragma clang diagnostic ignored "-Wmicrosoft-exception-spec"\r
- // #pragma clang diagnostic ignored "-Wreserved-id-macro"\r
- #endif\r
-\r
-#endif\r
-\r
-#define UNUSED_VAR(x) (void)x;\r
-/* #define UNUSED_VAR(x) x=x; */\r
-\r
-#endif\r
+++ /dev/null
-/* CpuArch.h -- CPU specific code\r
-2022-07-15 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __CPU_ARCH_H\r
-#define __CPU_ARCH_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-/*\r
-MY_CPU_LE means that CPU is LITTLE ENDIAN.\r
-MY_CPU_BE means that CPU is BIG ENDIAN.\r
-If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform.\r
-\r
-MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses.\r
-\r
-MY_CPU_64BIT means that processor can work with 64-bit registers.\r
- MY_CPU_64BIT can be used to select fast code branch\r
- MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)\r
-*/\r
-\r
-#if defined(_M_X64) \\r
- || defined(_M_AMD64) \\r
- || defined(__x86_64__) \\r
- || defined(__AMD64__) \\r
- || defined(__amd64__)\r
- #define MY_CPU_AMD64\r
- #ifdef __ILP32__\r
- #define MY_CPU_NAME "x32"\r
- #define MY_CPU_SIZEOF_POINTER 4\r
- #else\r
- #define MY_CPU_NAME "x64"\r
- #define MY_CPU_SIZEOF_POINTER 8\r
- #endif\r
- #define MY_CPU_64BIT\r
-#endif\r
-\r
-\r
-#if defined(_M_IX86) \\r
- || defined(__i386__)\r
- #define MY_CPU_X86\r
- #define MY_CPU_NAME "x86"\r
- /* #define MY_CPU_32BIT */\r
- #define MY_CPU_SIZEOF_POINTER 4\r
-#endif\r
-\r
-\r
-#if defined(_M_ARM64) \\r
- || defined(__AARCH64EL__) \\r
- || defined(__AARCH64EB__) \\r
- || defined(__aarch64__)\r
- #define MY_CPU_ARM64\r
- #define MY_CPU_NAME "arm64"\r
- #define MY_CPU_64BIT\r
-#endif\r
-\r
-\r
-#if defined(_M_ARM) \\r
- || defined(_M_ARM_NT) \\r
- || defined(_M_ARMT) \\r
- || defined(__arm__) \\r
- || defined(__thumb__) \\r
- || defined(__ARMEL__) \\r
- || defined(__ARMEB__) \\r
- || defined(__THUMBEL__) \\r
- || defined(__THUMBEB__)\r
- #define MY_CPU_ARM\r
-\r
- #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)\r
- #define MY_CPU_NAME "armt"\r
- #else\r
- #define MY_CPU_NAME "arm"\r
- #endif\r
- /* #define MY_CPU_32BIT */\r
- #define MY_CPU_SIZEOF_POINTER 4\r
-#endif\r
-\r
-\r
-#if defined(_M_IA64) \\r
- || defined(__ia64__)\r
- #define MY_CPU_IA64\r
- #define MY_CPU_NAME "ia64"\r
- #define MY_CPU_64BIT\r
-#endif\r
-\r
-\r
-#if defined(__mips64) \\r
- || defined(__mips64__) \\r
- || (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3))\r
- #define MY_CPU_NAME "mips64"\r
- #define MY_CPU_64BIT\r
-#elif defined(__mips__)\r
- #define MY_CPU_NAME "mips"\r
- /* #define MY_CPU_32BIT */\r
-#endif\r
-\r
-\r
-#if defined(__ppc64__) \\r
- || defined(__powerpc64__) \\r
- || defined(__ppc__) \\r
- || defined(__powerpc__) \\r
- || defined(__PPC__) \\r
- || defined(_POWER)\r
-\r
-#if defined(__ppc64__) \\r
- || defined(__powerpc64__) \\r
- || defined(_LP64) \\r
- || defined(__64BIT__)\r
- #ifdef __ILP32__\r
- #define MY_CPU_NAME "ppc64-32"\r
- #define MY_CPU_SIZEOF_POINTER 4\r
- #else\r
- #define MY_CPU_NAME "ppc64"\r
- #define MY_CPU_SIZEOF_POINTER 8\r
- #endif\r
- #define MY_CPU_64BIT\r
-#else\r
- #define MY_CPU_NAME "ppc"\r
- #define MY_CPU_SIZEOF_POINTER 4\r
- /* #define MY_CPU_32BIT */\r
-#endif\r
-#endif\r
-\r
-\r
-#if defined(__riscv) \\r
- || defined(__riscv__)\r
- #if __riscv_xlen == 32\r
- #define MY_CPU_NAME "riscv32"\r
- #elif __riscv_xlen == 64\r
- #define MY_CPU_NAME "riscv64"\r
- #else\r
- #define MY_CPU_NAME "riscv"\r
- #endif\r
-#endif\r
-\r
-\r
-#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)\r
-#define MY_CPU_X86_OR_AMD64\r
-#endif\r
-\r
-#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64)\r
-#define MY_CPU_ARM_OR_ARM64\r
-#endif\r
-\r
-\r
-#ifdef _WIN32\r
-\r
- #ifdef MY_CPU_ARM\r
- #define MY_CPU_ARM_LE\r
- #endif\r
-\r
- #ifdef MY_CPU_ARM64\r
- #define MY_CPU_ARM64_LE\r
- #endif\r
-\r
- #ifdef _M_IA64\r
- #define MY_CPU_IA64_LE\r
- #endif\r
-\r
-#endif\r
-\r
-\r
-#if defined(MY_CPU_X86_OR_AMD64) \\r
- || defined(MY_CPU_ARM_LE) \\r
- || defined(MY_CPU_ARM64_LE) \\r
- || defined(MY_CPU_IA64_LE) \\r
- || defined(__LITTLE_ENDIAN__) \\r
- || defined(__ARMEL__) \\r
- || defined(__THUMBEL__) \\r
- || defined(__AARCH64EL__) \\r
- || defined(__MIPSEL__) \\r
- || defined(__MIPSEL) \\r
- || defined(_MIPSEL) \\r
- || defined(__BFIN__) \\r
- || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))\r
- #define MY_CPU_LE\r
-#endif\r
-\r
-#if defined(__BIG_ENDIAN__) \\r
- || defined(__ARMEB__) \\r
- || defined(__THUMBEB__) \\r
- || defined(__AARCH64EB__) \\r
- || defined(__MIPSEB__) \\r
- || defined(__MIPSEB) \\r
- || defined(_MIPSEB) \\r
- || defined(__m68k__) \\r
- || defined(__s390__) \\r
- || defined(__s390x__) \\r
- || defined(__zarch__) \\r
- || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))\r
- #define MY_CPU_BE\r
-#endif\r
-\r
-\r
-#if defined(MY_CPU_LE) && defined(MY_CPU_BE)\r
- #error Stop_Compiling_Bad_Endian\r
-#endif\r
-\r
-\r
-#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)\r
- #error Stop_Compiling_Bad_32_64_BIT\r
-#endif\r
-\r
-#ifdef __SIZEOF_POINTER__\r
- #ifdef MY_CPU_SIZEOF_POINTER\r
- #if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__\r
- #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE\r
- #endif\r
- #else\r
- #define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__\r
- #endif\r
-#endif\r
-\r
-#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)\r
-#if defined (_LP64)\r
- #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE\r
-#endif\r
-#endif\r
-\r
-#ifdef _MSC_VER\r
- #if _MSC_VER >= 1300\r
- #define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1))\r
- #define MY_CPU_pragma_pop __pragma(pack(pop))\r
- #else\r
- #define MY_CPU_pragma_pack_push_1\r
- #define MY_CPU_pragma_pop\r
- #endif\r
-#else\r
- #ifdef __xlC__\r
- #define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)")\r
- #define MY_CPU_pragma_pop _Pragma("pack()")\r
- #else\r
- #define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)")\r
- #define MY_CPU_pragma_pop _Pragma("pack(pop)")\r
- #endif\r
-#endif\r
-\r
-\r
-#ifndef MY_CPU_NAME\r
- #ifdef MY_CPU_LE\r
- #define MY_CPU_NAME "LE"\r
- #elif defined(MY_CPU_BE)\r
- #define MY_CPU_NAME "BE"\r
- #else\r
- /*\r
- #define MY_CPU_NAME ""\r
- */\r
- #endif\r
-#endif\r
-\r
-\r
-\r
-\r
-\r
-#ifdef MY_CPU_LE\r
- #if defined(MY_CPU_X86_OR_AMD64) \\r
- || defined(MY_CPU_ARM64)\r
- #define MY_CPU_LE_UNALIGN\r
- #define MY_CPU_LE_UNALIGN_64\r
- #elif defined(__ARM_FEATURE_UNALIGNED)\r
- /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment.\r
- So we can't use unaligned 64-bit operations. */\r
- #define MY_CPU_LE_UNALIGN\r
- #endif\r
-#endif\r
-\r
-\r
-#ifdef MY_CPU_LE_UNALIGN\r
-\r
-#define GetUi16(p) (*(const UInt16 *)(const void *)(p))\r
-#define GetUi32(p) (*(const UInt32 *)(const void *)(p))\r
-#ifdef MY_CPU_LE_UNALIGN_64\r
-#define GetUi64(p) (*(const UInt64 *)(const void *)(p))\r
-#endif\r
-\r
-#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }\r
-#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }\r
-#ifdef MY_CPU_LE_UNALIGN_64\r
-#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }\r
-#endif\r
-\r
-#else\r
-\r
-#define GetUi16(p) ( (UInt16) ( \\r
- ((const Byte *)(p))[0] | \\r
- ((UInt16)((const Byte *)(p))[1] << 8) ))\r
-\r
-#define GetUi32(p) ( \\r
- ((const Byte *)(p))[0] | \\r
- ((UInt32)((const Byte *)(p))[1] << 8) | \\r
- ((UInt32)((const Byte *)(p))[2] << 16) | \\r
- ((UInt32)((const Byte *)(p))[3] << 24))\r
-\r
-#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \\r
- _ppp_[0] = (Byte)_vvv_; \\r
- _ppp_[1] = (Byte)(_vvv_ >> 8); }\r
-\r
-#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \\r
- _ppp_[0] = (Byte)_vvv_; \\r
- _ppp_[1] = (Byte)(_vvv_ >> 8); \\r
- _ppp_[2] = (Byte)(_vvv_ >> 16); \\r
- _ppp_[3] = (Byte)(_vvv_ >> 24); }\r
-\r
-#endif\r
-\r
-\r
-#ifndef MY_CPU_LE_UNALIGN_64\r
-\r
-#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))\r
-\r
-#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \\r
- SetUi32(_ppp2_ , (UInt32)_vvv2_); \\r
- SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }\r
-\r
-#endif\r
-\r
-\r
-\r
-\r
-#ifdef __has_builtin\r
- #define MY__has_builtin(x) __has_builtin(x)\r
-#else\r
- #define MY__has_builtin(x) 0\r
-#endif\r
-\r
-#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300)\r
-\r
-/* Note: we use bswap instruction, that is unsupported in 386 cpu */\r
-\r
-#include <stdlib.h>\r
-\r
-#pragma intrinsic(_byteswap_ushort)\r
-#pragma intrinsic(_byteswap_ulong)\r
-#pragma intrinsic(_byteswap_uint64)\r
-\r
-/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */\r
-#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p))\r
-#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p))\r
-\r
-#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)\r
-\r
-#elif defined(MY_CPU_LE_UNALIGN) && ( \\r
- (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \\r
- || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )\r
-\r
-/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */\r
-#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p))\r
-#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p))\r
-\r
-#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)\r
-\r
-#else\r
-\r
-#define GetBe32(p) ( \\r
- ((UInt32)((const Byte *)(p))[0] << 24) | \\r
- ((UInt32)((const Byte *)(p))[1] << 16) | \\r
- ((UInt32)((const Byte *)(p))[2] << 8) | \\r
- ((const Byte *)(p))[3] )\r
-\r
-#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))\r
-\r
-#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \\r
- _ppp_[0] = (Byte)(_vvv_ >> 24); \\r
- _ppp_[1] = (Byte)(_vvv_ >> 16); \\r
- _ppp_[2] = (Byte)(_vvv_ >> 8); \\r
- _ppp_[3] = (Byte)_vvv_; }\r
-\r
-#endif\r
-\r
-\r
-#ifndef GetBe16\r
-\r
-#define GetBe16(p) ( (UInt16) ( \\r
- ((UInt16)((const Byte *)(p))[0] << 8) | \\r
- ((const Byte *)(p))[1] ))\r
-\r
-#endif\r
-\r
-\r
-\r
-#ifdef MY_CPU_X86_OR_AMD64\r
-\r
-typedef struct\r
-{\r
- UInt32 maxFunc;\r
- UInt32 vendor[3];\r
- UInt32 ver;\r
- UInt32 b;\r
- UInt32 c;\r
- UInt32 d;\r
-} Cx86cpuid;\r
-\r
-enum\r
-{\r
- CPU_FIRM_INTEL,\r
- CPU_FIRM_AMD,\r
- CPU_FIRM_VIA\r
-};\r
-\r
-void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d);\r
-\r
-BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p);\r
-int x86cpuid_GetFirm(const Cx86cpuid *p);\r
-\r
-#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF))\r
-#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF))\r
-#define x86cpuid_GetStepping(ver) (ver & 0xF)\r
-\r
-BoolInt CPU_Is_InOrder(void);\r
-\r
-BoolInt CPU_IsSupported_AES(void);\r
-BoolInt CPU_IsSupported_AVX2(void);\r
-BoolInt CPU_IsSupported_VAES_AVX2(void);\r
-BoolInt CPU_IsSupported_SSSE3(void);\r
-BoolInt CPU_IsSupported_SSE41(void);\r
-BoolInt CPU_IsSupported_SHA(void);\r
-BoolInt CPU_IsSupported_PageGB(void);\r
-\r
-#elif defined(MY_CPU_ARM_OR_ARM64)\r
-\r
-BoolInt CPU_IsSupported_CRC32(void);\r
-BoolInt CPU_IsSupported_NEON(void);\r
-\r
-#if defined(_WIN32)\r
-BoolInt CPU_IsSupported_CRYPTO(void);\r
-#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO\r
-#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO\r
-#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO\r
-#else\r
-BoolInt CPU_IsSupported_SHA1(void);\r
-BoolInt CPU_IsSupported_SHA2(void);\r
-BoolInt CPU_IsSupported_AES(void);\r
-#endif\r
-\r
-#endif\r
-\r
-#if defined(__APPLE__)\r
-int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);\r
-int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);\r
-#endif\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* Delta.h -- Delta converter\r
-2013-01-18 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __DELTA_H\r
-#define __DELTA_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-#define DELTA_STATE_SIZE 256\r
-\r
-void Delta_Init(Byte *state);\r
-void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size);\r
-void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size);\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* DllSecur.h -- DLL loading for security\r
-2018-02-19 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __DLL_SECUR_H\r
-#define __DLL_SECUR_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-#ifdef _WIN32\r
-\r
-void My_SetDefaultDllDirectories(void);\r
-void LoadSecurityDlls(void);\r
-\r
-#endif\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* LzFindMt.h -- multithreaded Match finder for LZ algorithms\r
-2021-07-12 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __LZ_FIND_MT_H\r
-#define __LZ_FIND_MT_H\r
-\r
-#include "LzFind.h"\r
-#include "Threads.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-typedef struct _CMtSync\r
-{\r
- UInt32 numProcessedBlocks;\r
- CThread thread;\r
- UInt64 affinity;\r
-\r
- BoolInt wasCreated;\r
- BoolInt needStart;\r
- BoolInt csWasInitialized;\r
- BoolInt csWasEntered;\r
-\r
- BoolInt exit;\r
- BoolInt stopWriting;\r
-\r
- CAutoResetEvent canStart;\r
- CAutoResetEvent wasStopped;\r
- CSemaphore freeSemaphore;\r
- CSemaphore filledSemaphore;\r
- CCriticalSection cs;\r
- // UInt32 numBlocks_Sent;\r
-} CMtSync;\r
-\r
-typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);\r
-\r
-/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */\r
-#define kMtCacheLineDummy 128\r
-\r
-typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos,\r
- UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc);\r
-\r
-typedef struct _CMatchFinderMt\r
-{\r
- /* LZ */\r
- const Byte *pointerToCurPos;\r
- UInt32 *btBuf;\r
- const UInt32 *btBufPos;\r
- const UInt32 *btBufPosLimit;\r
- UInt32 lzPos;\r
- UInt32 btNumAvailBytes;\r
-\r
- UInt32 *hash;\r
- UInt32 fixedHashSize;\r
- // UInt32 hash4Mask;\r
- UInt32 historySize;\r
- const UInt32 *crc;\r
-\r
- Mf_Mix_Matches MixMatchesFunc;\r
- UInt32 failure_LZ_BT; // failure in BT transfered to LZ\r
- // UInt32 failure_LZ_LZ; // failure in LZ tables\r
- UInt32 failureBuf[1];\r
- // UInt32 crc[256];\r
-\r
- /* LZ + BT */\r
- CMtSync btSync;\r
- Byte btDummy[kMtCacheLineDummy];\r
-\r
- /* BT */\r
- UInt32 *hashBuf;\r
- UInt32 hashBufPos;\r
- UInt32 hashBufPosLimit;\r
- UInt32 hashNumAvail;\r
- UInt32 failure_BT;\r
-\r
-\r
- CLzRef *son;\r
- UInt32 matchMaxLen;\r
- UInt32 numHashBytes;\r
- UInt32 pos;\r
- const Byte *buffer;\r
- UInt32 cyclicBufferPos;\r
- UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */\r
- UInt32 cutValue;\r
-\r
- /* BT + Hash */\r
- CMtSync hashSync;\r
- /* Byte hashDummy[kMtCacheLineDummy]; */\r
- \r
- /* Hash */\r
- Mf_GetHeads GetHeadsFunc;\r
- CMatchFinder *MatchFinder;\r
- // CMatchFinder MatchFinder;\r
-} CMatchFinderMt;\r
-\r
-// only for Mt part\r
-void MatchFinderMt_Construct(CMatchFinderMt *p);\r
-void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc);\r
-\r
-SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,\r
- UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc);\r
-void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable);\r
-\r
-/* call MatchFinderMt_InitMt() before IMatchFinder::Init() */\r
-SRes MatchFinderMt_InitMt(CMatchFinderMt *p);\r
-void MatchFinderMt_ReleaseStream(CMatchFinderMt *p);\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* Lzma86.h -- LZMA + x86 (BCJ) Filter\r
-2013-01-18 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __LZMA86_H\r
-#define __LZMA86_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-#define LZMA86_SIZE_OFFSET (1 + 5)\r
-#define LZMA86_HEADER_SIZE (LZMA86_SIZE_OFFSET + 8)\r
-\r
-/*\r
-It's an example for LZMA + x86 Filter use.\r
-You can use .lzma86 extension, if you write that stream to file.\r
-.lzma86 header adds one additional byte to standard .lzma header.\r
-.lzma86 header (14 bytes):\r
- Offset Size Description\r
- 0 1 = 0 - no filter, pure LZMA\r
- = 1 - x86 filter + LZMA\r
- 1 1 lc, lp and pb in encoded form\r
- 2 4 dictSize (little endian)\r
- 6 8 uncompressed size (little endian)\r
-\r
-\r
-Lzma86_Encode\r
--------------\r
-level - compression level: 0 <= level <= 9, the default value for "level" is 5.\r
-\r
-dictSize - The dictionary size in bytes. The maximum value is\r
- 128 MB = (1 << 27) bytes for 32-bit version\r
- 1 GB = (1 << 30) bytes for 64-bit version\r
- The default value is 16 MB = (1 << 24) bytes, for level = 5.\r
- It's recommended to use the dictionary that is larger than 4 KB and\r
- that can be calculated as (1 << N) or (3 << N) sizes.\r
- For better compression ratio dictSize must be >= inSize.\r
-\r
-filterMode:\r
- SZ_FILTER_NO - no Filter\r
- SZ_FILTER_YES - x86 Filter\r
- SZ_FILTER_AUTO - it tries both alternatives to select best.\r
- Encoder will use 2 or 3 passes:\r
- 2 passes when FILTER_NO provides better compression.\r
- 3 passes when FILTER_YES provides better compression.\r
-\r
-Lzma86Encode allocates Data with MyAlloc functions.\r
-RAM Requirements for compressing:\r
- RamSize = dictionarySize * 11.5 + 6MB + FilterBlockSize\r
- filterMode FilterBlockSize\r
- SZ_FILTER_NO 0\r
- SZ_FILTER_YES inSize\r
- SZ_FILTER_AUTO inSize\r
-\r
-\r
-Return code:\r
- SZ_OK - OK\r
- SZ_ERROR_MEM - Memory allocation error\r
- SZ_ERROR_PARAM - Incorrect paramater\r
- SZ_ERROR_OUTPUT_EOF - output buffer overflow\r
- SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)\r
-*/\r
-\r
-enum ESzFilterMode\r
-{\r
- SZ_FILTER_NO,\r
- SZ_FILTER_YES,\r
- SZ_FILTER_AUTO\r
-};\r
-\r
-SRes Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,\r
- int level, UInt32 dictSize, int filterMode);\r
-\r
-\r
-/*\r
-Lzma86_GetUnpackSize:\r
- In:\r
- src - input data\r
- srcLen - input data size\r
- Out:\r
- unpackSize - size of uncompressed stream\r
- Return code:\r
- SZ_OK - OK\r
- SZ_ERROR_INPUT_EOF - Error in headers\r
-*/\r
-\r
-SRes Lzma86_GetUnpackSize(const Byte *src, SizeT srcLen, UInt64 *unpackSize);\r
-\r
-/*\r
-Lzma86_Decode:\r
- In:\r
- dest - output data\r
- destLen - output data size\r
- src - input data\r
- srcLen - input data size\r
- Out:\r
- destLen - processed output size\r
- srcLen - processed input size\r
- Return code:\r
- SZ_OK - OK\r
- SZ_ERROR_DATA - Data error\r
- SZ_ERROR_MEM - Memory allocation error\r
- SZ_ERROR_UNSUPPORTED - unsupported file\r
- SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer\r
-*/\r
-\r
-SRes Lzma86_Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen);\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* LzmaLib.h -- LZMA library interface\r
-2021-04-03 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __LZMA_LIB_H\r
-#define __LZMA_LIB_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-#define MY_STDAPI int MY_STD_CALL\r
-\r
-#define LZMA_PROPS_SIZE 5\r
-\r
-/*\r
-RAM requirements for LZMA:\r
- for compression: (dictSize * 11.5 + 6 MB) + state_size\r
- for decompression: dictSize + state_size\r
- state_size = (4 + (1.5 << (lc + lp))) KB\r
- by default (lc=3, lp=0), state_size = 16 KB.\r
-\r
-LZMA properties (5 bytes) format\r
- Offset Size Description\r
- 0 1 lc, lp and pb in encoded form.\r
- 1 4 dictSize (little endian).\r
-*/\r
-\r
-/*\r
-LzmaCompress\r
-------------\r
-\r
-outPropsSize -\r
- In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.\r
- Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.\r
-\r
- LZMA Encoder will use defult values for any parameter, if it is\r
- -1 for any from: level, loc, lp, pb, fb, numThreads\r
- 0 for dictSize\r
- \r
-level - compression level: 0 <= level <= 9;\r
-\r
- level dictSize algo fb\r
- 0: 64 KB 0 32\r
- 1: 256 KB 0 32\r
- 2: 1 MB 0 32\r
- 3: 4 MB 0 32\r
- 4: 16 MB 0 32\r
- 5: 16 MB 1 32\r
- 6: 32 MB 1 32\r
- 7: 32 MB 1 64\r
- 8: 64 MB 1 64\r
- 9: 64 MB 1 64\r
- \r
- The default value for "level" is 5.\r
-\r
- algo = 0 means fast method\r
- algo = 1 means normal method\r
-\r
-dictSize - The dictionary size in bytes. The maximum value is\r
- 128 MB = (1 << 27) bytes for 32-bit version\r
- 1 GB = (1 << 30) bytes for 64-bit version\r
- The default value is 16 MB = (1 << 24) bytes.\r
- It's recommended to use the dictionary that is larger than 4 KB and\r
- that can be calculated as (1 << N) or (3 << N) sizes.\r
-\r
-lc - The number of literal context bits (high bits of previous literal).\r
- It can be in the range from 0 to 8. The default value is 3.\r
- Sometimes lc=4 gives the gain for big files.\r
-\r
-lp - The number of literal pos bits (low bits of current position for literals).\r
- It can be in the range from 0 to 4. The default value is 0.\r
- The lp switch is intended for periodical data when the period is equal to 2^lp.\r
- For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's\r
- better to set lc=0, if you change lp switch.\r
-\r
-pb - The number of pos bits (low bits of current position).\r
- It can be in the range from 0 to 4. The default value is 2.\r
- The pb switch is intended for periodical data when the period is equal 2^pb.\r
-\r
-fb - Word size (the number of fast bytes).\r
- It can be in the range from 5 to 273. The default value is 32.\r
- Usually, a big number gives a little bit better compression ratio and\r
- slower compression process.\r
-\r
-numThreads - The number of thereads. 1 or 2. The default value is 2.\r
- Fast mode (algo = 0) can use only 1 thread.\r
-\r
-In:\r
- dest - output data buffer\r
- destLen - output data buffer size\r
- src - input data\r
- srcLen - input data size\r
-Out:\r
- destLen - processed output size\r
-Returns:\r
- SZ_OK - OK\r
- SZ_ERROR_MEM - Memory allocation error\r
- SZ_ERROR_PARAM - Incorrect paramater\r
- SZ_ERROR_OUTPUT_EOF - output buffer overflow\r
- SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)\r
-*/\r
-\r
-MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,\r
- unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */\r
- int level, /* 0 <= level <= 9, default = 5 */\r
- unsigned dictSize, /* default = (1 << 24) */\r
- int lc, /* 0 <= lc <= 8, default = 3 */\r
- int lp, /* 0 <= lp <= 4, default = 0 */\r
- int pb, /* 0 <= pb <= 4, default = 2 */\r
- int fb, /* 5 <= fb <= 273, default = 32 */\r
- int numThreads /* 1 or 2, default = 2 */\r
- );\r
-\r
-/*\r
-LzmaUncompress\r
---------------\r
-In:\r
- dest - output data buffer\r
- destLen - output data buffer size\r
- src - input data\r
- srcLen - input data size\r
-Out:\r
- destLen - processed output size\r
- srcLen - processed input size\r
-Returns:\r
- SZ_OK - OK\r
- SZ_ERROR_DATA - Data error\r
- SZ_ERROR_MEM - Memory allocation arror\r
- SZ_ERROR_UNSUPPORTED - Unsupported properties\r
- SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src)\r
-*/\r
-\r
-MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,\r
- const unsigned char *props, size_t propsSize);\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* MtCoder.h -- Multi-thread Coder\r
-2018-07-04 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __MT_CODER_H\r
-#define __MT_CODER_H\r
-\r
-#include "MtDec.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-/*\r
- if ( defined MTCODER__USE_WRITE_THREAD) : main thread writes all data blocks to output stream\r
- if (not defined MTCODER__USE_WRITE_THREAD) : any coder thread can write data blocks to output stream\r
-*/\r
-/* #define MTCODER__USE_WRITE_THREAD */\r
-\r
-#ifndef _7ZIP_ST\r
- #define MTCODER__GET_NUM_BLOCKS_FROM_THREADS(numThreads) ((numThreads) + (numThreads) / 8 + 1)\r
- #define MTCODER__THREADS_MAX 64\r
- #define MTCODER__BLOCKS_MAX (MTCODER__GET_NUM_BLOCKS_FROM_THREADS(MTCODER__THREADS_MAX) + 3)\r
-#else\r
- #define MTCODER__THREADS_MAX 1\r
- #define MTCODER__BLOCKS_MAX 1\r
-#endif\r
-\r
-\r
-#ifndef _7ZIP_ST\r
-\r
-\r
-typedef struct\r
-{\r
- ICompressProgress vt;\r
- CMtProgress *mtProgress;\r
- UInt64 inSize;\r
- UInt64 outSize;\r
-} CMtProgressThunk;\r
-\r
-void MtProgressThunk_CreateVTable(CMtProgressThunk *p);\r
- \r
-#define MtProgressThunk_Init(p) { (p)->inSize = 0; (p)->outSize = 0; }\r
-\r
-\r
-struct _CMtCoder;\r
-\r
-\r
-typedef struct\r
-{\r
- struct _CMtCoder *mtCoder;\r
- unsigned index;\r
- int stop;\r
- Byte *inBuf;\r
-\r
- CAutoResetEvent startEvent;\r
- CThread thread;\r
-} CMtCoderThread;\r
-\r
-\r
-typedef struct\r
-{\r
- SRes (*Code)(void *p, unsigned coderIndex, unsigned outBufIndex,\r
- const Byte *src, size_t srcSize, int finished);\r
- SRes (*Write)(void *p, unsigned outBufIndex);\r
-} IMtCoderCallback2;\r
-\r
-\r
-typedef struct\r
-{\r
- SRes res;\r
- unsigned bufIndex;\r
- BoolInt finished;\r
-} CMtCoderBlock;\r
-\r
-\r
-typedef struct _CMtCoder\r
-{\r
- /* input variables */\r
- \r
- size_t blockSize; /* size of input block */\r
- unsigned numThreadsMax;\r
- UInt64 expectedDataSize;\r
-\r
- ISeqInStream *inStream;\r
- const Byte *inData;\r
- size_t inDataSize;\r
-\r
- ICompressProgress *progress;\r
- ISzAllocPtr allocBig;\r
-\r
- IMtCoderCallback2 *mtCallback;\r
- void *mtCallbackObject;\r
-\r
- \r
- /* internal variables */\r
- \r
- size_t allocatedBufsSize;\r
-\r
- CAutoResetEvent readEvent;\r
- CSemaphore blocksSemaphore;\r
-\r
- BoolInt stopReading;\r
- SRes readRes;\r
-\r
- #ifdef MTCODER__USE_WRITE_THREAD\r
- CAutoResetEvent writeEvents[MTCODER__BLOCKS_MAX];\r
- #else\r
- CAutoResetEvent finishedEvent;\r
- SRes writeRes;\r
- unsigned writeIndex;\r
- Byte ReadyBlocks[MTCODER__BLOCKS_MAX];\r
- LONG numFinishedThreads;\r
- #endif\r
-\r
- unsigned numStartedThreadsLimit;\r
- unsigned numStartedThreads;\r
-\r
- unsigned numBlocksMax;\r
- unsigned blockIndex;\r
- UInt64 readProcessed;\r
-\r
- CCriticalSection cs;\r
-\r
- unsigned freeBlockHead;\r
- unsigned freeBlockList[MTCODER__BLOCKS_MAX];\r
-\r
- CMtProgress mtProgress;\r
- CMtCoderBlock blocks[MTCODER__BLOCKS_MAX];\r
- CMtCoderThread threads[MTCODER__THREADS_MAX];\r
-} CMtCoder;\r
-\r
-\r
-void MtCoder_Construct(CMtCoder *p);\r
-void MtCoder_Destruct(CMtCoder *p);\r
-SRes MtCoder_Code(CMtCoder *p);\r
-\r
-\r
-#endif\r
-\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* MtDec.h -- Multi-thread Decoder\r
-2020-03-05 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __MT_DEC_H\r
-#define __MT_DEC_H\r
-\r
-#include "7zTypes.h"\r
-\r
-#ifndef _7ZIP_ST\r
-#include "Threads.h"\r
-#endif\r
-\r
-EXTERN_C_BEGIN\r
-\r
-#ifndef _7ZIP_ST\r
-\r
-#ifndef _7ZIP_ST\r
- #define MTDEC__THREADS_MAX 32\r
-#else\r
- #define MTDEC__THREADS_MAX 1\r
-#endif\r
-\r
-\r
-typedef struct\r
-{\r
- ICompressProgress *progress;\r
- SRes res;\r
- UInt64 totalInSize;\r
- UInt64 totalOutSize;\r
- CCriticalSection cs;\r
-} CMtProgress;\r
-\r
-void MtProgress_Init(CMtProgress *p, ICompressProgress *progress);\r
-SRes MtProgress_Progress_ST(CMtProgress *p);\r
-SRes MtProgress_ProgressAdd(CMtProgress *p, UInt64 inSize, UInt64 outSize);\r
-SRes MtProgress_GetError(CMtProgress *p);\r
-void MtProgress_SetError(CMtProgress *p, SRes res);\r
-\r
-struct _CMtDec;\r
-\r
-typedef struct\r
-{\r
- struct _CMtDec *mtDec;\r
- unsigned index;\r
- void *inBuf;\r
-\r
- size_t inDataSize_Start; // size of input data in start block\r
- UInt64 inDataSize; // total size of input data in all blocks\r
-\r
- CThread thread;\r
- CAutoResetEvent canRead;\r
- CAutoResetEvent canWrite;\r
- void *allocaPtr;\r
-} CMtDecThread;\r
-\r
-void MtDecThread_FreeInBufs(CMtDecThread *t);\r
-\r
-\r
-typedef enum\r
-{\r
- MTDEC_PARSE_CONTINUE, // continue this block with more input data\r
- MTDEC_PARSE_OVERFLOW, // MT buffers overflow, need switch to single-thread\r
- MTDEC_PARSE_NEW, // new block\r
- MTDEC_PARSE_END // end of block threading. But we still can return to threading after Write(&needContinue)\r
-} EMtDecParseState;\r
-\r
-typedef struct\r
-{\r
- // in\r
- int startCall;\r
- const Byte *src;\r
- size_t srcSize;\r
- // in : (srcSize == 0) is allowed\r
- // out : it's allowed to return less that actually was used ?\r
- int srcFinished;\r
-\r
- // out\r
- EMtDecParseState state;\r
- BoolInt canCreateNewThread;\r
- UInt64 outPos; // check it (size_t)\r
-} CMtDecCallbackInfo;\r
-\r
-\r
-typedef struct\r
-{\r
- void (*Parse)(void *p, unsigned coderIndex, CMtDecCallbackInfo *ci);\r
- \r
- // PreCode() and Code():\r
- // (SRes_return_result != SZ_OK) means stop decoding, no need another blocks\r
- SRes (*PreCode)(void *p, unsigned coderIndex);\r
- SRes (*Code)(void *p, unsigned coderIndex,\r
- const Byte *src, size_t srcSize, int srcFinished,\r
- UInt64 *inCodePos, UInt64 *outCodePos, int *stop);\r
- // stop - means stop another Code calls\r
-\r
-\r
- /* Write() must be called, if Parse() was called\r
- set (needWrite) if\r
- {\r
- && (was not interrupted by progress)\r
- && (was not interrupted in previous block)\r
- }\r
-\r
- out:\r
- if (*needContinue), decoder still need to continue decoding with new iteration,\r
- even after MTDEC_PARSE_END\r
- if (*canRecode), we didn't flush current block data, so we still can decode current block later.\r
- */\r
- SRes (*Write)(void *p, unsigned coderIndex,\r
- BoolInt needWriteToStream,\r
- const Byte *src, size_t srcSize, BoolInt isCross,\r
- // int srcFinished,\r
- BoolInt *needContinue,\r
- BoolInt *canRecode);\r
-\r
-} IMtDecCallback2;\r
-\r
-\r
-\r
-typedef struct _CMtDec\r
-{\r
- /* input variables */\r
- \r
- size_t inBufSize; /* size of input block */\r
- unsigned numThreadsMax;\r
- // size_t inBlockMax;\r
- unsigned numThreadsMax_2;\r
-\r
- ISeqInStream *inStream;\r
- // const Byte *inData;\r
- // size_t inDataSize;\r
-\r
- ICompressProgress *progress;\r
- ISzAllocPtr alloc;\r
-\r
- IMtDecCallback2 *mtCallback;\r
- void *mtCallbackObject;\r
-\r
- \r
- /* internal variables */\r
- \r
- size_t allocatedBufsSize;\r
-\r
- BoolInt exitThread;\r
- WRes exitThreadWRes;\r
-\r
- UInt64 blockIndex;\r
- BoolInt isAllocError;\r
- BoolInt overflow;\r
- SRes threadingErrorSRes;\r
-\r
- BoolInt needContinue;\r
-\r
- // CAutoResetEvent finishedEvent;\r
-\r
- SRes readRes;\r
- SRes codeRes;\r
-\r
- BoolInt wasInterrupted;\r
-\r
- unsigned numStartedThreads_Limit;\r
- unsigned numStartedThreads;\r
-\r
- Byte *crossBlock;\r
- size_t crossStart;\r
- size_t crossEnd;\r
- UInt64 readProcessed;\r
- BoolInt readWasFinished;\r
- UInt64 inProcessed;\r
-\r
- unsigned filledThreadStart;\r
- unsigned numFilledThreads;\r
-\r
- #ifndef _7ZIP_ST\r
- BoolInt needInterrupt;\r
- UInt64 interruptIndex;\r
- CMtProgress mtProgress;\r
- CMtDecThread threads[MTDEC__THREADS_MAX];\r
- #endif\r
-} CMtDec;\r
-\r
-\r
-void MtDec_Construct(CMtDec *p);\r
-void MtDec_Destruct(CMtDec *p);\r
-\r
-/*\r
-MtDec_Code() returns:\r
- SZ_OK - in most cases\r
- MY_SRes_HRESULT_FROM_WRes(WRes_error) - in case of unexpected error in threading function\r
-*/\r
- \r
-SRes MtDec_Code(CMtDec *p);\r
-Byte *MtDec_GetCrossBuff(CMtDec *p);\r
-\r
-int MtDec_PrepareRead(CMtDec *p);\r
-const Byte *MtDec_Read(CMtDec *p, size_t *inLim);\r
-\r
-#endif\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* Precomp.h -- StdAfx\r
-2013-11-12 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __7Z_PRECOMP_H\r
-#define __7Z_PRECOMP_H\r
-\r
-#include "Compiler.h"\r
-/* #include "7zTypes.h" */\r
-\r
-#endif\r
+++ /dev/null
-/* RotateDefs.h -- Rotate functions\r
-2015-03-25 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __ROTATE_DEFS_H\r
-#define __ROTATE_DEFS_H\r
-\r
-#ifdef _MSC_VER\r
-\r
-#include <stdlib.h>\r
-\r
-/* don't use _rotl with MINGW. It can insert slow call to function. */\r
- \r
-/* #if (_MSC_VER >= 1200) */\r
-#pragma intrinsic(_rotl)\r
-#pragma intrinsic(_rotr)\r
-/* #endif */\r
-\r
-#define rotlFixed(x, n) _rotl((x), (n))\r
-#define rotrFixed(x, n) _rotr((x), (n))\r
-\r
-#else\r
-\r
-/* new compilers can translate these macros to fast commands. */\r
-\r
-#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n))))\r
-#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n))))\r
-\r
-#endif\r
-\r
-#endif\r
+++ /dev/null
-/* Sha256.h -- SHA-256 Hash\r
-2021-01-01 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __7Z_SHA256_H\r
-#define __7Z_SHA256_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-#define SHA256_NUM_BLOCK_WORDS 16\r
-#define SHA256_NUM_DIGEST_WORDS 8\r
-\r
-#define SHA256_BLOCK_SIZE (SHA256_NUM_BLOCK_WORDS * 4)\r
-#define SHA256_DIGEST_SIZE (SHA256_NUM_DIGEST_WORDS * 4)\r
-\r
-typedef void (MY_FAST_CALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks);\r
-\r
-/*\r
- if (the system supports different SHA256 code implementations)\r
- {\r
- (CSha256::func_UpdateBlocks) will be used\r
- (CSha256::func_UpdateBlocks) can be set by\r
- Sha256_Init() - to default (fastest)\r
- Sha256_SetFunction() - to any algo\r
- }\r
- else\r
- {\r
- (CSha256::func_UpdateBlocks) is ignored.\r
- }\r
-*/\r
-\r
-typedef struct\r
-{\r
- SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks;\r
- UInt64 count;\r
- UInt64 __pad_2[2];\r
- UInt32 state[SHA256_NUM_DIGEST_WORDS];\r
-\r
- Byte buffer[SHA256_BLOCK_SIZE];\r
-} CSha256;\r
-\r
-\r
-#define SHA256_ALGO_DEFAULT 0\r
-#define SHA256_ALGO_SW 1\r
-#define SHA256_ALGO_HW 2\r
-\r
-/*\r
-Sha256_SetFunction()\r
-return:\r
- 0 - (algo) value is not supported, and func_UpdateBlocks was not changed\r
- 1 - func_UpdateBlocks was set according (algo) value.\r
-*/\r
-\r
-BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo);\r
-\r
-void Sha256_InitState(CSha256 *p);\r
-void Sha256_Init(CSha256 *p);\r
-void Sha256_Update(CSha256 *p, const Byte *data, size_t size);\r
-void Sha256_Final(CSha256 *p, Byte *digest);\r
-\r
-\r
-\r
-\r
-// void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);\r
-\r
-/*\r
-call Sha256Prepare() once at program start.\r
-It prepares all supported implementations, and detects the fastest implementation.\r
-*/\r
-\r
-void Sha256Prepare(void);\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* Sort.h -- Sort functions\r
-2014-04-05 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __7Z_SORT_H\r
-#define __7Z_SORT_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-void HeapSort(UInt32 *p, size_t size);\r
-void HeapSort64(UInt64 *p, size_t size);\r
-\r
-/* void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size); */\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-/* Threads.h -- multithreading library\r
-2021-12-21 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __7Z_THREADS_H\r
-#define __7Z_THREADS_H\r
-\r
-#ifdef _WIN32\r
-#include <windows.h>\r
-#else\r
-\r
-#if defined(__linux__)\r
-#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)\r
-#ifndef _7ZIP_AFFINITY_DISABLE\r
-#define _7ZIP_AFFINITY_SUPPORTED\r
-// #pragma message(" ==== _7ZIP_AFFINITY_SUPPORTED")\r
-// #define _GNU_SOURCE\r
-#endif\r
-#endif\r
-#endif\r
-\r
-#include <pthread.h>\r
-\r
-#endif\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-#ifdef _WIN32\r
-\r
-WRes HandlePtr_Close(HANDLE *h);\r
-WRes Handle_WaitObject(HANDLE h);\r
-\r
-typedef HANDLE CThread;\r
-\r
-#define Thread_Construct(p) { *(p) = NULL; }\r
-#define Thread_WasCreated(p) (*(p) != NULL)\r
-#define Thread_Close(p) HandlePtr_Close(p)\r
-// #define Thread_Wait(p) Handle_WaitObject(*(p))\r
-\r
-#ifdef UNDER_CE\r
- // if (USE_THREADS_CreateThread is defined), we use _beginthreadex()\r
- // if (USE_THREADS_CreateThread is not definned), we use CreateThread()\r
- #define USE_THREADS_CreateThread\r
-#endif\r
-\r
-typedef\r
- #ifdef USE_THREADS_CreateThread\r
- DWORD\r
- #else\r
- unsigned\r
- #endif\r
- THREAD_FUNC_RET_TYPE;\r
-\r
-typedef DWORD_PTR CAffinityMask;\r
-typedef DWORD_PTR CCpuSet;\r
-\r
-#define CpuSet_Zero(p) { *(p) = 0; }\r
-#define CpuSet_Set(p, cpu) { *(p) |= ((DWORD_PTR)1 << (cpu)); }\r
-\r
-#else // _WIN32\r
-\r
-typedef struct _CThread\r
-{\r
- pthread_t _tid;\r
- int _created;\r
-} CThread;\r
-\r
-#define Thread_Construct(p) { (p)->_tid = 0; (p)->_created = 0; }\r
-#define Thread_WasCreated(p) ((p)->_created != 0)\r
-WRes Thread_Close(CThread *p);\r
-// #define Thread_Wait Thread_Wait_Close\r
-\r
-typedef void * THREAD_FUNC_RET_TYPE;\r
-\r
-typedef UInt64 CAffinityMask;\r
-\r
-#ifdef _7ZIP_AFFINITY_SUPPORTED\r
-\r
-typedef cpu_set_t CCpuSet;\r
-#define CpuSet_Zero(p) CPU_ZERO(p)\r
-#define CpuSet_Set(p, cpu) CPU_SET(cpu, p)\r
-#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p)\r
-\r
-#else\r
-\r
-typedef UInt64 CCpuSet;\r
-#define CpuSet_Zero(p) { *(p) = 0; }\r
-#define CpuSet_Set(p, cpu) { *(p) |= ((UInt64)1 << (cpu)); }\r
-#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0)\r
-\r
-#endif\r
-\r
-\r
-#endif // _WIN32\r
-\r
-\r
-#define THREAD_FUNC_CALL_TYPE MY_STD_CALL\r
-\r
-#if defined(_WIN32) && defined(__GNUC__)\r
-/* GCC compiler for x86 32-bit uses the rule:\r
- the stack is 16-byte aligned before CALL instruction for function calling.\r
- But only root function main() contains instructions that\r
- set 16-byte alignment for stack pointer. And another functions\r
- just keep alignment, if it was set in some parent function.\r
- \r
- The problem:\r
- if we create new thread in MinGW (GCC) 32-bit x86 via _beginthreadex() or CreateThread(),\r
- the root function of thread doesn't set 16-byte alignment.\r
- And stack frames in all child functions also will be unaligned in that case.\r
- \r
- Here we set (force_align_arg_pointer) attribute for root function of new thread.\r
- Do we need (force_align_arg_pointer) also for another systems? */\r
- \r
- #define THREAD_FUNC_ATTRIB_ALIGN_ARG __attribute__((force_align_arg_pointer))\r
- // #define THREAD_FUNC_ATTRIB_ALIGN_ARG // for debug : bad alignment in SSE functions\r
-#else\r
- #define THREAD_FUNC_ATTRIB_ALIGN_ARG\r
-#endif\r
-\r
-#define THREAD_FUNC_DECL THREAD_FUNC_ATTRIB_ALIGN_ARG THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE\r
-\r
-typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *);\r
-WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param);\r
-WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity);\r
-WRes Thread_Wait_Close(CThread *p);\r
-\r
-#ifdef _WIN32\r
-#define Thread_Create_With_CpuSet(p, func, param, cs) \\r
- Thread_Create_With_Affinity(p, func, param, *cs)\r
-#else\r
-WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet);\r
-#endif\r
-\r
-\r
-#ifdef _WIN32\r
-\r
-typedef HANDLE CEvent;\r
-typedef CEvent CAutoResetEvent;\r
-typedef CEvent CManualResetEvent;\r
-#define Event_Construct(p) *(p) = NULL\r
-#define Event_IsCreated(p) (*(p) != NULL)\r
-#define Event_Close(p) HandlePtr_Close(p)\r
-#define Event_Wait(p) Handle_WaitObject(*(p))\r
-WRes Event_Set(CEvent *p);\r
-WRes Event_Reset(CEvent *p);\r
-WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);\r
-WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);\r
-WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);\r
-WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);\r
-\r
-typedef HANDLE CSemaphore;\r
-#define Semaphore_Construct(p) *(p) = NULL\r
-#define Semaphore_IsCreated(p) (*(p) != NULL)\r
-#define Semaphore_Close(p) HandlePtr_Close(p)\r
-#define Semaphore_Wait(p) Handle_WaitObject(*(p))\r
-WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);\r
-WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);\r
-WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);\r
-WRes Semaphore_Release1(CSemaphore *p);\r
-\r
-typedef CRITICAL_SECTION CCriticalSection;\r
-WRes CriticalSection_Init(CCriticalSection *p);\r
-#define CriticalSection_Delete(p) DeleteCriticalSection(p)\r
-#define CriticalSection_Enter(p) EnterCriticalSection(p)\r
-#define CriticalSection_Leave(p) LeaveCriticalSection(p)\r
-\r
-\r
-#else // _WIN32\r
-\r
-typedef struct _CEvent\r
-{\r
- int _created;\r
- int _manual_reset;\r
- int _state;\r
- pthread_mutex_t _mutex;\r
- pthread_cond_t _cond;\r
-} CEvent;\r
-\r
-typedef CEvent CAutoResetEvent;\r
-typedef CEvent CManualResetEvent;\r
-\r
-#define Event_Construct(p) (p)->_created = 0\r
-#define Event_IsCreated(p) ((p)->_created)\r
-\r
-WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);\r
-WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);\r
-WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);\r
-WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);\r
-WRes Event_Set(CEvent *p);\r
-WRes Event_Reset(CEvent *p);\r
-WRes Event_Wait(CEvent *p);\r
-WRes Event_Close(CEvent *p);\r
-\r
-\r
-typedef struct _CSemaphore\r
-{\r
- int _created;\r
- UInt32 _count;\r
- UInt32 _maxCount;\r
- pthread_mutex_t _mutex;\r
- pthread_cond_t _cond;\r
-} CSemaphore;\r
-\r
-#define Semaphore_Construct(p) (p)->_created = 0\r
-#define Semaphore_IsCreated(p) ((p)->_created)\r
-\r
-WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);\r
-WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);\r
-WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);\r
-#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1)\r
-WRes Semaphore_Wait(CSemaphore *p);\r
-WRes Semaphore_Close(CSemaphore *p);\r
-\r
-\r
-typedef struct _CCriticalSection\r
-{\r
- pthread_mutex_t _mutex;\r
-} CCriticalSection;\r
-\r
-WRes CriticalSection_Init(CCriticalSection *p);\r
-void CriticalSection_Delete(CCriticalSection *cs);\r
-void CriticalSection_Enter(CCriticalSection *cs);\r
-void CriticalSection_Leave(CCriticalSection *cs);\r
-\r
-LONG InterlockedIncrement(LONG volatile *addend);\r
-\r
-#endif // _WIN32\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+++ /dev/null
-7-Zip for installers 9.38\r
--------------------------\r
-\r
-7-Zip is a file archiver for Windows NT/2000/2003/2008/XP/Vista/7/8/10. \r
-\r
-7-Zip for installers is part of LZMA SDK.\r
-LZMA SDK is written and placed in the public domain by Igor Pavlov.\r
-\r
-It's allowed to join 7-Zip SFX module with another software.\r
-It's allowed to change resources of 7-Zip's SFX modules.\r
-\r
-\r
-HOW to use\r
------------\r
-\r
-7zr.exe is reduced version of 7za.exe of 7-Zip.\r
-7zr.exe supports only format with these codecs: LZMA, LZMA2, BCJ, BCJ2, ARM, Copy.\r
-\r
-Example of compressing command for installation packages:\r
-\r
-7zr a archive.7z files\r
-\r
-7zSD.sfx is SFX module for installers. 7zSD.sfx uses msvcrt.dll.\r
-\r
-SFX modules for installers allow to create installation program. \r
-Such module extracts archive to temp folder and then runs specified program and removes \r
-temp files after program finishing. Self-extract archive for installers must be created \r
-as joining 3 files: SFX_Module, Installer_Config, 7z_Archive. \r
-Installer_Config is optional file. You can use the following command to create installer \r
-self-extract archive:\r
-\r
-copy /b 7zSD.sfx + config.txt + archive.7z archive.exe\r
-\r
-The smallest installation package size can be achieved, if installation files was \r
-uncompressed before including to 7z archive.\r
-\r
--y switch for installer module (at runtime) specifies quiet mode for extracting.\r
-\r
-Installer Config file format\r
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r
-Config file contains commands for Installer. File begins from string \r
-;!@Install@!UTF-8! and ends with ;!@InstallEnd@!. File must be written \r
-in UTF-8 encoding. File contains string pairs: \r
-\r
-ID_String="Value"\r
-\r
-ID_String Description \r
-\r
-Title Title for messages \r
-BeginPrompt Begin Prompt message \r
-Progress Value can be "yes" or "no". Default value is "yes". \r
-RunProgram Command for executing. Default value is "setup.exe". \r
- Substring %%T will be replaced with path to temporary \r
- folder, where files were extracted \r
-Directory Directory prefix for "RunProgram". Default value is ".\\" \r
-ExecuteFile Name of file for executing \r
-ExecuteParameters Parameters for "ExecuteFile" \r
-\r
-\r
-You can omit any string pair.\r
-\r
-There are two ways to run program: RunProgram and ExecuteFile. \r
-Use RunProgram, if you want to run some program from .7z archive. \r
-Use ExecuteFile, if you want to open some document from .7z archive or \r
-if you want to execute some command from Windows.\r
-\r
-If you use RunProgram and if you specify empty directory prefix: Directory="", \r
-the system searches for the executable file in the following sequence:\r
-\r
-1. The directory from which the application (installer) loaded. \r
-2. The temporary folder, where files were extracted. \r
-3. The Windows system directory. \r
-\r
-\r
-Config file Examples\r
-~~~~~~~~~~~~~~~~~~~~\r
-\r
-;!@Install@!UTF-8!\r
-Title="7-Zip 4.00"\r
-BeginPrompt="Do you want to install the 7-Zip 4.00?"\r
-RunProgram="setup.exe"\r
-;!@InstallEnd@!\r
-\r
-\r
-\r
-;!@Install@!UTF-8!\r
-Title="7-Zip 4.00"\r
-BeginPrompt="Do you want to install the 7-Zip 4.00?"\r
-ExecuteFile="7zip.msi"\r
-;!@InstallEnd@!\r
-\r
-\r
-\r
-;!@Install@!UTF-8!\r
-Title="7-Zip 4.01 Update"\r
-BeginPrompt="Do you want to install the 7-Zip 4.01 Update?"\r
-ExecuteFile="msiexec.exe"\r
-ExecuteParameters="/i 7zip.msi REINSTALL=ALL REINSTALLMODE=vomus"\r
-;!@InstallEnd@!\r
-\r
-\r
-\r
-Small SFX modules for installers\r
---------------------------------\r
-\r
-7zS2.sfx - small SFX module (GUI version)\r
-7zS2con.sfx - small SFX module (Console version)\r
-\r
-Small SFX modules support this codecs: LZMA, LZMA2, BCJ, BCJ2, ARM, COPY\r
-\r
-Small SFX module is similar to common SFX module for installers.\r
-The difference (what's new in small version):\r
- - Smaller size (30 KB vs 100 KB)\r
- - C source code instead of Ñ++\r
- - No installer Configuration file\r
- - No extracting progress window\r
- - It decompresses solid 7z blocks (it can be whole 7z archive) to RAM.\r
- So user that calls SFX installer must have free RAM of size of largest \r
- solid 7z block (size of 7z archive at simplest case).\r
-\r
-How to use\r
-----------\r
-\r
-copy /b 7zS2.sfx + archive.7z sfx.exe\r
-\r
-When you run installer sfx module (sfx.exe)\r
-1) It creates "7zNNNNNNNN" temp folder in system temp folder.\r
-2) It extracts .7z archive to that folder\r
-3) It executes one file from "7zNNNNNNNN" temp folder. \r
-4) It removes "7zNNNNNNNN" temp folder\r
-\r
-You can send parameters to installer, and installer will transfer them to extracted .exe file.\r
-\r
-Small SFX uses 3 levels of priorities to select file to execute:\r
-\r
- 1) Files in root folder have higher priority than files in subfolders.\r
- 2) File extension priorities (from high to low priority order): \r
- bat, cmd, exe, inf, msi, cab (under Windows CE), html, htm\r
- 3) File name priorities (from high to low priority order): \r
- setup, install, run, start\r
-\r
-Windows CE (ARM) version of 7zS2.sfx is included to 7-Zip for Windows Mobile package.\r
-\r
-\r
-Examples\r
---------\r
-\r
-1) To create compressed console 7-Zip:\r
-\r
-7zr a c.7z 7z.exe 7z.dll -mx\r
-copy /b 7zS2con.sfx + c.7z 7zCompr.exe\r
-7zCompr.exe b -md22\r
-\r
-\r
-2) To create compressed GUI 7-Zip:\r
-\r
-7zr a g.7z 7zg.exe 7z.dll -mx\r
-copy /b 7zS2.sfx + g.7z 7zgCompr.exe\r
-7zgCompr.exe b -md22\r
-\r
-\r
-3) To open some file:\r
-\r
-7zr a h.7z readme.txt -mx\r
-copy /b 7zS2.sfx + h.7z 7zTxt.exe \r
-7zTxt.exe\r
+++ /dev/null
-HISTORY of the LZMA SDK\r
------------------------\r
-\r
-21.07 2021-12-26\r
--------------------------\r
-- New switches: -spm and -im!{file_path} to exclude directories from processing \r
- for specified paths that don't contain path separator character at the end of path.\r
-- The sorting order of files in archives was slightly changed to be more consistent\r
- for cases where the name of some directory is the same as the prefix part of the name\r
- of another directory or file.\r
-\r
-\r
-21.06 2021-11-24\r
--------------------------\r
-- Bug in LZMA encoder in file LzmaEnc.c was fixed:\r
- LzmaEnc_MemEncode(), LzmaEncode() and LzmaCompress() could work incorrectly, \r
- if size value for output buffer is smaller than size required for all compressed data.\r
- LzmaEnc_Encode() could work incorrectly,\r
- if callback ISeqOutStream::Write() doesn't write all compressed data.\r
- NCompress::NLzma::CEncoder::Code() could work incorrectly,\r
- if callback ISequentialOutStream::Write() returns error code.\r
-- Bug in versions 21.00-21.05 was fixed:\r
- 7-Zip didn't set attributes of directories during archive extracting.\r
-\r
-\r
-21.04 beta 2021-11-02\r
--------------------------\r
-- 7-Zip now reduces the number of working CPU threads for compression,\r
- if RAM size is not enough for compression with big LZMA2 dictionary.\r
-- 7-Zip now can create and check "file.sha256" text files that contain the list \r
- of file names and SHA-256 checksums in format compatible with sha256sum program.\r
-\r
-\r
-21.03 beta 2021-07-20\r
--------------------------\r
-- The maximum dictionary size for LZMA/LZMA2 compressing was increased to 4 GB (3840 MiB).\r
-- Minor speed optimizations in LZMA/LZMA2 compressing.\r
-\r
-\r
-21.02 alpha 2021-05-06\r
--------------------------\r
-- The command line version of 7-Zip for macOS was released.\r
-- The speed for LZMA and LZMA2 decompression in arm64 versions for macOS and Linux \r
- was increased by 20%-60%.\r
-\r
-\r
-21.01 alpha 2021-03-09\r
--------------------------\r
-- The command line version of 7-Zip for Linux was released.\r
-- The improvements for speed of ARM64 version using hardware CPU instructions \r
- for AES, CRC-32, SHA-1 and SHA-256.\r
-- Some bugs were fixed.\r
-\r
-\r
-20.02 alpha 2020-08-08\r
--------------------------\r
-- The default number of LZMA2 chunks per solid block in 7z archive was increased to 64.\r
- It allows to increase the compression speed for big 7z archives, if there is a big number \r
- of CPU cores and threads.\r
-- The speed of PPMd compressing/decompressing was increased for 7z archives.\r
-- The new -ssp switch. If the switch -ssp is specified, 7-Zip doesn't allow the system \r
- to modify "Last Access Time" property of source files for archiving and hashing operations. \r
-- Some bugs were fixed.\r
-\r
-\r
-20.00 alpha 2020-02-06\r
--------------------------\r
-- 7-Zip now supports new optional match finders for LZMA/LZMA2 compression: bt5 and hc5, \r
- that can work faster than bt4 and hc4 match finders for the data with big redundancy.\r
-- The compression ratio was improved for Fast and Fastest compression levels with the \r
- following default settings:\r
- - Fastest level (-mx1) : hc5 match finder with 256 KB dictionary.\r
- - Fast level (-mx3) : hc5 match finder with 4 MB dictionary.\r
-- Minor speed optimizations in multithreaded LZMA/LZMA2 compression for Normal/Maximum/Ultra \r
- compression levels.\r
-\r
-\r
-19.00 2019-02-21\r
--------------------------\r
-- Encryption strength for 7z archives was increased:\r
- the size of random initialization vector was increased from 64-bit to 128-bit,\r
- and the pseudo-random number generator was improved.\r
-- The bug in 7zIn.c code was fixed.\r
-\r
-\r
-18.06 2018-12-30\r
--------------------------\r
-- The speed for LZMA/LZMA2 compressing was increased by 3-10%,\r
- and there are minor changes in compression ratio.\r
-- Some bugs were fixed.\r
-- The bug in 7-Zip 18.02-18.05 was fixed:\r
- There was memory leak in multithreading xz decoder - XzDecMt_Decode(),\r
- if xz stream contains only one block.\r
-- The changes for MSVS compiler makefiles: \r
- - the makefiles now use "PLATFORM" macroname with values (x64, x86, arm64)\r
- instead of "CPU" macroname with values (AMD64, ARM64).\r
- - the makefiles by default now use static version of the run-time library.\r
-\r
-\r
-18.05 2018-04-30\r
--------------------------\r
-- The speed for LZMA/LZMA2 compressing was increased \r
- by 8% for fastest/fast compression levels and \r
- by 3% for normal/maximum compression levels.\r
-- Previous versions of 7-Zip could work incorrectly in "Large memory pages" mode in\r
- Windows 10 because of some BUG with "Large Pages" in Windows 10. \r
- Now 7-Zip doesn't use "Large Pages" on Windows 10 up to revision 1709 (16299).\r
-- The BUG was fixed in Lzma2Enc.c\r
- Lzma2Enc_Encode2() function worked incorretly,\r
- if (inStream == NULL) and the number of block threads is more than 1.\r
-\r
-\r
-18.03 beta 2018-03-04\r
--------------------------\r
-- Asm\x86\LzmaDecOpt.asm: new optimized LZMA decoder written in asm \r
- for x64 with about 30% higher speed than main version of LZMA decoder written in C.\r
-- The speed for single-thread LZMA/LZMA2 decoder written in C was increased by 3%.\r
-- 7-Zip now can use multi-threading for 7z/LZMA2 decoding,\r
- if there are multiple independent data chunks in LZMA2 stream.\r
-- 7-Zip now can use multi-threading for xz decoding,\r
- if there are multiple blocks in xz stream.\r
-\r
-\r
-18.01 2019-01-28\r
--------------------------\r
-- The BUG in 17.01 - 18.00 beta was fixed:\r
- XzDec.c : random block unpacking and XzUnpacker_IsBlockFinished()\r
- didn't work correctly for xz archives without checksum (CRC).\r
-\r
-\r
-18.00 beta 2019-01-10\r
--------------------------\r
-- The BUG in xz encoder was fixed:\r
- There was memory leak of 16 KB for each file compressed with \r
- xz compression method, if additional filter was used.\r
-\r
-\r
-17.01 beta 2017-08-28\r
--------------------------\r
-- Minor speed optimization for LZMA2 (xz and 7z) multi-threading compression.\r
- 7-Zip now uses additional memory buffers for multi-block LZMA2 compression.\r
- CPU utilization was slightly improved.\r
-- 7-zip now creates multi-block xz archives by default. Block size can be \r
- specified with -ms[Size]{m|g} switch.\r
-- xz decoder now can unpack random block from multi-block xz archives.\r
-- 7-Zip command line: @listfile now doesn't work after -- switch.\r
- Use -i@listfile before -- switch instead.\r
-- The BUGs were fixed:\r
- 7-Zip 17.00 beta crashed for commands that write anti-item to 7z archive.\r
-\r
-\r
-17.00 beta 2017-04-29\r
--------------------------\r
-- NewHandler.h / NewHandler.cpp: \r
- now it redefines operator new() only for old MSVC compilers (_MSC_VER < 1900).\r
-- C/7zTypes.h : the names of variables in interface structures were changed (vt).\r
-- Some bugs were fixed. 7-Zip could crash in some cases.\r
-- Some internal changes in code.\r
-\r
-\r
-16.04 2016-10-04\r
--------------------------\r
-- The bug was fixed in DllSecur.c.\r
-\r
-\r
-16.03 2016-09-28\r
--------------------------\r
-- SFX modules now use some protection against DLL preloading attack.\r
-- Some bugs in 7z code were fixed.\r
-\r
-\r
-16.02 2016-05-21\r
--------------------------\r
-- The BUG in 16.00 - 16.01 was fixed:\r
- Split Handler (SplitHandler.cpp) returned incorrect \r
- total size value (kpidSize) for split archives.\r
-\r
-\r
-16.01 2016-05-19\r
-------------------------- \r
-- Some internal changes to reduce the number of compiler warnings.\r
-\r
-\r
-16.00 2016-05-10\r
-------------------------- \r
-- Some bugs were fixed.\r
-\r
-\r
-15.12 2015-11-19\r
-------------------------- \r
-- The BUG in C version of 7z decoder was fixed:\r
- 7zDec.c : SzDecodeLzma2()\r
- 7z decoder could mistakenly report about decoding error for some 7z archives\r
- that use LZMA2 compression method.\r
- The probability to get that mistaken decoding error report was about \r
- one error per 16384 solid blocks for solid blocks larger than 16 KB (compressed size). \r
-- The BUG (in 9.26-15.11) in C version of 7z decoder was fixed:\r
- 7zArcIn.c : SzReadHeader2()\r
- 7z decoder worked incorrectly for 7z archives that contain \r
- empty solid blocks, that can be placed to 7z archive, if some file is \r
- unavailable for reading during archive creation.\r
-\r
-\r
-15.09 beta 2015-10-16\r
-------------------------- \r
-- The BUG in LZMA / LZMA2 encoding code was fixed.\r
- The BUG in LzFind.c::MatchFinder_ReadBlock() function.\r
- If input data size is larger than (4 GiB - dictionary_size),\r
- the following code worked incorrectly:\r
- - LZMA : LzmaEnc_MemEncode(), LzmaEncode() : LZMA encoding functions \r
- for compressing from memory to memory. \r
- That BUG is not related to LZMA encoder version that works via streams.\r
- - LZMA2 : multi-threaded version of LZMA2 encoder worked incorrectly, if \r
- default value of chunk size (CLzma2EncProps::blockSize) is changed \r
- to value larger than (4 GiB - dictionary_size).\r
-\r
-\r
-9.38 beta 2015-01-03\r
-------------------------- \r
-- The BUG in 9.31-9.37 was fixed:\r
- IArchiveGetRawProps interface was disabled for 7z archives.\r
-- The BUG in 9.26-9.36 was fixed:\r
- Some code in CPP\7zip\Archive\7z\ worked correctly only under Windows.\r
-\r
-\r
-9.36 beta 2014-12-26\r
-------------------------- \r
-- The BUG in command line version was fixed:\r
- 7-Zip created temporary archive in current folder during update archive\r
- operation, if -w{Path} switch was not specified. \r
- The fixed 7-Zip creates temporary archive in folder that contains updated archive.\r
-- The BUG in 9.33-9.35 was fixed:\r
- 7-Zip silently ignored file reading errors during 7z or gz archive creation,\r
- and the created archive contained only part of file that was read before error.\r
- The fixed 7-Zip stops archive creation and it reports about error.\r
-\r
-\r
-9.35 beta 2014-12-07\r
-------------------------- \r
-- 7zr.exe now support AES encryption.\r
-- SFX mudules were added to LZMA SDK\r
-- Some bugs were fixed.\r
-\r
-\r
-9.21 beta 2011-04-11\r
-------------------------- \r
-- New class FString for file names at file systems.\r
-- Speed optimization in CRC code for big-endian CPUs.\r
-- The BUG in Lzma2Dec.c was fixed:\r
- Lzma2Decode function didn't work.\r
-\r
-\r
-9.18 beta 2010-11-02\r
-------------------------- \r
-- New small SFX module for installers (SfxSetup).\r
-\r
-\r
-9.12 beta 2010-03-24\r
--------------------------\r
-- The BUG in LZMA SDK 9.* was fixed: LZMA2 codec didn't work,\r
- if more than 10 threads were used (or more than 20 threads in some modes).\r
-\r
-\r
-9.11 beta 2010-03-15\r
--------------------------\r
-- PPMd compression method support\r
- \r
-\r
-9.09 2009-12-12\r
--------------------------\r
-- The bug was fixed:\r
- Utf16_To_Utf8 funstions in UTFConvert.cpp and 7zMain.c\r
- incorrectly converted surrogate characters (the code >= 0x10000) to UTF-8.\r
-- Some bugs were fixed\r
-\r
-\r
-9.06 2009-08-17\r
--------------------------\r
-- Some changes in ANSI-C 7z Decoder interfaces.\r
-\r
-\r
-9.04 2009-05-30\r
--------------------------\r
-- LZMA2 compression method support\r
-- xz format support\r
-\r
-\r
-4.65 2009-02-03\r
--------------------------\r
-- Some minor fixes\r
-\r
-\r
-4.63 2008-12-31\r
--------------------------\r
-- Some minor fixes\r
-\r
-\r
-4.61 beta 2008-11-23\r
--------------------------\r
-- The bug in ANSI-C LZMA Decoder was fixed:\r
- If encoded stream was corrupted, decoder could access memory \r
- outside of allocated range.\r
-- Some changes in ANSI-C 7z Decoder interfaces.\r
-- LZMA SDK is placed in the public domain.\r
-\r
-\r
-4.60 beta 2008-08-19\r
--------------------------\r
-- Some minor fixes.\r
-\r
-\r
-4.59 beta 2008-08-13\r
--------------------------\r
-- The bug was fixed:\r
- LZMA Encoder in fast compression mode could access memory outside of \r
- allocated range in some rare cases.\r
-\r
-\r
-4.58 beta 2008-05-05\r
--------------------------\r
-- ANSI-C LZMA Decoder was rewritten for speed optimizations.\r
-- ANSI-C LZMA Encoder was included to LZMA SDK.\r
-- C++ LZMA code now is just wrapper over ANSI-C code.\r
-\r
-\r
-4.57 2007-12-12\r
--------------------------\r
-- Speed optimizations in Ñ++ LZMA Decoder. \r
-- Small changes for more compatibility with some C/C++ compilers.\r
-\r
-\r
-4.49 beta 2007-07-05\r
--------------------------\r
-- .7z ANSI-C Decoder:\r
- - now it supports BCJ and BCJ2 filters\r
- - now it supports files larger than 4 GB.\r
- - now it supports "Last Write Time" field for files.\r
-- C++ code for .7z archives compressing/decompressing from 7-zip \r
- was included to LZMA SDK.\r
- \r
-\r
-4.43 2006-06-04\r
--------------------------\r
-- Small changes for more compatibility with some C/C++ compilers.\r
- \r
-\r
-4.42 2006-05-15\r
--------------------------\r
-- Small changes in .h files in ANSI-C version.\r
- \r
-\r
-4.39 beta 2006-04-14\r
--------------------------\r
-- The bug in versions 4.33b:4.38b was fixed:\r
- C++ version of LZMA encoder could not correctly compress \r
- files larger than 2 GB with HC4 match finder (-mfhc4).\r
- \r
-\r
-4.37 beta 2005-04-06\r
--------------------------\r
-- Fixes in C++ code: code could no be compiled if _NO_EXCEPTIONS was defined. \r
-\r
-\r
-4.35 beta 2005-03-02\r
--------------------------\r
-- The bug was fixed in C++ version of LZMA Decoder:\r
- If encoded stream was corrupted, decoder could access memory \r
- outside of allocated range.\r
-\r
-\r
-4.34 beta 2006-02-27\r
--------------------------\r
-- Compressing speed and memory requirements for compressing were increased\r
-- LZMA now can use only these match finders: HC4, BT2, BT3, BT4\r
-\r
-\r
-4.32 2005-12-09\r
--------------------------\r
-- Java version of LZMA SDK was included\r
-\r
-\r
-4.30 2005-11-20\r
--------------------------\r
-- Compression ratio was improved in -a2 mode\r
-- Speed optimizations for compressing in -a2 mode\r
-- -fb switch now supports values up to 273\r
-- The bug in 7z_C (7zIn.c) was fixed:\r
- It used Alloc/Free functions from different memory pools.\r
- So if program used two memory pools, it worked incorrectly.\r
-- 7z_C: .7z format supporting was improved\r
-- LZMA# SDK (C#.NET version) was included\r
-\r
-\r
-4.27 (Updated) 2005-09-21\r
--------------------------\r
-- Some GUIDs/interfaces in C++ were changed.\r
- IStream.h:\r
- ISequentialInStream::Read now works as old ReadPart\r
- ISequentialOutStream::Write now works as old WritePart\r
-\r
-\r
-4.27 2005-08-07\r
--------------------------\r
-- The bug in LzmaDecodeSize.c was fixed:\r
- if _LZMA_IN_CB and _LZMA_OUT_READ were defined,\r
- decompressing worked incorrectly.\r
-\r
-\r
-4.26 2005-08-05\r
--------------------------\r
-- Fixes in 7z_C code and LzmaTest.c:\r
- previous versions could work incorrectly,\r
- if malloc(0) returns 0\r
-\r
-\r
-4.23 2005-06-29\r
--------------------------\r
-- Small fixes in C++ code\r
-\r
-\r
-4.22 2005-06-10\r
--------------------------\r
-- Small fixes\r
-\r
-\r
-4.21 2005-06-08\r
--------------------------\r
-- Interfaces for ANSI-C LZMA Decoder (LzmaDecode.c) were changed\r
-- New additional version of ANSI-C LZMA Decoder with zlib-like interface:\r
- - LzmaStateDecode.h\r
- - LzmaStateDecode.c\r
- - LzmaStateTest.c\r
-- ANSI-C LZMA Decoder now can decompress files larger than 4 GB\r
-\r
-\r
-4.17 2005-04-18\r
--------------------------\r
-- New example for RAM->RAM compressing/decompressing: \r
- LZMA + BCJ (filter for x86 code):\r
- - LzmaRam.h\r
- - LzmaRam.cpp\r
- - LzmaRamDecode.h\r
- - LzmaRamDecode.c\r
- - -f86 switch for lzma.exe\r
-\r
-\r
-4.16 2005-03-29\r
--------------------------\r
-- The bug was fixed in LzmaDecode.c (ANSI-C LZMA Decoder): \r
- If _LZMA_OUT_READ was defined, and if encoded stream was corrupted,\r
- decoder could access memory outside of allocated range.\r
-- Speed optimization of ANSI-C LZMA Decoder (now it's about 20% faster).\r
- Old version of LZMA Decoder now is in file LzmaDecodeSize.c. \r
- LzmaDecodeSize.c can provide slightly smaller code than LzmaDecode.c\r
-- Small speed optimization in LZMA C++ code\r
-- filter for SPARC's code was added\r
-- Simplified version of .7z ANSI-C Decoder was included\r
-\r
-\r
-4.06 2004-09-05\r
--------------------------\r
-- The bug in v4.05 was fixed:\r
- LZMA-Encoder didn't release output stream in some cases.\r
-\r
-\r
-4.05 2004-08-25\r
--------------------------\r
-- Source code of filters for x86, IA-64, ARM, ARM-Thumb \r
- and PowerPC code was included to SDK\r
-- Some internal minor changes\r
-\r
-\r
-4.04 2004-07-28\r
--------------------------\r
-- More compatibility with some C++ compilers\r
-\r
-\r
-4.03 2004-06-18\r
--------------------------\r
-- "Benchmark" command was added. It measures compressing \r
- and decompressing speed and shows rating values. \r
- Also it checks hardware errors.\r
-\r
-\r
-4.02 2004-06-10\r
--------------------------\r
-- C++ LZMA Encoder/Decoder code now is more portable\r
- and it can be compiled by GCC on Linux.\r
-\r
-\r
-4.01 2004-02-15\r
--------------------------\r
-- Some detection of data corruption was enabled.\r
- LzmaDecode.c / RangeDecoderReadByte\r
- .....\r
- {\r
- rd->ExtraBytes = 1;\r
- return 0xFF;\r
- }\r
-\r
-\r
-4.00 2004-02-13\r
--------------------------\r
-- Original version of LZMA SDK\r
-\r
-\r
-\r
-HISTORY of the LZMA\r
--------------------\r
- 2001-2008: Improvements to LZMA compressing/decompressing code, \r
- keeping compatibility with original LZMA format\r
- 1996-2001: Development of LZMA compression format\r
-\r
- Some milestones:\r
-\r
- 2001-08-30: LZMA compression was added to 7-Zip\r
- 1999-01-02: First version of 7-Zip was released\r
- \r
-\r
-End of document\r
+++ /dev/null
-LZMA SDK 22.01\r
---------------\r
-\r
-LZMA SDK provides the documentation, samples, header files,\r
-libraries, and tools you need to develop applications that \r
-use 7z / LZMA / LZMA2 / XZ compression.\r
-\r
-LZMA is an improved version of famous LZ77 compression algorithm. \r
-It was improved in way of maximum increasing of compression ratio,\r
-keeping high decompression speed and low memory requirements for \r
-decompressing.\r
-\r
-LZMA2 is a LZMA based compression method. LZMA2 provides better \r
-multithreading support for compression than LZMA and some other improvements.\r
-\r
-7z is a file format for data compression and file archiving.\r
-7z is a main file format for 7-Zip compression program (www.7-zip.org).\r
-7z format supports different compression methods: LZMA, LZMA2 and others.\r
-7z also supports AES-256 based encryption.\r
-\r
-XZ is a file format for data compression that uses LZMA2 compression.\r
-XZ format provides additional features: SHA/CRC check, filters for \r
-improved compression ratio, splitting to blocks and streams,\r
-\r
-\r
-\r
-LICENSE\r
--------\r
-\r
-LZMA SDK is written and placed in the public domain by Igor Pavlov.\r
-\r
-Some code in LZMA SDK is based on public domain code from another developers:\r
- 1) PPMd var.H (2001): Dmitry Shkarin\r
- 2) SHA-256: Wei Dai (Crypto++ library)\r
-\r
-Anyone is free to copy, modify, publish, use, compile, sell, or distribute the \r
-original LZMA SDK code, either in source code form or as a compiled binary, for \r
-any purpose, commercial or non-commercial, and by any means.\r
-\r
-LZMA SDK code is compatible with open source licenses, for example, you can \r
-include it to GNU GPL or GNU LGPL code.\r
-\r
-\r
-LZMA SDK Contents\r
------------------\r
-\r
- Source code:\r
-\r
- - C / C++ / C# / Java - LZMA compression and decompression\r
- - C / C++ - LZMA2 compression and decompression\r
- - C / C++ - XZ compression and decompression\r
- - C - 7z decompression\r
- - C++ - 7z compression and decompression\r
- - C - small SFXs for installers (7z decompression)\r
- - C++ - SFXs and SFXs for installers (7z decompression)\r
-\r
- Precomiled binaries:\r
-\r
- - console programs for lzma / 7z / xz compression and decompression\r
- - SFX modules for installers.\r
-\r
-\r
-UNIX/Linux version \r
-------------------\r
-There are several otpions to compile 7-Zip with different compilers: gcc and clang.\r
-Also 7-Zip code contains two versions for some critical parts of code: in C and in Assembeler.\r
-So if you compile the version with Assembeler code, you will get faster 7-Zip binary.\r
-\r
-7-Zip's assembler code uses the following syntax for different platforms:\r
-\r
-1) x86 and x86-64 (AMD64): MASM syntax. \r
- There are 2 programs that supports MASM syntax in Linux.\r
-' 'Asmc Macro Assembler and JWasm. But JWasm now doesn't support some \r
- cpu instructions used in 7-Zip.\r
- So you must install Asmc Macro Assembler in Linux, if you want to compile fastest version\r
- of 7-Zip x86 and x86-64:\r
- https://github.com/nidud/asmc\r
-\r
-2) arm64: GNU assembler for ARM64 with preprocessor. \r
- That systax of that arm64 assembler code in 7-Zip is supported by GCC and CLANG for ARM64.\r
-\r
-There are different binaries that can be compiled from 7-Zip source.\r
-There are 2 main files in folder for compiling:\r
- makefile - that can be used for compiling Windows version of 7-Zip with nmake command\r
- makefile.gcc - that can be used for compiling Linux/macOS versions of 7-Zip with make command\r
-\r
-At first you must change the current folder to folder that contains `makefile.gcc`:\r
-\r
- cd CPP/7zip/Bundles/Alone7z\r
-\r
-Then you can compile `makefile.gcc` with the command:\r
-\r
- make -j -f makefile.gcc\r
-\r
-Also there are additional "*.mak" files in folder "CPP/7zip/" that can be used to compile \r
-7-Zip binaries with optimized code and optimzing options.\r
-\r
-To compile with GCC without assembler:\r
- cd CPP/7zip/Bundles/Alone7z\r
- make -j -f ../../cmpl_gcc.mak\r
-\r
-To compile with CLANG without assembler:\r
- make -j -f ../../cmpl_clang.mak\r
-\r
-To compile 7-Zip for x86-64 with asmc assembler:\r
- make -j -f ../../cmpl_gcc_x64.mak\r
-\r
-To compile 7-Zip for arm64 with assembler:\r
- make -j -f ../../cmpl_gcc_arm64.mak\r
-\r
-To compile 7-Zip for arm64 for macOS:\r
- make -j -f ../../cmpl_mac_arm64.mak\r
-\r
-Also you can change some compiler options in the mak files:\r
- cmpl_gcc.mak\r
- var_gcc.mak\r
- warn_gcc.mak\r
-\r
-\r
-\r
-Also you can use p7zip (port of 7-Zip for POSIX systems like Unix or Linux):\r
- \r
- http://p7zip.sourceforge.net/\r
-\r
-\r
-Files\r
------\r
-\r
-DOC/7zC.txt - 7z ANSI-C Decoder description\r
-DOC/7zFormat.txt - 7z Format description\r
-DOC/installer.txt - information about 7-Zip for installers\r
-DOC/lzma.txt - LZMA compression description\r
-DOC/lzma-sdk.txt - LZMA SDK description (this file)\r
-DOC/lzma-history.txt - history of LZMA SDK\r
-DOC/lzma-specification.txt - Specification of LZMA\r
-DOC/Methods.txt - Compression method IDs for .7z\r
-\r
-bin/installer/ - example script to create installer that uses SFX module,\r
-\r
-bin/7zdec.exe - simplified 7z archive decoder\r
-bin/7zr.exe - 7-Zip console program (reduced version)\r
-bin/x64/7zr.exe - 7-Zip console program (reduced version) (x64 version)\r
-bin/lzma.exe - file->file LZMA encoder/decoder for Windows\r
-bin/7zS2.sfx - small SFX module for installers (GUI version)\r
-bin/7zS2con.sfx - small SFX module for installers (Console version)\r
-bin/7zSD.sfx - SFX module for installers.\r
-\r
-\r
-7zDec.exe\r
----------\r
-7zDec.exe is simplified 7z archive decoder.\r
-It supports only LZMA, LZMA2, and PPMd methods.\r
-7zDec decodes whole solid block from 7z archive to RAM.\r
-The RAM consumption can be high.\r
-\r
-\r
-\r
-\r
-Source code structure\r
----------------------\r
-\r
-\r
-Asm/ - asm files (optimized code for CRC calculation and Intel-AES encryption)\r
-\r
-C/ - C files (compression / decompression and other)\r
- Util/\r
- 7z - 7z decoder program (decoding 7z files)\r
- Lzma - LZMA program (file->file LZMA encoder/decoder).\r
- LzmaLib - LZMA library (.DLL for Windows)\r
- SfxSetup - small SFX module for installers \r
-\r
-CPP/ -- CPP files\r
-\r
- Common - common files for C++ projects\r
- Windows - common files for Windows related code\r
-\r
- 7zip - files related to 7-Zip\r
-\r
- Archive - files related to archiving\r
-\r
- Common - common files for archive handling\r
- 7z - 7z C++ Encoder/Decoder\r
-\r
- Bundles - Modules that are bundles of other modules (files)\r
- \r
- Alone7z - 7zr.exe: Standalone 7-Zip console program (reduced version)\r
- Format7zExtractR - 7zxr.dll: Reduced version of 7z DLL: extracting from 7z/LZMA/BCJ/BCJ2.\r
- Format7zR - 7zr.dll: Reduced version of 7z DLL: extracting/compressing to 7z/LZMA/BCJ/BCJ2\r
- LzmaCon - lzma.exe: LZMA compression/decompression\r
- LzmaSpec - example code for LZMA Specification\r
- SFXCon - 7zCon.sfx: Console 7z SFX module\r
- SFXSetup - 7zS.sfx: 7z SFX module for installers\r
- SFXWin - 7z.sfx: GUI 7z SFX module\r
-\r
- Common - common files for 7-Zip\r
-\r
- Compress - files for compression/decompression\r
-\r
- Crypto - files for encryption / decompression\r
-\r
- UI - User Interface files\r
- \r
- Client7z - Test application for 7za.dll, 7zr.dll, 7zxr.dll\r
- Common - Common UI files\r
- Console - Code for console program (7z.exe)\r
- Explorer - Some code from 7-Zip Shell extension\r
- FileManager - Some GUI code from 7-Zip File Manager\r
- GUI - Some GUI code from 7-Zip\r
-\r
-\r
-CS/ - C# files\r
- 7zip\r
- Common - some common files for 7-Zip\r
- Compress - files related to compression/decompression\r
- LZ - files related to LZ (Lempel-Ziv) compression algorithm\r
- LZMA - LZMA compression/decompression\r
- LzmaAlone - file->file LZMA compression/decompression\r
- RangeCoder - Range Coder (special code of compression/decompression)\r
-\r
-Java/ - Java files\r
- SevenZip\r
- Compression - files related to compression/decompression\r
- LZ - files related to LZ (Lempel-Ziv) compression algorithm\r
- LZMA - LZMA compression/decompression\r
- RangeCoder - Range Coder (special code of compression/decompression)\r
-\r
-\r
-Note: \r
- Asm / C / C++ source code of LZMA SDK is part of 7-Zip's source code.\r
- 7-Zip's source code can be downloaded from 7-Zip's SourceForge page:\r
-\r
- http://sourceforge.net/projects/sevenzip/\r
-\r
-\r
-\r
-LZMA features\r
--------------\r
- - Variable dictionary size (up to 1 GB)\r
- - Estimated compressing speed: about 2 MB/s on 2 GHz CPU\r
- - Estimated decompressing speed: \r
- - 20-30 MB/s on modern 2 GHz cpu\r
- - 1-2 MB/s on 200 MHz simple RISC cpu: (ARM, MIPS, PowerPC)\r
- - Small memory requirements for decompressing (16 KB + DictionarySize)\r
- - Small code size for decompressing: 5-8 KB\r
-\r
-LZMA decoder uses only integer operations and can be \r
-implemented in any modern 32-bit CPU (or on 16-bit CPU with some conditions).\r
-\r
-Some critical operations that affect the speed of LZMA decompression:\r
- 1) 32*16 bit integer multiply\r
- 2) Mispredicted branches (penalty mostly depends from pipeline length)\r
- 3) 32-bit shift and arithmetic operations\r
-\r
-The speed of LZMA decompressing mostly depends from CPU speed.\r
-Memory speed has no big meaning. But if your CPU has small data cache, \r
-overall weight of memory speed will slightly increase.\r
-\r
-\r
-How To Use\r
-----------\r
-\r
-Using LZMA encoder/decoder executable\r
---------------------------------------\r
-\r
-Usage: LZMA <e|d> inputFile outputFile [<switches>...]\r
-\r
- e: encode file\r
-\r
- d: decode file\r
-\r
- b: Benchmark. There are two tests: compressing and decompressing \r
- with LZMA method. Benchmark shows rating in MIPS (million \r
- instructions per second). Rating value is calculated from \r
- measured speed and it is normalized with Intel's Core 2 results.\r
- Also Benchmark checks possible hardware errors (RAM \r
- errors in most cases). Benchmark uses these settings:\r
- (-a1, -d21, -fb32, -mfbt4). You can change only -d parameter. \r
- Also you can change the number of iterations. Example for 30 iterations:\r
- LZMA b 30\r
- Default number of iterations is 10.\r
-\r
-<Switches>\r
- \r
-\r
- -a{N}: set compression mode 0 = fast, 1 = normal\r
- default: 1 (normal)\r
-\r
- d{N}: Sets Dictionary size - [0, 30], default: 23 (8MB)\r
- The maximum value for dictionary size is 1 GB = 2^30 bytes.\r
- Dictionary size is calculated as DictionarySize = 2^N bytes. \r
- For decompressing file compressed by LZMA method with dictionary \r
- size D = 2^N you need about D bytes of memory (RAM).\r
-\r
- -fb{N}: set number of fast bytes - [5, 273], default: 128\r
- Usually big number gives a little bit better compression ratio \r
- and slower compression process.\r
-\r
- -lc{N}: set number of literal context bits - [0, 8], default: 3\r
- Sometimes lc=4 gives gain for big files.\r
-\r
- -lp{N}: set number of literal pos bits - [0, 4], default: 0\r
- lp switch is intended for periodical data when period is \r
- equal 2^N. For example, for 32-bit (4 bytes) \r
- periodical data you can use lp=2. Often it's better to set lc0, \r
- if you change lp switch.\r
-\r
- -pb{N}: set number of pos bits - [0, 4], default: 2\r
- pb switch is intended for periodical data \r
- when period is equal 2^N.\r
-\r
- -mf{MF_ID}: set Match Finder. Default: bt4. \r
- Algorithms from hc* group doesn't provide good compression \r
- ratio, but they often works pretty fast in combination with \r
- fast mode (-a0).\r
-\r
- Memory requirements depend from dictionary size \r
- (parameter "d" in table below). \r
-\r
- MF_ID Memory Description\r
-\r
- bt2 d * 9.5 + 4MB Binary Tree with 2 bytes hashing.\r
- bt3 d * 11.5 + 4MB Binary Tree with 3 bytes hashing.\r
- bt4 d * 11.5 + 4MB Binary Tree with 4 bytes hashing.\r
- hc4 d * 7.5 + 4MB Hash Chain with 4 bytes hashing.\r
-\r
- -eos: write End Of Stream marker. By default LZMA doesn't write \r
- eos marker, since LZMA decoder knows uncompressed size \r
- stored in .lzma file header.\r
-\r
- -si: Read data from stdin (it will write End Of Stream marker).\r
- -so: Write data to stdout\r
-\r
-\r
-Examples:\r
-\r
-1) LZMA e file.bin file.lzma -d16 -lc0 \r
-\r
-compresses file.bin to file.lzma with 64 KB dictionary (2^16=64K) \r
-and 0 literal context bits. -lc0 allows to reduce memory requirements \r
-for decompression.\r
-\r
-\r
-2) LZMA e file.bin file.lzma -lc0 -lp2\r
-\r
-compresses file.bin to file.lzma with settings suitable \r
-for 32-bit periodical data (for example, ARM or MIPS code).\r
-\r
-3) LZMA d file.lzma file.bin\r
-\r
-decompresses file.lzma to file.bin.\r
-\r
-\r
-Compression ratio hints\r
------------------------\r
-\r
-Recommendations\r
----------------\r
-\r
-To increase the compression ratio for LZMA compressing it's desirable \r
-to have aligned data (if it's possible) and also it's desirable to locate\r
-data in such order, where code is grouped in one place and data is \r
-grouped in other place (it's better than such mixing: code, data, code,\r
-data, ...).\r
-\r
-\r
-Filters\r
--------\r
-You can increase the compression ratio for some data types, using\r
-special filters before compressing. For example, it's possible to \r
-increase the compression ratio on 5-10% for code for those CPU ISAs: \r
-x86, IA-64, ARM, ARM-Thumb, PowerPC, SPARC.\r
-\r
-You can find C source code of such filters in C/Bra*.* files\r
-\r
-You can check the compression ratio gain of these filters with such \r
-7-Zip commands (example for ARM code):\r
-No filter:\r
- 7z a a1.7z a.bin -m0=lzma\r
-\r
-With filter for little-endian ARM code:\r
- 7z a a2.7z a.bin -m0=arm -m1=lzma \r
-\r
-It works in such manner:\r
-Compressing = Filter_encoding + LZMA_encoding\r
-Decompressing = LZMA_decoding + Filter_decoding\r
-\r
-Compressing and decompressing speed of such filters is very high,\r
-so it will not increase decompressing time too much.\r
-Moreover, it reduces decompression time for LZMA_decoding, \r
-since compression ratio with filtering is higher.\r
-\r
-These filters convert CALL (calling procedure) instructions \r
-from relative offsets to absolute addresses, so such data becomes more \r
-compressible.\r
-\r
-For some ISAs (for example, for MIPS) it's impossible to get gain from such filter.\r
-\r
-\r
-\r
----\r
-\r
-http://www.7-zip.org\r
-http://www.7-zip.org/sdk.html\r
-http://www.7-zip.org/support.html\r
+++ /dev/null
-LZMA specification (DRAFT version)\r
-----------------------------------\r
-\r
-Author: Igor Pavlov\r
-Date: 2015-06-14\r
-\r
-This specification defines the format of LZMA compressed data and lzma file format.\r
-\r
-Notation \r
---------\r
-\r
-We use the syntax of C++ programming language.\r
-We use the following types in C++ code:\r
- unsigned - unsigned integer, at least 16 bits in size\r
- int - signed integer, at least 16 bits in size\r
- UInt64 - 64-bit unsigned integer\r
- UInt32 - 32-bit unsigned integer\r
- UInt16 - 16-bit unsigned integer\r
- Byte - 8-bit unsigned integer\r
- bool - boolean type with two possible values: false, true\r
-\r
-\r
-lzma file format\r
-================\r
-\r
-The lzma file contains the raw LZMA stream and the header with related properties.\r
-\r
-The files in that format use ".lzma" extension.\r
-\r
-The lzma file format layout:\r
-\r
-Offset Size Description\r
-\r
- 0 1 LZMA model properties (lc, lp, pb) in encoded form\r
- 1 4 Dictionary size (32-bit unsigned integer, little-endian)\r
- 5 8 Uncompressed size (64-bit unsigned integer, little-endian)\r
- 13 Compressed data (LZMA stream)\r
-\r
-LZMA properties:\r
-\r
- name Range Description\r
-\r
- lc [0, 8] the number of "literal context" bits\r
- lp [0, 4] the number of "literal pos" bits\r
- pb [0, 4] the number of "pos" bits\r
-dictSize [0, 2^32 - 1] the dictionary size \r
-\r
-The following code encodes LZMA properties:\r
-\r
-void EncodeProperties(Byte *properties)\r
-{\r
- properties[0] = (Byte)((pb * 5 + lp) * 9 + lc);\r
- Set_UInt32_LittleEndian(properties + 1, dictSize);\r
-}\r
-\r
-If the value of dictionary size in properties is smaller than (1 << 12),\r
-the LZMA decoder must set the dictionary size variable to (1 << 12).\r
-\r
-#define LZMA_DIC_MIN (1 << 12)\r
-\r
- unsigned lc, pb, lp;\r
- UInt32 dictSize;\r
- UInt32 dictSizeInProperties;\r
-\r
- void DecodeProperties(const Byte *properties)\r
- {\r
- unsigned d = properties[0];\r
- if (d >= (9 * 5 * 5))\r
- throw "Incorrect LZMA properties";\r
- lc = d % 9;\r
- d /= 9;\r
- pb = d / 5;\r
- lp = d % 5;\r
- dictSizeInProperties = 0;\r
- for (int i = 0; i < 4; i++)\r
- dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i);\r
- dictSize = dictSizeInProperties;\r
- if (dictSize < LZMA_DIC_MIN)\r
- dictSize = LZMA_DIC_MIN;\r
- }\r
-\r
-If "Uncompressed size" field contains ones in all 64 bits, it means that\r
-uncompressed size is unknown and there is the "end marker" in stream,\r
-that indicates the end of decoding point.\r
-In opposite case, if the value from "Uncompressed size" field is not\r
-equal to ((2^64) - 1), the LZMA stream decoding must be finished after\r
-specified number of bytes (Uncompressed size) is decoded. And if there \r
-is the "end marker", the LZMA decoder must read that marker also.\r
-\r
-\r
-The new scheme to encode LZMA properties\r
-----------------------------------------\r
-\r
-If LZMA compression is used for some another format, it's recommended to\r
-use a new improved scheme to encode LZMA properties. That new scheme was\r
-used in xz format that uses the LZMA2 compression algorithm.\r
-The LZMA2 is a new compression algorithm that is based on the LZMA algorithm.\r
-\r
-The dictionary size in LZMA2 is encoded with just one byte and LZMA2 supports\r
-only reduced set of dictionary sizes:\r
- (2 << 11), (3 << 11),\r
- (2 << 12), (3 << 12),\r
- ...\r
- (2 << 30), (3 << 30),\r
- (2 << 31) - 1\r
-\r
-The dictionary size can be extracted from encoded value with the following code:\r
-\r
- dictSize = (p == 40) ? 0xFFFFFFFF : (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11));\r
-\r
-Also there is additional limitation (lc + lp <= 4) in LZMA2 for values of \r
-"lc" and "lp" properties:\r
-\r
- if (lc + lp > 4)\r
- throw "Unsupported properties: (lc + lp) > 4";\r
-\r
-There are some advantages for LZMA decoder with such (lc + lp) value\r
-limitation. It reduces the maximum size of tables allocated by decoder.\r
-And it reduces the complexity of initialization procedure, that can be \r
-important to keep high speed of decoding of big number of small LZMA streams.\r
-\r
-It's recommended to use that limitation (lc + lp <= 4) for any new format\r
-that uses LZMA compression. Note that the combinations of "lc" and "lp" \r
-parameters, where (lc + lp > 4), can provide significant improvement in \r
-compression ratio only in some rare cases.\r
-\r
-The LZMA properties can be encoded into two bytes in new scheme:\r
-\r
-Offset Size Description\r
-\r
- 0 1 The dictionary size encoded with LZMA2 scheme\r
- 1 1 LZMA model properties (lc, lp, pb) in encoded form\r
-\r
-\r
-The RAM usage \r
-=============\r
-\r
-The RAM usage for LZMA decoder is determined by the following parts:\r
-\r
-1) The Sliding Window (from 4 KiB to 4 GiB).\r
-2) The probability model counter arrays (arrays of 16-bit variables).\r
-3) Some additional state variables (about 10 variables of 32-bit integers).\r
-\r
-\r
-The RAM usage for Sliding Window\r
---------------------------------\r
-\r
-There are two main scenarios of decoding:\r
-\r
-1) The decoding of full stream to one RAM buffer.\r
-\r
- If we decode full LZMA stream to one output buffer in RAM, the decoder \r
- can use that output buffer as sliding window. So the decoder doesn't \r
- need additional buffer allocated for sliding window.\r
-\r
-2) The decoding to some external storage.\r
-\r
- If we decode LZMA stream to external storage, the decoder must allocate\r
- the buffer for sliding window. The size of that buffer must be equal \r
- or larger than the value of dictionary size from properties of LZMA stream.\r
-\r
-In this specification we describe the code for decoding to some external\r
-storage. The optimized version of code for decoding of full stream to one\r
-output RAM buffer can require some minor changes in code.\r
-\r
-\r
-The RAM usage for the probability model counters\r
-------------------------------------------------\r
-\r
-The size of the probability model counter arrays is calculated with the \r
-following formula:\r
-\r
-size_of_prob_arrays = 1846 + 768 * (1 << (lp + lc))\r
-\r
-Each probability model counter is 11-bit unsigned integer.\r
-If we use 16-bit integer variables (2-byte integers) for these probability \r
-model counters, the RAM usage required by probability model counter arrays \r
-can be estimated with the following formula:\r
-\r
- RAM = 4 KiB + 1.5 KiB * (1 << (lp + lc))\r
-\r
-For example, for default LZMA parameters (lp = 0 and lc = 3), the RAM usage is\r
-\r
- RAM_lc3_lp0 = 4 KiB + 1.5 KiB * 8 = 16 KiB\r
-\r
-The maximum RAM state usage is required for decoding the stream with lp = 4 \r
-and lc = 8:\r
-\r
- RAM_lc8_lp4 = 4 KiB + 1.5 KiB * 4096 = 6148 KiB\r
-\r
-If the decoder uses LZMA2's limited property condition \r
-(lc + lp <= 4), the RAM usage will be not larger than\r
-\r
- RAM_lc_lp_4 = 4 KiB + 1.5 KiB * 16 = 28 KiB\r
-\r
-\r
-The RAM usage for encoder\r
--------------------------\r
-\r
-There are many variants for LZMA encoding code.\r
-These variants have different values for memory consumption.\r
-Note that memory consumption for LZMA Encoder can not be \r
-smaller than memory consumption of LZMA Decoder for same stream.\r
-\r
-The RAM usage required by modern effective implementation of \r
-LZMA Encoder can be estimated with the following formula:\r
-\r
- Encoder_RAM_Usage = 4 MiB + 11 * dictionarySize.\r
-\r
-But there are some modes of the encoder that require less memory.\r
-\r
-\r
-LZMA Decoding\r
-=============\r
-\r
-The LZMA compression algorithm uses LZ-based compression with Sliding Window\r
-and Range Encoding as entropy coding method.\r
-\r
-\r
-Sliding Window\r
---------------\r
-\r
-LZMA uses Sliding Window compression similar to LZ77 algorithm.\r
-\r
-LZMA stream must be decoded to the sequence that consists\r
-of MATCHES and LITERALS:\r
- \r
- - a LITERAL is a 8-bit character (one byte).\r
- The decoder just puts that LITERAL to the uncompressed stream.\r
- \r
- - a MATCH is a pair of two numbers (DISTANCE-LENGTH pair).\r
- The decoder takes one byte exactly "DISTANCE" characters behind\r
- current position in the uncompressed stream and puts it to \r
- uncompressed stream. The decoder must repeat it "LENGTH" times.\r
-\r
-The "DISTANCE" can not be larger than dictionary size.\r
-And the "DISTANCE" can not be larger than the number of bytes in\r
-the uncompressed stream that were decoded before that match.\r
-\r
-In this specification we use cyclic buffer to implement Sliding Window\r
-for LZMA decoder:\r
-\r
-class COutWindow\r
-{\r
- Byte *Buf;\r
- UInt32 Pos;\r
- UInt32 Size;\r
- bool IsFull;\r
-\r
-public:\r
- unsigned TotalPos;\r
- COutStream OutStream;\r
-\r
- COutWindow(): Buf(NULL) {}\r
- ~COutWindow() { delete []Buf; }\r
- \r
- void Create(UInt32 dictSize)\r
- {\r
- Buf = new Byte[dictSize];\r
- Pos = 0;\r
- Size = dictSize;\r
- IsFull = false;\r
- TotalPos = 0;\r
- }\r
-\r
- void PutByte(Byte b)\r
- {\r
- TotalPos++;\r
- Buf[Pos++] = b;\r
- if (Pos == Size)\r
- {\r
- Pos = 0;\r
- IsFull = true;\r
- }\r
- OutStream.WriteByte(b);\r
- }\r
-\r
- Byte GetByte(UInt32 dist) const\r
- {\r
- return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos];\r
- }\r
-\r
- void CopyMatch(UInt32 dist, unsigned len)\r
- {\r
- for (; len > 0; len--)\r
- PutByte(GetByte(dist));\r
- }\r
-\r
- bool CheckDistance(UInt32 dist) const\r
- {\r
- return dist <= Pos || IsFull;\r
- }\r
-\r
- bool IsEmpty() const\r
- {\r
- return Pos == 0 && !IsFull;\r
- }\r
-};\r
-\r
-\r
-In another implementation it's possible to use one buffer that contains \r
-Sliding Window and the whole data stream after uncompressing.\r
-\r
-\r
-Range Decoder\r
--------------\r
-\r
-LZMA algorithm uses Range Encoding (1) as entropy coding method.\r
-\r
-LZMA stream contains just one very big number in big-endian encoding.\r
-LZMA decoder uses the Range Decoder to extract a sequence of binary\r
-symbols from that big number.\r
-\r
-The state of the Range Decoder:\r
-\r
-struct CRangeDecoder\r
-{\r
- UInt32 Range; \r
- UInt32 Code;\r
- InputStream *InStream;\r
-\r
- bool Corrupted;\r
-}\r
-\r
-The notes about UInt32 type for the "Range" and "Code" variables:\r
-\r
- It's possible to use 64-bit (unsigned or signed) integer type\r
- for the "Range" and the "Code" variables instead of 32-bit unsigned,\r
- but some additional code must be used to truncate the values to \r
- low 32-bits after some operations.\r
-\r
- If the programming language does not support 32-bit unsigned integer type \r
- (like in case of JAVA language), it's possible to use 32-bit signed integer, \r
- but some code must be changed. For example, it's required to change the code\r
- that uses comparison operations for UInt32 variables in this specification.\r
-\r
-The Range Decoder can be in some states that can be treated as \r
-"Corruption" in LZMA stream. The Range Decoder uses the variable "Corrupted":\r
-\r
- (Corrupted == false), if the Range Decoder has not detected any corruption.\r
- (Corrupted == true), if the Range Decoder has detected some corruption.\r
-\r
-The reference LZMA Decoder ignores the value of the "Corrupted" variable.\r
-So it continues to decode the stream, even if the corruption can be detected\r
-in the Range Decoder. To provide the full compatibility with output of the \r
-reference LZMA Decoder, another LZMA Decoder implementations must also \r
-ignore the value of the "Corrupted" variable.\r
-\r
-The LZMA Encoder is required to create only such LZMA streams, that will not \r
-lead the Range Decoder to states, where the "Corrupted" variable is set to true.\r
-\r
-The Range Decoder reads first 5 bytes from input stream to initialize\r
-the state:\r
-\r
-bool CRangeDecoder::Init()\r
-{\r
- Corrupted = false;\r
- Range = 0xFFFFFFFF;\r
- Code = 0;\r
-\r
- Byte b = InStream->ReadByte();\r
- \r
- for (int i = 0; i < 4; i++)\r
- Code = (Code << 8) | InStream->ReadByte();\r
- \r
- if (b != 0 || Code == Range)\r
- Corrupted = true;\r
- return b == 0;\r
-}\r
-\r
-The LZMA Encoder always writes ZERO in initial byte of compressed stream.\r
-That scheme allows to simplify the code of the Range Encoder in the \r
-LZMA Encoder. If initial byte is not equal to ZERO, the LZMA Decoder must\r
-stop decoding and report error.\r
-\r
-After the last bit of data was decoded by Range Decoder, the value of the\r
-"Code" variable must be equal to 0. The LZMA Decoder must check it by \r
-calling the IsFinishedOK() function:\r
-\r
- bool IsFinishedOK() const { return Code == 0; }\r
-\r
-If there is corruption in data stream, there is big probability that\r
-the "Code" value will be not equal to 0 in the Finish() function. So that\r
-check in the IsFinishedOK() function provides very good feature for \r
-corruption detection.\r
-\r
-The value of the "Range" variable before each bit decoding can not be smaller \r
-than ((UInt32)1 << 24). The Normalize() function keeps the "Range" value in \r
-described range.\r
-\r
-#define kTopValue ((UInt32)1 << 24)\r
-\r
-void CRangeDecoder::Normalize()\r
-{\r
- if (Range < kTopValue)\r
- {\r
- Range <<= 8;\r
- Code = (Code << 8) | InStream->ReadByte();\r
- }\r
-}\r
-\r
-Notes: if the size of the "Code" variable is larger than 32 bits, it's\r
-required to keep only low 32 bits of the "Code" variable after the change\r
-in Normalize() function.\r
-\r
-If the LZMA Stream is not corrupted, the value of the "Code" variable is\r
-always smaller than value of the "Range" variable.\r
-But the Range Decoder ignores some types of corruptions, so the value of\r
-the "Code" variable can be equal or larger than value of the "Range" variable\r
-for some "Corrupted" archives.\r
-\r
-\r
-LZMA uses Range Encoding only with binary symbols of two types:\r
- 1) binary symbols with fixed and equal probabilities (direct bits)\r
- 2) binary symbols with predicted probabilities\r
-\r
-The DecodeDirectBits() function decodes the sequence of direct bits:\r
-\r
-UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits)\r
-{\r
- UInt32 res = 0;\r
- do\r
- {\r
- Range >>= 1;\r
- Code -= Range;\r
- UInt32 t = 0 - ((UInt32)Code >> 31);\r
- Code += Range & t;\r
- \r
- if (Code == Range)\r
- Corrupted = true;\r
- \r
- Normalize();\r
- res <<= 1;\r
- res += t + 1;\r
- }\r
- while (--numBits);\r
- return res;\r
-}\r
-\r
-\r
-The Bit Decoding with Probability Model\r
----------------------------------------\r
-\r
-The task of Bit Probability Model is to estimate probabilities of binary\r
-symbols. And then it provides the Range Decoder with that information.\r
-The better prediction provides better compression ratio.\r
-The Bit Probability Model uses statistical data of previous decoded\r
-symbols.\r
-\r
-That estimated probability is presented as 11-bit unsigned integer value\r
-that represents the probability of symbol "0".\r
-\r
-#define kNumBitModelTotalBits 11\r
-\r
-Mathematical probabilities can be presented with the following formulas:\r
- probability(symbol_0) = prob / 2048.\r
- probability(symbol_1) = 1 - Probability(symbol_0) = \r
- = 1 - prob / 2048 = \r
- = (2048 - prob) / 2048\r
-where the "prob" variable contains 11-bit integer probability counter.\r
-\r
-It's recommended to use 16-bit unsigned integer type, to store these 11-bit\r
-probability values:\r
-\r
-typedef UInt16 CProb;\r
-\r
-Each probability value must be initialized with value ((1 << 11) / 2),\r
-that represents the state, where probabilities of symbols 0 and 1 \r
-are equal to 0.5:\r
-\r
-#define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2)\r
-\r
-The INIT_PROBS macro is used to initialize the array of CProb variables:\r
-\r
-#define INIT_PROBS(p) \\r
- { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; }\r
-\r
-\r
-The DecodeBit() function decodes one bit.\r
-The LZMA decoder provides the pointer to CProb variable that contains \r
-information about estimated probability for symbol 0 and the Range Decoder \r
-updates that CProb variable after decoding. The Range Decoder increases \r
-estimated probability of the symbol that was decoded:\r
-\r
-#define kNumMoveBits 5\r
-\r
-unsigned CRangeDecoder::DecodeBit(CProb *prob)\r
-{\r
- unsigned v = *prob;\r
- UInt32 bound = (Range >> kNumBitModelTotalBits) * v;\r
- unsigned symbol;\r
- if (Code < bound)\r
- {\r
- v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits;\r
- Range = bound;\r
- symbol = 0;\r
- }\r
- else\r
- {\r
- v -= v >> kNumMoveBits;\r
- Code -= bound;\r
- Range -= bound;\r
- symbol = 1;\r
- }\r
- *prob = (CProb)v;\r
- Normalize();\r
- return symbol;\r
-}\r
-\r
-\r
-The Binary Tree of bit model counters\r
--------------------------------------\r
-\r
-LZMA uses a tree of Bit model variables to decode symbol that needs\r
-several bits for storing. There are two versions of such trees in LZMA:\r
- 1) the tree that decodes bits from high bit to low bit (the normal scheme).\r
- 2) the tree that decodes bits from low bit to high bit (the reverse scheme).\r
-\r
-Each binary tree structure supports different size of decoded symbol\r
-(the size of binary sequence that contains value of symbol).\r
-If that size of decoded symbol is "NumBits" bits, the tree structure \r
-uses the array of (2 << NumBits) counters of CProb type. \r
-But only ((2 << NumBits) - 1) items are used by encoder and decoder.\r
-The first item (the item with index equal to 0) in array is unused.\r
-That scheme with unused array's item allows to simplify the code.\r
-\r
-unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc)\r
-{\r
- unsigned m = 1;\r
- unsigned symbol = 0;\r
- for (unsigned i = 0; i < numBits; i++)\r
- {\r
- unsigned bit = rc->DecodeBit(&probs[m]);\r
- m <<= 1;\r
- m += bit;\r
- symbol |= (bit << i);\r
- }\r
- return symbol;\r
-}\r
-\r
-template <unsigned NumBits>\r
-class CBitTreeDecoder\r
-{\r
- CProb Probs[(unsigned)1 << NumBits];\r
-\r
-public:\r
-\r
- void Init()\r
- {\r
- INIT_PROBS(Probs);\r
- }\r
-\r
- unsigned Decode(CRangeDecoder *rc)\r
- {\r
- unsigned m = 1;\r
- for (unsigned i = 0; i < NumBits; i++)\r
- m = (m << 1) + rc->DecodeBit(&Probs[m]);\r
- return m - ((unsigned)1 << NumBits);\r
- }\r
-\r
- unsigned ReverseDecode(CRangeDecoder *rc)\r
- {\r
- return BitTreeReverseDecode(Probs, NumBits, rc);\r
- }\r
-};\r
-\r
-\r
-LZ part of LZMA \r
----------------\r
-\r
-LZ part of LZMA describes details about the decoding of MATCHES and LITERALS.\r
-\r
-\r
-The Literal Decoding\r
---------------------\r
-\r
-The LZMA Decoder uses (1 << (lc + lp)) tables with CProb values, where \r
-each table contains 0x300 CProb values:\r
-\r
- CProb *LitProbs;\r
-\r
- void CreateLiterals()\r
- {\r
- LitProbs = new CProb[(UInt32)0x300 << (lc + lp)];\r
- }\r
- \r
- void InitLiterals()\r
- {\r
- UInt32 num = (UInt32)0x300 << (lc + lp);\r
- for (UInt32 i = 0; i < num; i++)\r
- LitProbs[i] = PROB_INIT_VAL;\r
- }\r
-\r
-To select the table for decoding it uses the context that consists of\r
-(lc) high bits from previous literal and (lp) low bits from value that\r
-represents current position in outputStream.\r
-\r
-If (State > 7), the Literal Decoder also uses "matchByte" that represents \r
-the byte in OutputStream at position the is the DISTANCE bytes before \r
-current position, where the DISTANCE is the distance in DISTANCE-LENGTH pair\r
-of latest decoded match.\r
-\r
-The following code decodes one literal and puts it to Sliding Window buffer:\r
-\r
- void DecodeLiteral(unsigned state, UInt32 rep0)\r
- {\r
- unsigned prevByte = 0;\r
- if (!OutWindow.IsEmpty())\r
- prevByte = OutWindow.GetByte(1);\r
- \r
- unsigned symbol = 1;\r
- unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc));\r
- CProb *probs = &LitProbs[(UInt32)0x300 * litState];\r
- \r
- if (state >= 7)\r
- {\r
- unsigned matchByte = OutWindow.GetByte(rep0 + 1);\r
- do\r
- {\r
- unsigned matchBit = (matchByte >> 7) & 1;\r
- matchByte <<= 1;\r
- unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]);\r
- symbol = (symbol << 1) | bit;\r
- if (matchBit != bit)\r
- break;\r
- }\r
- while (symbol < 0x100);\r
- }\r
- while (symbol < 0x100)\r
- symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]);\r
- OutWindow.PutByte((Byte)(symbol - 0x100));\r
- }\r
-\r
-\r
-The match length decoding\r
--------------------------\r
-\r
-The match length decoder returns normalized (zero-based value) \r
-length of match. That value can be converted to real length of the match \r
-with the following code:\r
-\r
-#define kMatchMinLen 2\r
-\r
- matchLen = len + kMatchMinLen;\r
-\r
-The match length decoder can return the values from 0 to 271.\r
-And the corresponded real match length values can be in the range \r
-from 2 to 273.\r
-\r
-The following scheme is used for the match length encoding:\r
-\r
- Binary encoding Binary Tree structure Zero-based match length \r
- sequence (binary + decimal):\r
-\r
- 0 xxx LowCoder[posState] xxx\r
- 1 0 yyy MidCoder[posState] yyy + 8\r
- 1 1 zzzzzzzz HighCoder zzzzzzzz + 16\r
-\r
-LZMA uses bit model variable "Choice" to decode the first selection bit.\r
-\r
-If the first selection bit is equal to 0, the decoder uses binary tree \r
- LowCoder[posState] to decode 3-bit zero-based match length (xxx).\r
-\r
-If the first selection bit is equal to 1, the decoder uses bit model \r
- variable "Choice2" to decode the second selection bit.\r
-\r
- If the second selection bit is equal to 0, the decoder uses binary tree \r
- MidCoder[posState] to decode 3-bit "yyy" value, and zero-based match\r
- length is equal to (yyy + 8).\r
-\r
- If the second selection bit is equal to 1, the decoder uses binary tree \r
- HighCoder to decode 8-bit "zzzzzzzz" value, and zero-based \r
- match length is equal to (zzzzzzzz + 16).\r
-\r
-LZMA uses "posState" value as context to select the binary tree \r
-from LowCoder and MidCoder binary tree arrays:\r
-\r
- unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1);\r
-\r
-The full code of the length decoder:\r
-\r
-class CLenDecoder\r
-{\r
- CProb Choice;\r
- CProb Choice2;\r
- CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax];\r
- CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax];\r
- CBitTreeDecoder<8> HighCoder;\r
-\r
-public:\r
-\r
- void Init()\r
- {\r
- Choice = PROB_INIT_VAL;\r
- Choice2 = PROB_INIT_VAL;\r
- HighCoder.Init();\r
- for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++)\r
- {\r
- LowCoder[i].Init();\r
- MidCoder[i].Init();\r
- }\r
- }\r
-\r
- unsigned Decode(CRangeDecoder *rc, unsigned posState)\r
- {\r
- if (rc->DecodeBit(&Choice) == 0)\r
- return LowCoder[posState].Decode(rc);\r
- if (rc->DecodeBit(&Choice2) == 0)\r
- return 8 + MidCoder[posState].Decode(rc);\r
- return 16 + HighCoder.Decode(rc);\r
- }\r
-};\r
-\r
-The LZMA decoder uses two instances of CLenDecoder class.\r
-The first instance is for the matches of "Simple Match" type,\r
-and the second instance is for the matches of "Rep Match" type:\r
-\r
- CLenDecoder LenDecoder;\r
- CLenDecoder RepLenDecoder;\r
-\r
-\r
-The match distance decoding\r
----------------------------\r
-\r
-LZMA supports dictionary sizes up to 4 GiB minus 1.\r
-The value of match distance (decoded by distance decoder) can be \r
-from 1 to 2^32. But the distance value that is equal to 2^32 is used to\r
-indicate the "End of stream" marker. So real largest match distance \r
-that is used for LZ-window match is (2^32 - 1).\r
-\r
-LZMA uses normalized match length (zero-based length) \r
-to calculate the context state "lenState" do decode the distance value:\r
-\r
-#define kNumLenToPosStates 4\r
-\r
- unsigned lenState = len;\r
- if (lenState > kNumLenToPosStates - 1)\r
- lenState = kNumLenToPosStates - 1;\r
-\r
-The distance decoder returns the "dist" value that is zero-based value \r
-of match distance. The real match distance can be calculated with the\r
-following code:\r
- \r
- matchDistance = dist + 1; \r
-\r
-The state of the distance decoder and the initialization code: \r
-\r
- #define kEndPosModelIndex 14\r
- #define kNumFullDistances (1 << (kEndPosModelIndex >> 1))\r
- #define kNumAlignBits 4\r
-\r
- CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates];\r
- CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex];\r
- CBitTreeDecoder<kNumAlignBits> AlignDecoder;\r
-\r
- void InitDist()\r
- {\r
- for (unsigned i = 0; i < kNumLenToPosStates; i++)\r
- PosSlotDecoder[i].Init();\r
- AlignDecoder.Init();\r
- INIT_PROBS(PosDecoders);\r
- }\r
-\r
-At first stage the distance decoder decodes 6-bit "posSlot" value with bit\r
-tree decoder from PosSlotDecoder array. It's possible to get 2^6=64 different \r
-"posSlot" values.\r
-\r
- unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec);\r
-\r
-The encoding scheme for distance value is shown in the following table:\r
-\r
-posSlot (decimal) /\r
- zero-based distance (binary)\r
- 0 0\r
- 1 1\r
- 2 10\r
- 3 11\r
-\r
- 4 10 x\r
- 5 11 x\r
- 6 10 xx\r
- 7 11 xx\r
- 8 10 xxx\r
- 9 11 xxx\r
-10 10 xxxx\r
-11 11 xxxx\r
-12 10 xxxxx\r
-13 11 xxxxx\r
-\r
-14 10 yy zzzz\r
-15 11 yy zzzz\r
-16 10 yyy zzzz\r
-17 11 yyy zzzz\r
-...\r
-62 10 yyyyyyyyyyyyyyyyyyyyyyyyyy zzzz\r
-63 11 yyyyyyyyyyyyyyyyyyyyyyyyyy zzzz\r
-\r
-where \r
- "x ... x" means the sequence of binary symbols encoded with binary tree and \r
- "Reverse" scheme. It uses separated binary tree for each posSlot from 4 to 13.\r
- "y" means direct bit encoded with range coder.\r
- "zzzz" means the sequence of four binary symbols encoded with binary\r
- tree with "Reverse" scheme, where one common binary tree "AlignDecoder"\r
- is used for all posSlot values.\r
-\r
-If (posSlot < 4), the "dist" value is equal to posSlot value.\r
-\r
-If (posSlot >= 4), the decoder uses "posSlot" value to calculate the value of\r
- the high bits of "dist" value and the number of the low bits.\r
-\r
- If (4 <= posSlot < kEndPosModelIndex), the decoder uses bit tree decoders.\r
- (one separated bit tree decoder per one posSlot value) and "Reverse" scheme.\r
- In this implementation we use one CProb array "PosDecoders" that contains \r
- all CProb variables for all these bit decoders.\r
- \r
- if (posSlot >= kEndPosModelIndex), the middle bits are decoded as direct \r
- bits from RangeDecoder and the low 4 bits are decoded with a bit tree \r
- decoder "AlignDecoder" with "Reverse" scheme.\r
-\r
-The code to decode zero-based match distance:\r
- \r
- unsigned DecodeDistance(unsigned len)\r
- {\r
- unsigned lenState = len;\r
- if (lenState > kNumLenToPosStates - 1)\r
- lenState = kNumLenToPosStates - 1;\r
- \r
- unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec);\r
- if (posSlot < 4)\r
- return posSlot;\r
- \r
- unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1);\r
- UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits);\r
- if (posSlot < kEndPosModelIndex)\r
- dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec);\r
- else\r
- {\r
- dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits;\r
- dist += AlignDecoder.ReverseDecode(&RangeDec);\r
- }\r
- return dist;\r
- }\r
-\r
-\r
-\r
-LZMA Decoding modes\r
--------------------\r
-\r
-There are 2 types of LZMA streams:\r
-\r
-1) The stream with "End of stream" marker.\r
-2) The stream without "End of stream" marker.\r
-\r
-And the LZMA Decoder supports 3 modes of decoding:\r
-\r
-1) The unpack size is undefined. The LZMA decoder stops decoding after \r
- getting "End of stream" marker. \r
- The input variables for that case:\r
- \r
- markerIsMandatory = true\r
- unpackSizeDefined = false\r
- unpackSize contains any value\r
-\r
-2) The unpack size is defined and LZMA decoder supports both variants, \r
- where the stream can contain "End of stream" marker or the stream is\r
- finished without "End of stream" marker. The LZMA decoder must detect \r
- any of these situations.\r
- The input variables for that case:\r
- \r
- markerIsMandatory = false\r
- unpackSizeDefined = true\r
- unpackSize contains unpack size\r
-\r
-3) The unpack size is defined and the LZMA stream must contain \r
- "End of stream" marker\r
- The input variables for that case:\r
- \r
- markerIsMandatory = true\r
- unpackSizeDefined = true\r
- unpackSize contains unpack size\r
-\r
-\r
-The main loop of decoder\r
-------------------------\r
-\r
-The main loop of LZMA decoder:\r
-\r
-Initialize the LZMA state.\r
-loop\r
-{\r
- // begin of loop\r
- Check "end of stream" conditions.\r
- Decode Type of MATCH / LITERAL. \r
- If it's LITERAL, decode LITERAL value and put the LITERAL to Window.\r
- If it's MATCH, decode the length of match and the match distance. \r
- Check error conditions, check end of stream conditions and copy\r
- the sequence of match bytes from sliding window to current position\r
- in window.\r
- Go to begin of loop\r
-}\r
-\r
-The reference implementation of LZMA decoder uses "unpackSize" variable\r
-to keep the number of remaining bytes in output stream. So it reduces \r
-"unpackSize" value after each decoded LITERAL or MATCH.\r
-\r
-The following code contains the "end of stream" condition check at the start\r
-of the loop:\r
-\r
- if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory)\r
- if (RangeDec.IsFinishedOK())\r
- return LZMA_RES_FINISHED_WITHOUT_MARKER;\r
-\r
-LZMA uses three types of matches:\r
-\r
-1) "Simple Match" - the match with distance value encoded with bit models.\r
-\r
-2) "Rep Match" - the match that uses the distance from distance\r
- history table.\r
-\r
-3) "Short Rep Match" - the match of single byte length, that uses the latest \r
- distance from distance history table.\r
-\r
-The LZMA decoder keeps the history of latest 4 match distances that were used \r
-by decoder. That set of 4 variables contains zero-based match distances and \r
-these variables are initialized with zero values:\r
-\r
- UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0;\r
-\r
-The LZMA decoder uses binary model variables to select type of MATCH or LITERAL:\r
-\r
-#define kNumStates 12\r
-#define kNumPosBitsMax 4\r
-\r
- CProb IsMatch[kNumStates << kNumPosBitsMax];\r
- CProb IsRep[kNumStates];\r
- CProb IsRepG0[kNumStates];\r
- CProb IsRepG1[kNumStates];\r
- CProb IsRepG2[kNumStates];\r
- CProb IsRep0Long[kNumStates << kNumPosBitsMax];\r
-\r
-The decoder uses "state" variable value to select exact variable \r
-from "IsRep", "IsRepG0", "IsRepG1" and "IsRepG2" arrays.\r
-The "state" variable can get the value from 0 to 11.\r
-Initial value for "state" variable is zero:\r
-\r
- unsigned state = 0;\r
-\r
-The "state" variable is updated after each LITERAL or MATCH with one of the\r
-following functions:\r
-\r
-unsigned UpdateState_Literal(unsigned state)\r
-{\r
- if (state < 4) return 0;\r
- else if (state < 10) return state - 3;\r
- else return state - 6;\r
-}\r
-unsigned UpdateState_Match (unsigned state) { return state < 7 ? 7 : 10; }\r
-unsigned UpdateState_Rep (unsigned state) { return state < 7 ? 8 : 11; }\r
-unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; }\r
-\r
-The decoder calculates "state2" variable value to select exact variable from \r
-"IsMatch" and "IsRep0Long" arrays:\r
-\r
-unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1);\r
-unsigned state2 = (state << kNumPosBitsMax) + posState;\r
-\r
-The decoder uses the following code flow scheme to select exact \r
-type of LITERAL or MATCH:\r
-\r
-IsMatch[state2] decode\r
- 0 - the Literal\r
- 1 - the Match\r
- IsRep[state] decode\r
- 0 - Simple Match\r
- 1 - Rep Match\r
- IsRepG0[state] decode\r
- 0 - the distance is rep0\r
- IsRep0Long[state2] decode\r
- 0 - Short Rep Match\r
- 1 - Rep Match 0\r
- 1 - \r
- IsRepG1[state] decode\r
- 0 - Rep Match 1\r
- 1 - \r
- IsRepG2[state] decode\r
- 0 - Rep Match 2\r
- 1 - Rep Match 3\r
-\r
-\r
-LITERAL symbol\r
---------------\r
-If the value "0" was decoded with IsMatch[state2] decoding, we have "LITERAL" type.\r
-\r
-At first the LZMA decoder must check that it doesn't exceed \r
-specified uncompressed size:\r
-\r
- if (unpackSizeDefined && unpackSize == 0)\r
- return LZMA_RES_ERROR;\r
-\r
-Then it decodes literal value and puts it to sliding window:\r
-\r
- DecodeLiteral(state, rep0);\r
-\r
-Then the decoder must update the "state" value and "unpackSize" value;\r
-\r
- state = UpdateState_Literal(state);\r
- unpackSize--;\r
-\r
-Then the decoder must go to the begin of main loop to decode next Match or Literal.\r
-\r
-\r
-Simple Match\r
-------------\r
-\r
-If the value "1" was decoded with IsMatch[state2] decoding,\r
-we have the "Simple Match" type.\r
-\r
-The distance history table is updated with the following scheme:\r
- \r
- rep3 = rep2;\r
- rep2 = rep1;\r
- rep1 = rep0;\r
-\r
-The zero-based length is decoded with "LenDecoder":\r
-\r
- len = LenDecoder.Decode(&RangeDec, posState);\r
-\r
-The state is update with UpdateState_Match function:\r
-\r
- state = UpdateState_Match(state);\r
-\r
-and the new "rep0" value is decoded with DecodeDistance:\r
-\r
- rep0 = DecodeDistance(len);\r
-\r
-That "rep0" will be used as zero-based distance for current match.\r
-\r
-If the value of "rep0" is equal to 0xFFFFFFFF, it means that we have \r
-"End of stream" marker, so we can stop decoding and check finishing \r
-condition in Range Decoder:\r
-\r
- if (rep0 == 0xFFFFFFFF)\r
- return RangeDec.IsFinishedOK() ?\r
- LZMA_RES_FINISHED_WITH_MARKER :\r
- LZMA_RES_ERROR;\r
-\r
-If uncompressed size is defined, LZMA decoder must check that it doesn't \r
-exceed that specified uncompressed size:\r
-\r
- if (unpackSizeDefined && unpackSize == 0)\r
- return LZMA_RES_ERROR;\r
-\r
-Also the decoder must check that "rep0" value is not larger than dictionary size\r
-and is not larger than the number of already decoded bytes:\r
-\r
- if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0))\r
- return LZMA_RES_ERROR;\r
-\r
-Then the decoder must copy match bytes as described in \r
-"The match symbols copying" section.\r
-\r
-\r
-Rep Match\r
----------\r
-\r
-If the LZMA decoder has decoded the value "1" with IsRep[state] variable,\r
-we have "Rep Match" type.\r
-\r
-At first the LZMA decoder must check that it doesn't exceed \r
-specified uncompressed size:\r
-\r
- if (unpackSizeDefined && unpackSize == 0)\r
- return LZMA_RES_ERROR;\r
-\r
-Also the decoder must return error, if the LZ window is empty:\r
-\r
- if (OutWindow.IsEmpty())\r
- return LZMA_RES_ERROR;\r
-\r
-If the match type is "Rep Match", the decoder uses one of the 4 variables of\r
-distance history table to get the value of distance for current match.\r
-And there are 4 corresponding ways of decoding flow. \r
-\r
-The decoder updates the distance history with the following scheme \r
-depending from type of match:\r
-\r
-- "Rep Match 0" or "Short Rep Match":\r
- ; LZMA doesn't update the distance history \r
-\r
-- "Rep Match 1":\r
- UInt32 dist = rep1;\r
- rep1 = rep0;\r
- rep0 = dist;\r
-\r
-- "Rep Match 2":\r
- UInt32 dist = rep2;\r
- rep2 = rep1;\r
- rep1 = rep0;\r
- rep0 = dist;\r
-\r
-- "Rep Match 3":\r
- UInt32 dist = rep3;\r
- rep3 = rep2;\r
- rep2 = rep1;\r
- rep1 = rep0;\r
- rep0 = dist;\r
-\r
-Then the decoder decodes exact subtype of "Rep Match" using "IsRepG0", "IsRep0Long",\r
-"IsRepG1", "IsRepG2".\r
-\r
-If the subtype is "Short Rep Match", the decoder updates the state, puts \r
-the one byte from window to current position in window and goes to next \r
-MATCH/LITERAL symbol (the begin of main loop):\r
-\r
- state = UpdateState_ShortRep(state);\r
- OutWindow.PutByte(OutWindow.GetByte(rep0 + 1));\r
- unpackSize--;\r
- continue;\r
-\r
-In other cases (Rep Match 0/1/2/3), it decodes the zero-based \r
-length of match with "RepLenDecoder" decoder:\r
-\r
- len = RepLenDecoder.Decode(&RangeDec, posState);\r
-\r
-Then it updates the state:\r
-\r
- state = UpdateState_Rep(state);\r
-\r
-Then the decoder must copy match bytes as described in \r
-"The Match symbols copying" section.\r
-\r
-\r
-The match symbols copying\r
--------------------------\r
-\r
-If we have the match (Simple Match or Rep Match 0/1/2/3), the decoder must\r
-copy the sequence of bytes with calculated match distance and match length.\r
-If uncompressed size is defined, LZMA decoder must check that it doesn't \r
-exceed that specified uncompressed size:\r
-\r
- len += kMatchMinLen;\r
- bool isError = false;\r
- if (unpackSizeDefined && unpackSize < len)\r
- {\r
- len = (unsigned)unpackSize;\r
- isError = true;\r
- }\r
- OutWindow.CopyMatch(rep0 + 1, len);\r
- unpackSize -= len;\r
- if (isError)\r
- return LZMA_RES_ERROR;\r
-\r
-Then the decoder must go to the begin of main loop to decode next MATCH or LITERAL.\r
-\r
-\r
-\r
-NOTES\r
------\r
-\r
-This specification doesn't describe the variant of decoder implementation \r
-that supports partial decoding. Such partial decoding case can require some \r
-changes in "end of stream" condition checks code. Also such code \r
-can use additional status codes, returned by decoder.\r
-\r
-This specification uses C++ code with templates to simplify describing.\r
-The optimized version of LZMA decoder doesn't need templates.\r
-Such optimized version can use just two arrays of CProb variables:\r
- 1) The dynamic array of CProb variables allocated for the Literal Decoder.\r
- 2) The one common array that contains all other CProb variables.\r
-\r
-\r
-References: \r
-\r
-1. G. N. N. Martin, Range encoding: an algorithm for removing redundancy \r
- from a digitized message, Video & Data Recording Conference, \r
- Southampton, UK, July 24-27, 1979.\r
+++ /dev/null
-LZMA compression\r
-----------------\r
-Version: 9.35\r
-\r
-This file describes LZMA encoding and decoding functions written in C language.\r
-\r
-LZMA is an improved version of famous LZ77 compression algorithm. \r
-It was improved in way of maximum increasing of compression ratio,\r
-keeping high decompression speed and low memory requirements for \r
-decompressing.\r
-\r
-Note: you can read also LZMA Specification (lzma-specification.txt from LZMA SDK)\r
-\r
-Also you can look source code for LZMA encoding and decoding:\r
- C/Util/Lzma/LzmaUtil.c\r
-\r
-\r
-LZMA compressed file format\r
----------------------------\r
-Offset Size Description\r
- 0 1 Special LZMA properties (lc,lp, pb in encoded form)\r
- 1 4 Dictionary size (little endian)\r
- 5 8 Uncompressed size (little endian). -1 means unknown size\r
- 13 Compressed data\r
-\r
-\r
-\r
-ANSI-C LZMA Decoder\r
-~~~~~~~~~~~~~~~~~~~\r
-\r
-Please note that interfaces for ANSI-C code were changed in LZMA SDK 4.58.\r
-If you want to use old interfaces you can download previous version of LZMA SDK\r
-from sourceforge.net site.\r
-\r
-To use ANSI-C LZMA Decoder you need the following files:\r
-1) LzmaDec.h + LzmaDec.c + 7zTypes.h + Precomp.h + Compiler.h\r
-\r
-Look example code:\r
- C/Util/Lzma/LzmaUtil.c\r
-\r
-\r
-Memory requirements for LZMA decoding\r
--------------------------------------\r
-\r
-Stack usage of LZMA decoding function for local variables is not \r
-larger than 200-400 bytes.\r
-\r
-LZMA Decoder uses dictionary buffer and internal state structure.\r
-Internal state structure consumes\r
- state_size = (4 + (1.5 << (lc + lp))) KB\r
-by default (lc=3, lp=0), state_size = 16 KB.\r
-\r
-\r
-How To decompress data\r
-----------------------\r
-\r
-LZMA Decoder (ANSI-C version) now supports 2 interfaces:\r
-1) Single-call Decompressing\r
-2) Multi-call State Decompressing (zlib-like interface)\r
-\r
-You must use external allocator:\r
-Example:\r
-void *SzAlloc(void *p, size_t size) { p = p; return malloc(size); }\r
-void SzFree(void *p, void *address) { p = p; free(address); }\r
-ISzAlloc alloc = { SzAlloc, SzFree };\r
-\r
-You can use p = p; operator to disable compiler warnings.\r
-\r
-\r
-Single-call Decompressing\r
--------------------------\r
-When to use: RAM->RAM decompressing\r
-Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h\r
-Compile defines: no defines\r
-Memory Requirements:\r
- - Input buffer: compressed size\r
- - Output buffer: uncompressed size\r
- - LZMA Internal Structures: state_size (16 KB for default settings) \r
-\r
-Interface:\r
- int LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,\r
- const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, \r
- ELzmaStatus *status, ISzAlloc *alloc);\r
- In: \r
- dest - output data\r
- destLen - output data size\r
- src - input data\r
- srcLen - input data size\r
- propData - LZMA properties (5 bytes)\r
- propSize - size of propData buffer (5 bytes)\r
- finishMode - It has meaning only if the decoding reaches output limit (*destLen).\r
- LZMA_FINISH_ANY - Decode just destLen bytes.\r
- LZMA_FINISH_END - Stream must be finished after (*destLen).\r
- You can use LZMA_FINISH_END, when you know that \r
- current output buffer covers last bytes of stream. \r
- alloc - Memory allocator.\r
-\r
- Out: \r
- destLen - processed output size \r
- srcLen - processed input size \r
-\r
- Output:\r
- SZ_OK\r
- status:\r
- LZMA_STATUS_FINISHED_WITH_MARK\r
- LZMA_STATUS_NOT_FINISHED \r
- LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK\r
- SZ_ERROR_DATA - Data error\r
- SZ_ERROR_MEM - Memory allocation error\r
- SZ_ERROR_UNSUPPORTED - Unsupported properties\r
- SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).\r
-\r
- If LZMA decoder sees end_marker before reaching output limit, it returns OK result,\r
- and output value of destLen will be less than output buffer size limit.\r
-\r
- You can use multiple checks to test data integrity after full decompression:\r
- 1) Check Result and "status" variable.\r
- 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.\r
- 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. \r
- You must use correct finish mode in that case. */ \r
-\r
-\r
-Multi-call State Decompressing (zlib-like interface)\r
-----------------------------------------------------\r
-\r
-When to use: file->file decompressing \r
-Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h\r
-\r
-Memory Requirements:\r
- - Buffer for input stream: any size (for example, 16 KB)\r
- - Buffer for output stream: any size (for example, 16 KB)\r
- - LZMA Internal Structures: state_size (16 KB for default settings) \r
- - LZMA dictionary (dictionary size is encoded in LZMA properties header)\r
-\r
-1) read LZMA properties (5 bytes) and uncompressed size (8 bytes, little-endian) to header:\r
- unsigned char header[LZMA_PROPS_SIZE + 8];\r
- ReadFile(inFile, header, sizeof(header)\r
-\r
-2) Allocate CLzmaDec structures (state + dictionary) using LZMA properties\r
-\r
- CLzmaDec state;\r
- LzmaDec_Constr(&state);\r
- res = LzmaDec_Allocate(&state, header, LZMA_PROPS_SIZE, &g_Alloc);\r
- if (res != SZ_OK)\r
- return res;\r
-\r
-3) Init LzmaDec structure before any new LZMA stream. And call LzmaDec_DecodeToBuf in loop\r
-\r
- LzmaDec_Init(&state);\r
- for (;;)\r
- {\r
- ... \r
- int res = LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, \r
- const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode);\r
- ...\r
- }\r
-\r
-\r
-4) Free all allocated structures\r
- LzmaDec_Free(&state, &g_Alloc);\r
-\r
-Look example code:\r
- C/Util/Lzma/LzmaUtil.c\r
-\r
-\r
-How To compress data\r
---------------------\r
-\r
-Compile files: \r
- 7zTypes.h\r
- Threads.h \r
- LzmaEnc.h\r
- LzmaEnc.c\r
- LzFind.h\r
- LzFind.c\r
- LzFindMt.h\r
- LzFindMt.c\r
- LzHash.h\r
-\r
-Memory Requirements:\r
- - (dictSize * 11.5 + 6 MB) + state_size\r
-\r
-Lzma Encoder can use two memory allocators:\r
-1) alloc - for small arrays.\r
-2) allocBig - for big arrays.\r
-\r
-For example, you can use Large RAM Pages (2 MB) in allocBig allocator for \r
-better compression speed. Note that Windows has bad implementation for \r
-Large RAM Pages. \r
-It's OK to use same allocator for alloc and allocBig.\r
-\r
-\r
-Single-call Compression with callbacks\r
---------------------------------------\r
-\r
-Look example code:\r
- C/Util/Lzma/LzmaUtil.c\r
-\r
-When to use: file->file compressing \r
-\r
-1) you must implement callback structures for interfaces:\r
-ISeqInStream\r
-ISeqOutStream\r
-ICompressProgress\r
-ISzAlloc\r
-\r
-static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); }\r
-static void SzFree(void *p, void *address) { p = p; MyFree(address); }\r
-static ISzAlloc g_Alloc = { SzAlloc, SzFree };\r
-\r
- CFileSeqInStream inStream;\r
- CFileSeqOutStream outStream;\r
-\r
- inStream.funcTable.Read = MyRead;\r
- inStream.file = inFile;\r
- outStream.funcTable.Write = MyWrite;\r
- outStream.file = outFile;\r
-\r
-\r
-2) Create CLzmaEncHandle object;\r
-\r
- CLzmaEncHandle enc;\r
-\r
- enc = LzmaEnc_Create(&g_Alloc);\r
- if (enc == 0)\r
- return SZ_ERROR_MEM;\r
-\r
-\r
-3) initialize CLzmaEncProps properties;\r
-\r
- LzmaEncProps_Init(&props);\r
-\r
- Then you can change some properties in that structure.\r
-\r
-4) Send LZMA properties to LZMA Encoder\r
-\r
- res = LzmaEnc_SetProps(enc, &props);\r
-\r
-5) Write encoded properties to header\r
-\r
- Byte header[LZMA_PROPS_SIZE + 8];\r
- size_t headerSize = LZMA_PROPS_SIZE;\r
- UInt64 fileSize;\r
- int i;\r
-\r
- res = LzmaEnc_WriteProperties(enc, header, &headerSize);\r
- fileSize = MyGetFileLength(inFile);\r
- for (i = 0; i < 8; i++)\r
- header[headerSize++] = (Byte)(fileSize >> (8 * i));\r
- MyWriteFileAndCheck(outFile, header, headerSize)\r
-\r
-6) Call encoding function:\r
- res = LzmaEnc_Encode(enc, &outStream.funcTable, &inStream.funcTable, \r
- NULL, &g_Alloc, &g_Alloc);\r
-\r
-7) Destroy LZMA Encoder Object\r
- LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc);\r
-\r
-\r
-If callback function return some error code, LzmaEnc_Encode also returns that code\r
-or it can return the code like SZ_ERROR_READ, SZ_ERROR_WRITE or SZ_ERROR_PROGRESS.\r
-\r
-\r
-Single-call RAM->RAM Compression\r
---------------------------------\r
-\r
-Single-call RAM->RAM Compression is similar to Compression with callbacks,\r
-but you provide pointers to buffers instead of pointers to stream callbacks:\r
-\r
-SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,\r
- const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, \r
- ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);\r
-\r
-Return code:\r
- SZ_OK - OK\r
- SZ_ERROR_MEM - Memory allocation error \r
- SZ_ERROR_PARAM - Incorrect paramater\r
- SZ_ERROR_OUTPUT_EOF - output buffer overflow\r
- SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)\r
-\r
-\r
-\r
-Defines\r
--------\r
-\r
-_LZMA_SIZE_OPT - Enable some optimizations in LZMA Decoder to get smaller executable code.\r
-\r
-_LZMA_PROB32 - It can increase the speed on some 32-bit CPUs, but memory usage for \r
- some structures will be doubled in that case.\r
-\r
-_LZMA_UINT32_IS_ULONG - Define it if int is 16-bit on your compiler and long is 32-bit.\r
-\r
-_LZMA_NO_SYSTEM_SIZE_T - Define it if you don't want to use size_t type.\r
-\r
-\r
-_7ZIP_PPMD_SUPPPORT - Define it if you don't want to support PPMD method in AMSI-C .7z decoder.\r
-\r
-\r
-C++ LZMA Encoder/Decoder \r
-~~~~~~~~~~~~~~~~~~~~~~~~\r
-C++ LZMA code use COM-like interfaces. So if you want to use it, \r
-you can study basics of COM/OLE.\r
-C++ LZMA code is just wrapper over ANSI-C code.\r
-\r
-\r
-C++ Notes\r
-~~~~~~~~~~~~~~~~~~~~~~~~\r
-If you use some C++ code folders in 7-Zip (for example, C++ code for .7z handling),\r
-you must check that you correctly work with "new" operator.\r
-7-Zip can be compiled with MSVC 6.0 that doesn't throw "exception" from "new" operator.\r
-So 7-Zip uses "CPP\Common\NewHandler.cpp" that redefines "new" operator:\r
-operator new(size_t size)\r
-{\r
- void *p = ::malloc(size);\r
- if (p == 0)\r
- throw CNewException();\r
- return p;\r
-}\r
-If you use MSCV that throws exception for "new" operator, you can compile without \r
-"NewHandler.cpp". So standard exception will be used. Actually some code of \r
-7-Zip catches any exception in internal code and converts it to HRESULT code.\r
-So you don't need to catch CNewException, if you call COM interfaces of 7-Zip.\r
-\r
----\r
-\r
-http://www.7-zip.org\r
-http://www.7-zip.org/sdk.html\r
-http://www.7-zip.org/support.html\r
+++ /dev/null
-<?xml version="1.0" encoding="utf-8"?>\r
-<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">\r
- <ItemGroup Label="ProjectConfigurations">\r
- <ProjectConfiguration Include="DebugFast|ARM64">\r
- <Configuration>DebugFast</Configuration>\r
- <Platform>ARM64</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="DebugFast|Win32">\r
- <Configuration>DebugFast</Configuration>\r
- <Platform>Win32</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="DebugFast|x64">\r
- <Configuration>DebugFast</Configuration>\r
- <Platform>x64</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="Debug|ARM64">\r
- <Configuration>Debug</Configuration>\r
- <Platform>ARM64</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="Debug|Win32">\r
- <Configuration>Debug</Configuration>\r
- <Platform>Win32</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="Debug|x64">\r
- <Configuration>Debug</Configuration>\r
- <Platform>x64</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="ReleaseLTCG|ARM64">\r
- <Configuration>ReleaseLTCG</Configuration>\r
- <Platform>ARM64</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="ReleaseLTCG|Win32">\r
- <Configuration>ReleaseLTCG</Configuration>\r
- <Platform>Win32</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="ReleaseLTCG|x64">\r
- <Configuration>ReleaseLTCG</Configuration>\r
- <Platform>x64</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="Release|ARM64">\r
- <Configuration>Release</Configuration>\r
- <Platform>ARM64</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="Release|Win32">\r
- <Configuration>Release</Configuration>\r
- <Platform>Win32</Platform>\r
- </ProjectConfiguration>\r
- <ProjectConfiguration Include="Release|x64">\r
- <Configuration>Release</Configuration>\r
- <Platform>x64</Platform>\r
- </ProjectConfiguration>\r
- </ItemGroup>\r
- <ItemGroup>\r
- <ClCompile Include="src\Alloc.c" />\r
- <ClCompile Include="src\Bra86.c" />\r
- <ClCompile Include="src\BraIA64.c" />\r
- <ClCompile Include="src\CpuArch.c" />\r
- <ClCompile Include="src\Delta.c" />\r
- <ClCompile Include="src\LzFind.c" />\r
- <ClCompile Include="src\Lzma86Dec.c" />\r
- <ClCompile Include="src\Lzma86Enc.c" />\r
- <ClCompile Include="src\LzmaDec.c" />\r
- <ClCompile Include="src\LzmaEnc.c" />\r
- <ClCompile Include="src\LzmaLib.c" />\r
- <ClCompile Include="src\Sort.c" />\r
- </ItemGroup>\r
- <PropertyGroup Label="Globals">\r
- <ProjectGuid>{DD944834-7899-4C1C-A4C1-064B5009D239}</ProjectGuid>\r
- <Keyword>Win32Proj</Keyword>\r
- <RootNamespace>lzma</RootNamespace>\r
- <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>\r
- </PropertyGroup>\r
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>true</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <CharacterSet>NotSet</CharacterSet>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>true</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <CharacterSet>NotSet</CharacterSet>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>true</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <CharacterSet>NotSet</CharacterSet>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|Win32'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>true</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <CharacterSet>NotSet</CharacterSet>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|x64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>true</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <CharacterSet>NotSet</CharacterSet>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|ARM64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>true</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <CharacterSet>NotSet</CharacterSet>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>false</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <CharacterSet>NotSet</CharacterSet>\r
- <SpectreMitigation>false</SpectreMitigation>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|Win32'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>false</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <CharacterSet>NotSet</CharacterSet>\r
- <SpectreMitigation>false</SpectreMitigation>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>false</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <CharacterSet>NotSet</CharacterSet>\r
- <SpectreMitigation>false</SpectreMitigation>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>false</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <CharacterSet>NotSet</CharacterSet>\r
- <SpectreMitigation>false</SpectreMitigation>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|x64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>false</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <CharacterSet>NotSet</CharacterSet>\r
- <SpectreMitigation>false</SpectreMitigation>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|ARM64'" Label="Configuration">\r
- <ConfigurationType>StaticLibrary</ConfigurationType>\r
- <UseDebugLibraries>false</UseDebugLibraries>\r
- <PlatformToolset>v142</PlatformToolset>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <CharacterSet>NotSet</CharacterSet>\r
- <SpectreMitigation>false</SpectreMitigation>\r
- </PropertyGroup>\r
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />\r
- <ImportGroup Label="ExtensionSettings">\r
- </ImportGroup>\r
- <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|Win32'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|x64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|ARM64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|Win32'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|x64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|ARM64'" Label="PropertySheets">\r
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />\r
- </ImportGroup>\r
- <PropertyGroup Label="UserMacros" />\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">\r
- <LinkIncremental>true</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>true</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>true</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|Win32'">\r
- <LinkIncremental>true</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|x64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>true</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|ARM64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>true</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">\r
- <LinkIncremental>false</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|Win32'">\r
- <LinkIncremental>false</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>false</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>false</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|x64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>false</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|ARM64'">\r
- <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>\r
- <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>\r
- <LinkIncremental>false</LinkIncremental>\r
- <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>\r
- </PropertyGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">\r
- <ClCompile>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <WarningLevel>Level2</WarningLevel>\r
- <Optimization>Disabled</Optimization>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <SDLCheck>true</SDLCheck>\r
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <MinimalRebuild>false</MinimalRebuild>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">\r
- <ClCompile>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <WarningLevel>Level2</WarningLevel>\r
- <Optimization>Disabled</Optimization>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <SDLCheck>true</SDLCheck>\r
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <MinimalRebuild>false</MinimalRebuild>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">\r
- <ClCompile>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <WarningLevel>Level2</WarningLevel>\r
- <Optimization>Disabled</Optimization>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <SDLCheck>true</SDLCheck>\r
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <MinimalRebuild>false</MinimalRebuild>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|Win32'">\r
- <ClCompile>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <WarningLevel>Level2</WarningLevel>\r
- <Optimization>Disabled</Optimization>\r
- <PreprocessorDefinitions>_7ZIP_ST;_ITERATOR_DEBUG_LEVEL=1;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <SDLCheck>true</SDLCheck>\r
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <BasicRuntimeChecks>Default</BasicRuntimeChecks>\r
- <MinimalRebuild>false</MinimalRebuild>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <SupportJustMyCode>false</SupportJustMyCode>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|x64'">\r
- <ClCompile>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <WarningLevel>Level2</WarningLevel>\r
- <Optimization>Disabled</Optimization>\r
- <PreprocessorDefinitions>_7ZIP_ST;_ITERATOR_DEBUG_LEVEL=1;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <SDLCheck>true</SDLCheck>\r
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <BasicRuntimeChecks>Default</BasicRuntimeChecks>\r
- <MinimalRebuild>false</MinimalRebuild>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <SupportJustMyCode>false</SupportJustMyCode>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|ARM64'">\r
- <ClCompile>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <WarningLevel>Level2</WarningLevel>\r
- <Optimization>Disabled</Optimization>\r
- <PreprocessorDefinitions>_7ZIP_ST;_ITERATOR_DEBUG_LEVEL=1;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <SDLCheck>true</SDLCheck>\r
- <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <BasicRuntimeChecks>Default</BasicRuntimeChecks>\r
- <MinimalRebuild>false</MinimalRebuild>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <SupportJustMyCode>false</SupportJustMyCode>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">\r
- <ClCompile>\r
- <WarningLevel>Level2</WarningLevel>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <Optimization>MaxSpeed</Optimization>\r
- <IntrinsicFunctions>true</IntrinsicFunctions>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <WholeProgramOptimization>false</WholeProgramOptimization>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- <EnableCOMDATFolding>true</EnableCOMDATFolding>\r
- <OptimizeReferences>true</OptimizeReferences>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|Win32'">\r
- <ClCompile>\r
- <WarningLevel>Level2</WarningLevel>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <Optimization>MaxSpeed</Optimization>\r
- <IntrinsicFunctions>true</IntrinsicFunctions>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <OmitFramePointers>true</OmitFramePointers>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- <EnableCOMDATFolding>true</EnableCOMDATFolding>\r
- <OptimizeReferences>true</OptimizeReferences>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">\r
- <ClCompile>\r
- <WarningLevel>Level2</WarningLevel>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <Optimization>MaxSpeed</Optimization>\r
- <IntrinsicFunctions>true</IntrinsicFunctions>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <WholeProgramOptimization>false</WholeProgramOptimization>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- <EnableCOMDATFolding>true</EnableCOMDATFolding>\r
- <OptimizeReferences>true</OptimizeReferences>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">\r
- <ClCompile>\r
- <WarningLevel>Level2</WarningLevel>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <Optimization>MaxSpeed</Optimization>\r
- <IntrinsicFunctions>true</IntrinsicFunctions>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <WholeProgramOptimization>false</WholeProgramOptimization>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- <EnableCOMDATFolding>true</EnableCOMDATFolding>\r
- <OptimizeReferences>true</OptimizeReferences>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|x64'">\r
- <ClCompile>\r
- <WarningLevel>Level2</WarningLevel>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <Optimization>MaxSpeed</Optimization>\r
- <IntrinsicFunctions>true</IntrinsicFunctions>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <OmitFramePointers>true</OmitFramePointers>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- <EnableCOMDATFolding>true</EnableCOMDATFolding>\r
- <OptimizeReferences>true</OptimizeReferences>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|ARM64'">\r
- <ClCompile>\r
- <WarningLevel>Level2</WarningLevel>\r
- <PrecompiledHeader>\r
- </PrecompiledHeader>\r
- <Optimization>MaxSpeed</Optimization>\r
- <IntrinsicFunctions>true</IntrinsicFunctions>\r
- <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>\r
- <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>\r
- <WholeProgramOptimization>true</WholeProgramOptimization>\r
- <LanguageStandard>stdcpp17</LanguageStandard>\r
- <OmitFramePointers>true</OmitFramePointers>\r
- <MultiProcessorCompilation>true</MultiProcessorCompilation>\r
- <ConformanceMode>true</ConformanceMode>\r
- <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>\r
- </ClCompile>\r
- <Link>\r
- <SubSystem>Windows</SubSystem>\r
- <GenerateDebugInformation>true</GenerateDebugInformation>\r
- <EnableCOMDATFolding>true</EnableCOMDATFolding>\r
- <OptimizeReferences>true</OptimizeReferences>\r
- </Link>\r
- </ItemDefinitionGroup>\r
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />\r
- <ImportGroup Label="ExtensionTargets">\r
- </ImportGroup>\r
-</Project>
\ No newline at end of file
+++ /dev/null
-<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <ItemGroup>
- <ClCompile Include="src\BraIA64.c" />
- <ClCompile Include="src\CpuArch.c" />
- <ClCompile Include="src\Delta.c" />
- <ClCompile Include="src\LzFind.c" />
- <ClCompile Include="src\Lzma86Dec.c" />
- <ClCompile Include="src\Lzma86Enc.c" />
- <ClCompile Include="src\LzmaDec.c" />
- <ClCompile Include="src\LzmaEnc.c" />
- <ClCompile Include="src\LzmaLib.c" />
- <ClCompile Include="src\Sort.c" />
- <ClCompile Include="src\Alloc.c" />
- <ClCompile Include="src\Bra86.c" />
- </ItemGroup>
-</Project>
\ No newline at end of file
+++ /dev/null
- CODE32\r
-\r
- EXPORT |CrcUpdateT4@16|\r
-\r
- AREA |.text|, CODE, ARM\r
-\r
- MACRO\r
- CRC32_STEP_1\r
-\r
- ldrb r4, [r1], #1\r
- subs r2, r2, #1\r
- eor r4, r4, r0\r
- and r4, r4, #0xFF\r
- ldr r4, [r3, +r4, lsl #2]\r
- eor r0, r4, r0, lsr #8\r
-\r
- MEND\r
-\r
-\r
- MACRO\r
- CRC32_STEP_4 $STREAM_WORD\r
- \r
- eor r7, r7, r8\r
- eor r7, r7, r9\r
- eor r0, r0, r7\r
- eor r0, r0, $STREAM_WORD\r
- ldr $STREAM_WORD, [r1], #4\r
- \r
- and r7, r0, #0xFF\r
- and r8, r0, #0xFF00\r
- and r9, r0, #0xFF0000\r
- and r0, r0, #0xFF000000\r
-\r
- ldr r7, [r6, +r7, lsl #2]\r
- ldr r8, [r5, +r8, lsr #6]\r
- ldr r9, [r4, +r9, lsr #14]\r
- ldr r0, [r3, +r0, lsr #22]\r
- \r
- MEND\r
-\r
-\r
-|CrcUpdateT4@16| PROC\r
-\r
- stmdb sp!, {r4-r11, lr}\r
- cmp r2, #0\r
- beq |$fin|\r
-\r
-|$v1|\r
- tst r1, #7\r
- beq |$v2|\r
- CRC32_STEP_1\r
- bne |$v1|\r
-\r
-|$v2|\r
- cmp r2, #16\r
- blo |$v3|\r
-\r
- ldr r10, [r1], #4\r
- ldr r11, [r1], #4\r
-\r
- add r4, r3, #0x400 \r
- add r5, r3, #0x800\r
- add r6, r3, #0xC00\r
-\r
- mov r7, #0\r
- mov r8, #0\r
- mov r9, #0\r
-\r
- sub r2, r2, #16\r
-\r
-|$loop|\r
- ; pld [r1, #0x40]\r
-\r
- CRC32_STEP_4 r10\r
- CRC32_STEP_4 r11\r
-\r
- subs r2, r2, #8\r
- bhs |$loop|\r
-\r
- sub r1, r1, #8\r
- add r2, r2, #16\r
-\r
- eor r7, r7, r8\r
- eor r7, r7, r9\r
- eor r0, r0, r7\r
-\r
-|$v3|\r
- cmp r2, #0\r
- beq |$fin|\r
-\r
-|$v4|\r
- CRC32_STEP_1\r
- bne |$v4|\r
-\r
-|$fin|\r
- ldmia sp!, {r4-r11, pc}\r
-\r
-|CrcUpdateT4@16| ENDP\r
-\r
- END\r
+++ /dev/null
-; 7zCrcOpt.asm -- CRC32 calculation : optimized version\r
-; 2021-02-07 : Igor Pavlov : Public domain\r
-\r
-include 7zAsm.asm\r
-\r
-MY_ASM_START\r
-\r
-rD equ r2\r
-rN equ r7\r
-rT equ r5\r
-\r
-ifdef x64\r
- num_VAR equ r8\r
- table_VAR equ r9\r
-else\r
- if (IS_CDECL gt 0)\r
- crc_OFFS equ (REG_SIZE * 5)\r
- data_OFFS equ (REG_SIZE + crc_OFFS)\r
- size_OFFS equ (REG_SIZE + data_OFFS)\r
- else\r
- size_OFFS equ (REG_SIZE * 5)\r
- endif\r
- table_OFFS equ (REG_SIZE + size_OFFS)\r
- num_VAR equ [r4 + size_OFFS]\r
- table_VAR equ [r4 + table_OFFS]\r
-endif\r
-\r
-SRCDAT equ rD + rN * 1 + 4 *\r
-\r
-CRC macro op:req, dest:req, src:req, t:req\r
- op dest, DWORD PTR [rT + src * 4 + 0400h * t]\r
-endm\r
-\r
-CRC_XOR macro dest:req, src:req, t:req\r
- CRC xor, dest, src, t\r
-endm\r
-\r
-CRC_MOV macro dest:req, src:req, t:req\r
- CRC mov, dest, src, t\r
-endm\r
-\r
-CRC1b macro\r
- movzx x6, BYTE PTR [rD]\r
- inc rD\r
- movzx x3, x0_L\r
- xor x6, x3\r
- shr x0, 8\r
- CRC xor, x0, r6, 0\r
- dec rN\r
-endm\r
-\r
-MY_PROLOG macro crc_end:req\r
-\r
- ifdef x64\r
- if (IS_LINUX gt 0)\r
- MY_PUSH_2_REGS\r
- mov x0, REG_ABI_PARAM_0_x ; x0 = x7\r
- mov rT, REG_ABI_PARAM_3 ; r5 = r1\r
- mov rN, REG_ABI_PARAM_2 ; r7 = r2\r
- mov rD, REG_ABI_PARAM_1 ; r2 = r6\r
- else\r
- MY_PUSH_4_REGS\r
- mov x0, REG_ABI_PARAM_0_x ; x0 = x1\r
- mov rT, REG_ABI_PARAM_3 ; r5 = r9\r
- mov rN, REG_ABI_PARAM_2 ; r7 = r8\r
- ; mov rD, REG_ABI_PARAM_1 ; r2 = r2\r
- endif\r
- else\r
- MY_PUSH_4_REGS\r
- if (IS_CDECL gt 0)\r
- mov x0, [r4 + crc_OFFS]\r
- mov rD, [r4 + data_OFFS]\r
- else\r
- mov x0, REG_ABI_PARAM_0_x\r
- endif\r
- mov rN, num_VAR\r
- mov rT, table_VAR\r
- endif\r
- \r
- test rN, rN\r
- jz crc_end\r
- @@:\r
- test rD, 7\r
- jz @F\r
- CRC1b\r
- jnz @B\r
- @@:\r
- cmp rN, 16\r
- jb crc_end\r
- add rN, rD\r
- mov num_VAR, rN\r
- sub rN, 8\r
- and rN, NOT 7\r
- sub rD, rN\r
- xor x0, [SRCDAT 0]\r
-endm\r
-\r
-MY_EPILOG macro crc_end:req\r
- xor x0, [SRCDAT 0]\r
- mov rD, rN\r
- mov rN, num_VAR\r
- sub rN, rD\r
- crc_end:\r
- test rN, rN\r
- jz @F\r
- CRC1b\r
- jmp crc_end\r
- @@:\r
- if (IS_X64 gt 0) and (IS_LINUX gt 0)\r
- MY_POP_2_REGS\r
- else\r
- MY_POP_4_REGS\r
- endif\r
-endm\r
-\r
-MY_PROC CrcUpdateT8, 4\r
- MY_PROLOG crc_end_8\r
- mov x1, [SRCDAT 1]\r
- align 16\r
- main_loop_8:\r
- mov x6, [SRCDAT 2]\r
- movzx x3, x1_L\r
- CRC_XOR x6, r3, 3\r
- movzx x3, x1_H\r
- CRC_XOR x6, r3, 2\r
- shr x1, 16\r
- movzx x3, x1_L\r
- movzx x1, x1_H\r
- CRC_XOR x6, r3, 1\r
- movzx x3, x0_L\r
- CRC_XOR x6, r1, 0\r
-\r
- mov x1, [SRCDAT 3]\r
- CRC_XOR x6, r3, 7\r
- movzx x3, x0_H\r
- shr x0, 16\r
- CRC_XOR x6, r3, 6\r
- movzx x3, x0_L\r
- CRC_XOR x6, r3, 5\r
- movzx x3, x0_H\r
- CRC_MOV x0, r3, 4\r
- xor x0, x6\r
- add rD, 8\r
- jnz main_loop_8\r
-\r
- MY_EPILOG crc_end_8\r
-MY_ENDP\r
-\r
-MY_PROC CrcUpdateT4, 4\r
- MY_PROLOG crc_end_4\r
- align 16\r
- main_loop_4:\r
- movzx x1, x0_L\r
- movzx x3, x0_H\r
- shr x0, 16\r
- movzx x6, x0_H\r
- and x0, 0FFh\r
- CRC_MOV x1, r1, 3\r
- xor x1, [SRCDAT 1]\r
- CRC_XOR x1, r3, 2\r
- CRC_XOR x1, r6, 0\r
- CRC_XOR x1, r0, 1\r
- \r
- movzx x0, x1_L\r
- movzx x3, x1_H\r
- shr x1, 16\r
- movzx x6, x1_H\r
- and x1, 0FFh\r
- CRC_MOV x0, r0, 3\r
- xor x0, [SRCDAT 2]\r
- CRC_XOR x0, r3, 2\r
- CRC_XOR x0, r6, 0\r
- CRC_XOR x0, r1, 1\r
- add rD, 8\r
- jnz main_loop_4\r
-\r
- MY_EPILOG crc_end_4\r
-MY_ENDP\r
-\r
-end\r
+++ /dev/null
-; AesOpt.asm -- AES optimized code for x86 AES hardware instructions\r
-; 2021-12-25 : Igor Pavlov : Public domain\r
-\r
-include 7zAsm.asm\r
-\r
-ifdef __ASMC__\r
- use_vaes_256 equ 1\r
-else\r
-ifdef ymm0\r
- use_vaes_256 equ 1\r
-endif\r
-endif\r
-\r
-\r
-ifdef use_vaes_256\r
- ECHO "++ VAES 256"\r
-else\r
- ECHO "-- NO VAES 256"\r
-endif\r
-\r
-ifdef x64\r
- ECHO "x86-64"\r
-else\r
- ECHO "x86"\r
-if (IS_CDECL gt 0)\r
- ECHO "ABI : CDECL"\r
-else\r
- ECHO "ABI : no CDECL : FASTCALL"\r
-endif\r
-endif\r
-\r
-if (IS_LINUX gt 0)\r
- ECHO "ABI : LINUX"\r
-else\r
- ECHO "ABI : WINDOWS"\r
-endif\r
-\r
-MY_ASM_START\r
-\r
-ifndef x64\r
- .686\r
- .xmm\r
-endif\r
-\r
-\r
-; MY_ALIGN EQU ALIGN(64)\r
-MY_ALIGN EQU\r
-\r
-SEG_ALIGN EQU MY_ALIGN\r
-\r
-MY_SEG_PROC macro name:req, numParams:req\r
- ; seg_name equ @CatStr(_TEXT$, name)\r
- ; seg_name SEGMENT SEG_ALIGN 'CODE'\r
- MY_PROC name, numParams\r
-endm\r
-\r
-MY_SEG_ENDP macro\r
- ; seg_name ENDS\r
-endm\r
-\r
-\r
-NUM_AES_KEYS_MAX equ 15\r
-\r
-; the number of push operators in function PROLOG\r
-if (IS_LINUX eq 0) or (IS_X64 eq 0)\r
-num_regs_push equ 2\r
-stack_param_offset equ (REG_SIZE * (1 + num_regs_push))\r
-endif\r
-\r
-ifdef x64\r
- num_param equ REG_ABI_PARAM_2\r
-else\r
- if (IS_CDECL gt 0)\r
- ; size_t size\r
- ; void * data\r
- ; UInt32 * aes\r
- ; ret-ip <- (r4)\r
- aes_OFFS equ (stack_param_offset)\r
- data_OFFS equ (REG_SIZE + aes_OFFS)\r
- size_OFFS equ (REG_SIZE + data_OFFS)\r
- num_param equ [r4 + size_OFFS]\r
- else\r
- num_param equ [r4 + stack_param_offset]\r
- endif\r
-endif\r
-\r
-keys equ REG_PARAM_0 ; r1\r
-rD equ REG_PARAM_1 ; r2\r
-rN equ r0\r
-\r
-koffs_x equ x7\r
-koffs_r equ r7\r
-\r
-ksize_x equ x6\r
-ksize_r equ r6\r
-\r
-keys2 equ r3\r
-\r
-state equ xmm0\r
-key equ xmm0\r
-key_ymm equ ymm0\r
-key_ymm_n equ 0\r
-\r
-ifdef x64\r
- ways = 11\r
-else\r
- ways = 4\r
-endif\r
-\r
-ways_start_reg equ 1\r
-\r
-iv equ @CatStr(xmm, %(ways_start_reg + ways))\r
-iv_ymm equ @CatStr(ymm, %(ways_start_reg + ways))\r
-\r
-\r
-WOP macro op, op2\r
- i = 0\r
- rept ways\r
- op @CatStr(xmm, %(ways_start_reg + i)), op2\r
- i = i + 1\r
- endm\r
-endm\r
-\r
-\r
-ifndef ABI_LINUX\r
-ifdef x64\r
-\r
-; we use 32 bytes of home space in stack in WIN64-x64\r
-NUM_HOME_MM_REGS equ (32 / 16)\r
-; we preserve xmm registers starting from xmm6 in WIN64-x64\r
-MM_START_SAVE_REG equ 6\r
-\r
-SAVE_XMM macro num_used_mm_regs:req\r
- num_save_mm_regs = num_used_mm_regs - MM_START_SAVE_REG\r
- if num_save_mm_regs GT 0\r
- num_save_mm_regs2 = num_save_mm_regs - NUM_HOME_MM_REGS\r
- ; RSP is (16*x + 8) after entering the function in WIN64-x64\r
- stack_offset = 16 * num_save_mm_regs2 + (stack_param_offset mod 16)\r
- \r
- i = 0\r
- rept num_save_mm_regs\r
- \r
- if i eq NUM_HOME_MM_REGS\r
- sub r4, stack_offset\r
- endif\r
- \r
- if i lt NUM_HOME_MM_REGS\r
- movdqa [r4 + stack_param_offset + i * 16], @CatStr(xmm, %(MM_START_SAVE_REG + i))\r
- else\r
- movdqa [r4 + (i - NUM_HOME_MM_REGS) * 16], @CatStr(xmm, %(MM_START_SAVE_REG + i))\r
- endif\r
- \r
- i = i + 1\r
- endm\r
- endif\r
-endm\r
-\r
-RESTORE_XMM macro num_used_mm_regs:req\r
- if num_save_mm_regs GT 0\r
- i = 0\r
- if num_save_mm_regs2 GT 0\r
- rept num_save_mm_regs2\r
- movdqa @CatStr(xmm, %(MM_START_SAVE_REG + NUM_HOME_MM_REGS + i)), [r4 + i * 16]\r
- i = i + 1\r
- endm\r
- add r4, stack_offset\r
- endif\r
-\r
- num_low_regs = num_save_mm_regs - i\r
- i = 0\r
- rept num_low_regs\r
- movdqa @CatStr(xmm, %(MM_START_SAVE_REG + i)), [r4 + stack_param_offset + i * 16]\r
- i = i + 1\r
- endm\r
- endif\r
-endm\r
-\r
-endif ; x64\r
-endif ; ABI_LINUX\r
-\r
-\r
-MY_PROLOG macro num_used_mm_regs:req\r
- ; num_regs_push: must be equal to the number of push operators\r
- ; push r3\r
- ; push r5\r
- if (IS_LINUX eq 0) or (IS_X64 eq 0)\r
- push r6\r
- push r7\r
- endif\r
-\r
- mov rN, num_param ; don't move it; num_param can use stack pointer (r4)\r
-\r
- if (IS_X64 eq 0)\r
- if (IS_CDECL gt 0)\r
- mov rD, [r4 + data_OFFS]\r
- mov keys, [r4 + aes_OFFS]\r
- endif\r
- elseif (IS_LINUX gt 0)\r
- MY_ABI_LINUX_TO_WIN_2\r
- endif\r
-\r
-\r
- ifndef ABI_LINUX\r
- ifdef x64\r
- SAVE_XMM num_used_mm_regs\r
- endif\r
- endif\r
- \r
- mov ksize_x, [keys + 16]\r
- shl ksize_x, 5\r
-endm\r
-\r
-\r
-MY_EPILOG macro\r
- ifndef ABI_LINUX\r
- ifdef x64\r
- RESTORE_XMM num_save_mm_regs\r
- endif\r
- endif\r
- \r
- if (IS_LINUX eq 0) or (IS_X64 eq 0)\r
- pop r7\r
- pop r6\r
- endif\r
- ; pop r5\r
- ; pop r3\r
- MY_ENDP\r
-endm\r
-\r
-\r
-OP_KEY macro op:req, offs:req\r
- op state, [keys + offs]\r
-endm\r
-\r
- \r
-WOP_KEY macro op:req, offs:req\r
- movdqa key, [keys + offs]\r
- WOP op, key\r
-endm\r
-\r
-\r
-; ---------- AES-CBC Decode ----------\r
-\r
-\r
-XOR_WITH_DATA macro reg, _ppp_\r
- pxor reg, [rD + i * 16]\r
-endm\r
-\r
-WRITE_TO_DATA macro reg, _ppp_\r
- movdqa [rD + i * 16], reg\r
-endm\r
-\r
-\r
-; state0 equ @CatStr(xmm, %(ways_start_reg))\r
-\r
-key0 equ @CatStr(xmm, %(ways_start_reg + ways + 1))\r
-key0_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 1))\r
-\r
-key_last equ @CatStr(xmm, %(ways_start_reg + ways + 2))\r
-key_last_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 2))\r
-key_last_ymm_n equ (ways_start_reg + ways + 2)\r
-\r
-NUM_CBC_REGS equ (ways_start_reg + ways + 3)\r
-\r
-\r
-MY_SEG_PROC AesCbc_Decode_HW, 3\r
-\r
- AesCbc_Decode_HW_start::\r
- MY_PROLOG NUM_CBC_REGS\r
- \r
- AesCbc_Decode_HW_start_2::\r
- movdqa iv, [keys]\r
- add keys, 32\r
-\r
- movdqa key0, [keys + 1 * ksize_r]\r
- movdqa key_last, [keys]\r
- sub ksize_x, 16\r
-\r
- jmp check2\r
- align 16\r
- nextBlocks2:\r
- WOP movdqa, [rD + i * 16]\r
- mov koffs_x, ksize_x\r
- ; WOP_KEY pxor, ksize_r + 16\r
- WOP pxor, key0\r
- ; align 16\r
- @@:\r
- WOP_KEY aesdec, 1 * koffs_r\r
- sub koffs_r, 16\r
- jnz @B\r
- ; WOP_KEY aesdeclast, 0\r
- WOP aesdeclast, key_last\r
- \r
- pxor @CatStr(xmm, %(ways_start_reg)), iv\r
- i = 1\r
- rept ways - 1\r
- pxor @CatStr(xmm, %(ways_start_reg + i)), [rD + i * 16 - 16]\r
- i = i + 1\r
- endm\r
- movdqa iv, [rD + ways * 16 - 16]\r
- WOP WRITE_TO_DATA\r
-\r
- add rD, ways * 16\r
- AesCbc_Decode_HW_start_3::\r
- check2:\r
- sub rN, ways\r
- jnc nextBlocks2\r
- add rN, ways\r
-\r
- sub ksize_x, 16\r
-\r
- jmp check\r
- nextBlock:\r
- movdqa state, [rD]\r
- mov koffs_x, ksize_x\r
- ; OP_KEY pxor, 1 * ksize_r + 32\r
- pxor state, key0\r
- ; movdqa state0, [rD]\r
- ; movdqa state, key0\r
- ; pxor state, state0\r
- @@:\r
- OP_KEY aesdec, 1 * koffs_r + 16\r
- OP_KEY aesdec, 1 * koffs_r\r
- sub koffs_r, 32\r
- jnz @B\r
- OP_KEY aesdec, 16\r
- ; OP_KEY aesdeclast, 0\r
- aesdeclast state, key_last\r
- \r
- pxor state, iv\r
- movdqa iv, [rD]\r
- ; movdqa iv, state0\r
- movdqa [rD], state\r
- \r
- add rD, 16\r
- check:\r
- sub rN, 1\r
- jnc nextBlock\r
-\r
- movdqa [keys - 32], iv\r
-MY_EPILOG\r
-\r
-\r
-\r
-\r
-; ---------- AVX ----------\r
-\r
-\r
-AVX__WOP_n macro op\r
- i = 0\r
- rept ways\r
- op (ways_start_reg + i)\r
- i = i + 1\r
- endm\r
-endm\r
-\r
-AVX__WOP macro op\r
- i = 0\r
- rept ways\r
- op @CatStr(ymm, %(ways_start_reg + i))\r
- i = i + 1\r
- endm\r
-endm\r
-\r
-\r
-AVX__WOP_KEY macro op:req, offs:req\r
- vmovdqa key_ymm, ymmword ptr [keys2 + offs]\r
- AVX__WOP_n op\r
-endm\r
-\r
-\r
-AVX__CBC_START macro reg\r
- ; vpxor reg, key_ymm, ymmword ptr [rD + 32 * i]\r
- vpxor reg, key0_ymm, ymmword ptr [rD + 32 * i]\r
-endm\r
-\r
-AVX__CBC_END macro reg\r
- if i eq 0\r
- vpxor reg, reg, iv_ymm\r
- else\r
- vpxor reg, reg, ymmword ptr [rD + i * 32 - 16]\r
- endif\r
-endm\r
-\r
-\r
-AVX__WRITE_TO_DATA macro reg\r
- vmovdqu ymmword ptr [rD + 32 * i], reg\r
-endm\r
-\r
-AVX__XOR_WITH_DATA macro reg\r
- vpxor reg, reg, ymmword ptr [rD + 32 * i]\r
-endm\r
-\r
-AVX__CTR_START macro reg\r
- vpaddq iv_ymm, iv_ymm, one_ymm\r
- ; vpxor reg, iv_ymm, key_ymm\r
- vpxor reg, iv_ymm, key0_ymm\r
-endm\r
-\r
-\r
-MY_VAES_INSTR_2 macro cmd, dest, a1, a2\r
- db 0c4H\r
- db 2 + 040H + 020h * (1 - (a2) / 8) + 080h * (1 - (dest) / 8)\r
- db 5 + 8 * ((not (a1)) and 15)\r
- db cmd\r
- db 0c0H + 8 * ((dest) and 7) + ((a2) and 7)\r
-endm\r
-\r
-MY_VAES_INSTR macro cmd, dest, a\r
- MY_VAES_INSTR_2 cmd, dest, dest, a\r
-endm\r
-\r
-MY_vaesenc macro dest, a\r
- MY_VAES_INSTR 0dcH, dest, a\r
-endm\r
-MY_vaesenclast macro dest, a\r
- MY_VAES_INSTR 0ddH, dest, a\r
-endm\r
-MY_vaesdec macro dest, a\r
- MY_VAES_INSTR 0deH, dest, a\r
-endm\r
-MY_vaesdeclast macro dest, a\r
- MY_VAES_INSTR 0dfH, dest, a\r
-endm\r
-\r
-\r
-AVX__VAES_DEC macro reg\r
- MY_vaesdec reg, key_ymm_n\r
-endm\r
-\r
-AVX__VAES_DEC_LAST_key_last macro reg\r
- ; MY_vaesdeclast reg, key_ymm_n\r
- MY_vaesdeclast reg, key_last_ymm_n\r
-endm\r
-\r
-AVX__VAES_ENC macro reg\r
- MY_vaesenc reg, key_ymm_n\r
-endm\r
-\r
-AVX__VAES_ENC_LAST macro reg\r
- MY_vaesenclast reg, key_ymm_n\r
-endm\r
-\r
-AVX__vinserti128_TO_HIGH macro dest, src\r
- vinserti128 dest, dest, src, 1\r
-endm\r
-\r
-\r
-MY_PROC AesCbc_Decode_HW_256, 3\r
- ifdef use_vaes_256\r
- MY_PROLOG NUM_CBC_REGS\r
- \r
- cmp rN, ways * 2\r
- jb AesCbc_Decode_HW_start_2\r
-\r
- vmovdqa iv, xmmword ptr [keys]\r
- add keys, 32\r
-\r
- vbroadcasti128 key0_ymm, xmmword ptr [keys + 1 * ksize_r]\r
- vbroadcasti128 key_last_ymm, xmmword ptr [keys]\r
- sub ksize_x, 16\r
- mov koffs_x, ksize_x\r
- add ksize_x, ksize_x\r
- \r
- AVX_STACK_SUB = ((NUM_AES_KEYS_MAX + 1 - 2) * 32)\r
- push keys2\r
- sub r4, AVX_STACK_SUB\r
- ; sub r4, 32\r
- ; sub r4, ksize_r\r
- ; lea keys2, [r4 + 32]\r
- mov keys2, r4\r
- and keys2, -32\r
- broad:\r
- vbroadcasti128 key_ymm, xmmword ptr [keys + 1 * koffs_r]\r
- vmovdqa ymmword ptr [keys2 + koffs_r * 2], key_ymm\r
- sub koffs_r, 16\r
- ; jnc broad\r
- jnz broad\r
-\r
- sub rN, ways * 2\r
-\r
- align 16\r
- avx_cbcdec_nextBlock2:\r
- mov koffs_x, ksize_x\r
- ; AVX__WOP_KEY AVX__CBC_START, 1 * koffs_r + 32\r
- AVX__WOP AVX__CBC_START\r
- @@:\r
- AVX__WOP_KEY AVX__VAES_DEC, 1 * koffs_r\r
- sub koffs_r, 32\r
- jnz @B\r
- ; AVX__WOP_KEY AVX__VAES_DEC_LAST, 0\r
- AVX__WOP_n AVX__VAES_DEC_LAST_key_last\r
-\r
- AVX__vinserti128_TO_HIGH iv_ymm, xmmword ptr [rD]\r
- AVX__WOP AVX__CBC_END\r
-\r
- vmovdqa iv, xmmword ptr [rD + ways * 32 - 16]\r
- AVX__WOP AVX__WRITE_TO_DATA\r
- \r
- add rD, ways * 32\r
- sub rN, ways * 2\r
- jnc avx_cbcdec_nextBlock2\r
- add rN, ways * 2\r
-\r
- shr ksize_x, 1\r
- \r
- ; lea r4, [r4 + 1 * ksize_r + 32]\r
- add r4, AVX_STACK_SUB\r
- pop keys2\r
-\r
- vzeroupper\r
- jmp AesCbc_Decode_HW_start_3\r
- else\r
- jmp AesCbc_Decode_HW_start\r
- endif\r
-MY_ENDP\r
-MY_SEG_ENDP\r
-\r
-\r
-\r
- \r
-; ---------- AES-CBC Encode ----------\r
-\r
-e0 equ xmm1\r
-\r
-CENC_START_KEY equ 2\r
-CENC_NUM_REG_KEYS equ (3 * 2)\r
-; last_key equ @CatStr(xmm, %(CENC_START_KEY + CENC_NUM_REG_KEYS))\r
-\r
-MY_SEG_PROC AesCbc_Encode_HW, 3\r
- MY_PROLOG (CENC_START_KEY + CENC_NUM_REG_KEYS + 0)\r
-\r
- movdqa state, [keys]\r
- add keys, 32\r
- \r
- i = 0\r
- rept CENC_NUM_REG_KEYS\r
- movdqa @CatStr(xmm, %(CENC_START_KEY + i)), [keys + i * 16]\r
- i = i + 1\r
- endm\r
- \r
- add keys, ksize_r\r
- neg ksize_r\r
- add ksize_r, (16 * CENC_NUM_REG_KEYS)\r
- ; movdqa last_key, [keys]\r
- jmp check_e\r
-\r
- align 16\r
- nextBlock_e:\r
- movdqa e0, [rD]\r
- mov koffs_r, ksize_r\r
- pxor e0, @CatStr(xmm, %(CENC_START_KEY))\r
- pxor state, e0\r
- \r
- i = 1\r
- rept (CENC_NUM_REG_KEYS - 1)\r
- aesenc state, @CatStr(xmm, %(CENC_START_KEY + i))\r
- i = i + 1\r
- endm\r
-\r
- @@:\r
- OP_KEY aesenc, 1 * koffs_r\r
- OP_KEY aesenc, 1 * koffs_r + 16\r
- add koffs_r, 32\r
- jnz @B\r
- OP_KEY aesenclast, 0\r
- ; aesenclast state, last_key\r
- \r
- movdqa [rD], state\r
- add rD, 16\r
- check_e:\r
- sub rN, 1\r
- jnc nextBlock_e\r
-\r
- ; movdqa [keys - 32], state\r
- movdqa [keys + 1 * ksize_r - (16 * CENC_NUM_REG_KEYS) - 32], state\r
-MY_EPILOG\r
-MY_SEG_ENDP\r
-\r
-\r
- \r
-; ---------- AES-CTR ----------\r
-\r
-ifdef x64\r
- ; ways = 11\r
-endif\r
-\r
- \r
-one equ @CatStr(xmm, %(ways_start_reg + ways + 1))\r
-one_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 1))\r
-key0 equ @CatStr(xmm, %(ways_start_reg + ways + 2))\r
-key0_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 2))\r
-NUM_CTR_REGS equ (ways_start_reg + ways + 3)\r
-\r
-INIT_CTR macro reg, _ppp_\r
- paddq iv, one\r
- movdqa reg, iv\r
-endm\r
-\r
-\r
-MY_SEG_PROC AesCtr_Code_HW, 3\r
- Ctr_start::\r
- MY_PROLOG NUM_CTR_REGS\r
-\r
- Ctr_start_2::\r
- movdqa iv, [keys]\r
- add keys, 32\r
- movdqa key0, [keys]\r
-\r
- add keys, ksize_r\r
- neg ksize_r\r
- add ksize_r, 16\r
- \r
- Ctr_start_3::\r
- mov koffs_x, 1\r
- movd one, koffs_x\r
- jmp check2_c\r
-\r
- align 16\r
- nextBlocks2_c:\r
- WOP INIT_CTR, 0\r
- mov koffs_r, ksize_r\r
- ; WOP_KEY pxor, 1 * koffs_r -16\r
- WOP pxor, key0\r
- @@:\r
- WOP_KEY aesenc, 1 * koffs_r\r
- add koffs_r, 16\r
- jnz @B\r
- WOP_KEY aesenclast, 0\r
- \r
- WOP XOR_WITH_DATA\r
- WOP WRITE_TO_DATA\r
- add rD, ways * 16\r
- check2_c:\r
- sub rN, ways\r
- jnc nextBlocks2_c\r
- add rN, ways\r
-\r
- sub keys, 16\r
- add ksize_r, 16\r
- \r
- jmp check_c\r
-\r
- ; align 16\r
- nextBlock_c:\r
- paddq iv, one\r
- ; movdqa state, [keys + 1 * koffs_r - 16]\r
- movdqa state, key0\r
- mov koffs_r, ksize_r\r
- pxor state, iv\r
- \r
- @@:\r
- OP_KEY aesenc, 1 * koffs_r\r
- OP_KEY aesenc, 1 * koffs_r + 16\r
- add koffs_r, 32\r
- jnz @B\r
- OP_KEY aesenc, 0\r
- OP_KEY aesenclast, 16\r
- \r
- pxor state, [rD]\r
- movdqa [rD], state\r
- add rD, 16\r
- check_c:\r
- sub rN, 1\r
- jnc nextBlock_c\r
-\r
- ; movdqa [keys - 32], iv\r
- movdqa [keys + 1 * ksize_r - 16 - 32], iv\r
-MY_EPILOG\r
-\r
-\r
-MY_PROC AesCtr_Code_HW_256, 3\r
- ifdef use_vaes_256\r
- MY_PROLOG NUM_CTR_REGS\r
-\r
- cmp rN, ways * 2\r
- jb Ctr_start_2\r
-\r
- vbroadcasti128 iv_ymm, xmmword ptr [keys]\r
- add keys, 32\r
- vbroadcasti128 key0_ymm, xmmword ptr [keys]\r
- mov koffs_x, 1\r
- vmovd one, koffs_x\r
- vpsubq iv_ymm, iv_ymm, one_ymm\r
- vpaddq one, one, one\r
- AVX__vinserti128_TO_HIGH one_ymm, one\r
- \r
- add keys, ksize_r\r
- sub ksize_x, 16\r
- neg ksize_r\r
- mov koffs_r, ksize_r\r
- add ksize_r, ksize_r\r
-\r
- AVX_STACK_SUB = ((NUM_AES_KEYS_MAX + 1 - 1) * 32)\r
- push keys2\r
- lea keys2, [r4 - 32]\r
- sub r4, AVX_STACK_SUB\r
- and keys2, -32\r
- vbroadcasti128 key_ymm, xmmword ptr [keys]\r
- vmovdqa ymmword ptr [keys2], key_ymm\r
- @@:\r
- vbroadcasti128 key_ymm, xmmword ptr [keys + 1 * koffs_r]\r
- vmovdqa ymmword ptr [keys2 + koffs_r * 2], key_ymm\r
- add koffs_r, 16\r
- jnz @B\r
-\r
- sub rN, ways * 2\r
- \r
- align 16\r
- avx_ctr_nextBlock2:\r
- mov koffs_r, ksize_r\r
- AVX__WOP AVX__CTR_START\r
- ; AVX__WOP_KEY AVX__CTR_START, 1 * koffs_r - 32\r
- @@:\r
- AVX__WOP_KEY AVX__VAES_ENC, 1 * koffs_r\r
- add koffs_r, 32\r
- jnz @B\r
- AVX__WOP_KEY AVX__VAES_ENC_LAST, 0\r
- \r
- AVX__WOP AVX__XOR_WITH_DATA\r
- AVX__WOP AVX__WRITE_TO_DATA\r
- \r
- add rD, ways * 32\r
- sub rN, ways * 2\r
- jnc avx_ctr_nextBlock2\r
- add rN, ways * 2\r
- \r
- vextracti128 iv, iv_ymm, 1\r
- sar ksize_r, 1\r
- \r
- add r4, AVX_STACK_SUB\r
- pop keys2\r
- \r
- vzeroupper\r
- jmp Ctr_start_3\r
- else\r
- jmp Ctr_start\r
- endif\r
-MY_ENDP\r
-MY_SEG_ENDP\r
-\r
-end\r
+++ /dev/null
-; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function\r
-; 2021-07-21: Igor Pavlov : Public domain\r
-;\r
-\r
-ifndef x64\r
-; x64=1\r
-; .err <x64_IS_REQUIRED>\r
-endif\r
-\r
-include 7zAsm.asm\r
-\r
-MY_ASM_START\r
-\r
-_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'\r
-\r
-MY_ALIGN macro num:req\r
- align num\r
-endm\r
-\r
-MY_ALIGN_32 macro\r
- MY_ALIGN 32\r
-endm\r
-\r
-MY_ALIGN_64 macro\r
- MY_ALIGN 64\r
-endm\r
-\r
-\r
-t0_L equ x0_L\r
-t0_x equ x0\r
-t0 equ r0\r
-t1_x equ x3\r
-t1 equ r3\r
-\r
-cp_x equ t1_x\r
-cp_r equ t1\r
-m equ x5\r
-m_r equ r5\r
-len_x equ x6\r
-len equ r6\r
-diff_x equ x7\r
-diff equ r7\r
-len0 equ r10\r
-len1_x equ x11\r
-len1 equ r11\r
-maxLen_x equ x12\r
-maxLen equ r12\r
-d equ r13\r
-ptr0 equ r14\r
-ptr1 equ r15\r
-\r
-d_lim equ m_r\r
-cycSize equ len_x\r
-hash_lim equ len0\r
-delta1_x equ len1_x\r
-delta1_r equ len1\r
-delta_x equ maxLen_x\r
-delta_r equ maxLen\r
-hash equ ptr0\r
-src equ ptr1\r
-\r
-\r
-\r
-if (IS_LINUX gt 0)\r
-\r
-; r1 r2 r8 r9 : win32\r
-; r7 r6 r2 r1 r8 r9 : linux\r
-\r
-lenLimit equ r8\r
-lenLimit_x equ x8\r
-; pos_r equ r2\r
-pos equ x2\r
-cur equ r1\r
-son equ r9\r
-\r
-else\r
-\r
-lenLimit equ REG_ABI_PARAM_2\r
-lenLimit_x equ REG_ABI_PARAM_2_x\r
-pos equ REG_ABI_PARAM_1_x\r
-cur equ REG_ABI_PARAM_0\r
-son equ REG_ABI_PARAM_3\r
-\r
-endif\r
-\r
-\r
-if (IS_LINUX gt 0)\r
- maxLen_OFFS equ (REG_SIZE * (6 + 1))\r
-else\r
- cutValue_OFFS equ (REG_SIZE * (8 + 1 + 4))\r
- d_OFFS equ (REG_SIZE + cutValue_OFFS)\r
- maxLen_OFFS equ (REG_SIZE + d_OFFS)\r
-endif\r
- hash_OFFS equ (REG_SIZE + maxLen_OFFS)\r
- limit_OFFS equ (REG_SIZE + hash_OFFS)\r
- size_OFFS equ (REG_SIZE + limit_OFFS)\r
- cycPos_OFFS equ (REG_SIZE + size_OFFS)\r
- cycSize_OFFS equ (REG_SIZE + cycPos_OFFS)\r
- posRes_OFFS equ (REG_SIZE + cycSize_OFFS)\r
- \r
-if (IS_LINUX gt 0)\r
-else\r
- cutValue_PAR equ [r0 + cutValue_OFFS]\r
- d_PAR equ [r0 + d_OFFS]\r
-endif\r
- maxLen_PAR equ [r0 + maxLen_OFFS]\r
- hash_PAR equ [r0 + hash_OFFS]\r
- limit_PAR equ [r0 + limit_OFFS]\r
- size_PAR equ [r0 + size_OFFS]\r
- cycPos_PAR equ [r0 + cycPos_OFFS]\r
- cycSize_PAR equ [r0 + cycSize_OFFS]\r
- posRes_PAR equ [r0 + posRes_OFFS]\r
-\r
-\r
- cutValue_VAR equ DWORD PTR [r4 + 8 * 0]\r
- cutValueCur_VAR equ DWORD PTR [r4 + 8 * 0 + 4]\r
- cycPos_VAR equ DWORD PTR [r4 + 8 * 1 + 0]\r
- cycSize_VAR equ DWORD PTR [r4 + 8 * 1 + 4]\r
- hash_VAR equ QWORD PTR [r4 + 8 * 2]\r
- limit_VAR equ QWORD PTR [r4 + 8 * 3]\r
- size_VAR equ QWORD PTR [r4 + 8 * 4]\r
- distances equ QWORD PTR [r4 + 8 * 5]\r
- maxLen_VAR equ QWORD PTR [r4 + 8 * 6]\r
-\r
- Old_RSP equ QWORD PTR [r4 + 8 * 7]\r
- LOCAL_SIZE equ 8 * 8\r
-\r
-COPY_VAR_32 macro dest_var, src_var\r
- mov x3, src_var\r
- mov dest_var, x3\r
-endm\r
-\r
-COPY_VAR_64 macro dest_var, src_var\r
- mov r3, src_var\r
- mov dest_var, r3\r
-endm\r
-\r
-\r
-; MY_ALIGN_64\r
-MY_PROC GetMatchesSpecN_2, 13\r
-MY_PUSH_PRESERVED_ABI_REGS\r
- mov r0, RSP\r
- lea r3, [r0 - LOCAL_SIZE]\r
- and r3, -64\r
- mov RSP, r3\r
- mov Old_RSP, r0\r
-\r
-if (IS_LINUX gt 0)\r
- mov d, REG_ABI_PARAM_5 ; r13 = r9\r
- mov cutValue_VAR, REG_ABI_PARAM_4_x ; = r8\r
- mov son, REG_ABI_PARAM_3 ; r9 = r1\r
- mov r8, REG_ABI_PARAM_2 ; r8 = r2\r
- mov pos, REG_ABI_PARAM_1_x ; r2 = x6\r
- mov r1, REG_ABI_PARAM_0 ; r1 = r7\r
-else\r
- COPY_VAR_32 cutValue_VAR, cutValue_PAR\r
- mov d, d_PAR\r
-endif\r
-\r
- COPY_VAR_64 limit_VAR, limit_PAR\r
- \r
- mov hash_lim, size_PAR\r
- mov size_VAR, hash_lim\r
- \r
- mov cp_x, cycPos_PAR\r
- mov hash, hash_PAR\r
-\r
- mov cycSize, cycSize_PAR\r
- mov cycSize_VAR, cycSize\r
- \r
- ; we want cur in (rcx). So we change the cur and lenLimit variables\r
- sub lenLimit, cur\r
- neg lenLimit_x\r
- inc lenLimit_x\r
- \r
- mov t0_x, maxLen_PAR\r
- sub t0, lenLimit\r
- mov maxLen_VAR, t0\r
-\r
- jmp main_loop\r
-\r
-MY_ALIGN_64\r
-fill_empty:\r
- ; ptr0 = *ptr1 = kEmptyHashValue;\r
- mov QWORD PTR [ptr1], 0\r
- inc pos\r
- inc cp_x\r
- mov DWORD PTR [d - 4], 0\r
- cmp d, limit_VAR\r
- jae fin\r
- cmp hash, hash_lim\r
- je fin\r
-\r
-; MY_ALIGN_64\r
-main_loop:\r
- ; UInt32 delta = *hash++;\r
- mov diff_x, [hash] ; delta\r
- add hash, 4\r
- ; mov cycPos_VAR, cp_x\r
- \r
- inc cur\r
- add d, 4\r
- mov m, pos\r
- sub m, diff_x; ; matchPos\r
- \r
- ; CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;\r
- lea ptr1, [son + 8 * cp_r]\r
- ; mov cycSize, cycSize_VAR\r
- cmp pos, cycSize\r
- jb directMode ; if (pos < cycSize_VAR)\r
- \r
- ; CYC MODE\r
-\r
- cmp diff_x, cycSize\r
- jae fill_empty ; if (delta >= cycSize_VAR)\r
- \r
- xor t0_x, t0_x\r
- mov cycPos_VAR, cp_x\r
- sub cp_x, diff_x\r
- ; jae prepare_for_tree_loop\r
- ; add cp_x, cycSize\r
- cmovb t0_x, cycSize\r
- add cp_x, t0_x ; cp_x += (cycPos < delta ? cycSize : 0)\r
- jmp prepare_for_tree_loop\r
- \r
- \r
-directMode:\r
- cmp diff_x, pos\r
- je fill_empty ; if (delta == pos)\r
- jae fin_error ; if (delta >= pos)\r
- \r
- mov cycPos_VAR, cp_x\r
- mov cp_x, m\r
- \r
-prepare_for_tree_loop:\r
- mov len0, lenLimit\r
- mov hash_VAR, hash\r
- ; CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;\r
- lea ptr0, [ptr1 + 4]\r
- ; UInt32 *_distances = ++d;\r
- mov distances, d\r
-\r
- neg len0\r
- mov len1, len0\r
-\r
- mov t0_x, cutValue_VAR\r
- mov maxLen, maxLen_VAR\r
- mov cutValueCur_VAR, t0_x\r
-\r
-MY_ALIGN_32\r
-tree_loop:\r
- neg diff\r
- mov len, len0\r
- cmp len1, len0\r
- cmovb len, len1 ; len = (len1 < len0 ? len1 : len0);\r
- add diff, cur\r
-\r
- mov t0_x, [son + cp_r * 8] ; prefetch\r
- movzx t0_x, BYTE PTR [diff + 1 * len]\r
- lea cp_r, [son + cp_r * 8]\r
- cmp [cur + 1 * len], t0_L\r
- je matched_1\r
- \r
- jb left_0\r
-\r
- mov [ptr1], m\r
- mov m, [cp_r + 4]\r
- lea ptr1, [cp_r + 4]\r
- sub diff, cur ; FIX32\r
- jmp next_node\r
-\r
-MY_ALIGN_32\r
-left_0:\r
- mov [ptr0], m\r
- mov m, [cp_r]\r
- mov ptr0, cp_r\r
- sub diff, cur ; FIX32\r
- ; jmp next_node\r
-\r
-; ------------ NEXT NODE ------------\r
-; MY_ALIGN_32\r
-next_node:\r
- mov cycSize, cycSize_VAR\r
- dec cutValueCur_VAR\r
- je finish_tree\r
- \r
- add diff_x, pos ; prev_match = pos + diff\r
- cmp m, diff_x\r
- jae fin_error ; if (new_match >= prev_match)\r
- \r
- mov diff_x, pos\r
- sub diff_x, m ; delta = pos - new_match\r
- cmp pos, cycSize\r
- jae cyc_mode_2 ; if (pos >= cycSize)\r
-\r
- mov cp_x, m\r
- test m, m\r
- jne tree_loop ; if (m != 0)\r
- \r
-finish_tree:\r
- ; ptr0 = *ptr1 = kEmptyHashValue;\r
- mov DWORD PTR [ptr0], 0\r
- mov DWORD PTR [ptr1], 0\r
-\r
- inc pos\r
- \r
- ; _distances[-1] = (UInt32)(d - _distances);\r
- mov t0, distances\r
- mov t1, d\r
- sub t1, t0\r
- shr t1_x, 2\r
- mov [t0 - 4], t1_x\r
-\r
- cmp d, limit_VAR\r
- jae fin ; if (d >= limit)\r
- \r
- mov cp_x, cycPos_VAR\r
- mov hash, hash_VAR\r
- mov hash_lim, size_VAR\r
- inc cp_x\r
- cmp hash, hash_lim\r
- jne main_loop ; if (hash != size)\r
- jmp fin\r
- \r
-\r
-MY_ALIGN_32\r
-cyc_mode_2:\r
- cmp diff_x, cycSize\r
- jae finish_tree ; if (delta >= cycSize)\r
-\r
- mov cp_x, cycPos_VAR\r
- xor t0_x, t0_x\r
- sub cp_x, diff_x ; cp_x = cycPos - delta\r
- cmovb t0_x, cycSize\r
- add cp_x, t0_x ; cp_x += (cycPos < delta ? cycSize : 0)\r
- jmp tree_loop\r
-\r
- \r
-MY_ALIGN_32\r
-matched_1:\r
-\r
- inc len\r
- ; cmp len_x, lenLimit_x\r
- je short lenLimit_reach\r
- movzx t0_x, BYTE PTR [diff + 1 * len]\r
- cmp [cur + 1 * len], t0_L\r
- jne mismatch\r
-\r
- \r
-MY_ALIGN_32\r
-match_loop:\r
- ; while (++len != lenLimit) (len[diff] != len[0]) ;\r
-\r
- inc len\r
- ; cmp len_x, lenLimit_x\r
- je short lenLimit_reach\r
- movzx t0_x, BYTE PTR [diff + 1 * len]\r
- cmp BYTE PTR [cur + 1 * len], t0_L\r
- je match_loop\r
-\r
-mismatch:\r
- jb left_2\r
-\r
- mov [ptr1], m\r
- mov m, [cp_r + 4]\r
- lea ptr1, [cp_r + 4]\r
- mov len1, len\r
-\r
- jmp max_update\r
- \r
-MY_ALIGN_32\r
-left_2:\r
- mov [ptr0], m\r
- mov m, [cp_r]\r
- mov ptr0, cp_r\r
- mov len0, len\r
-\r
-max_update:\r
- sub diff, cur ; restore diff\r
-\r
- cmp maxLen, len\r
- jae next_node\r
- \r
- mov maxLen, len\r
- add len, lenLimit\r
- mov [d], len_x\r
- mov t0_x, diff_x\r
- not t0_x\r
- mov [d + 4], t0_x\r
- add d, 8\r
- \r
- jmp next_node\r
-\r
-\r
- \r
-MY_ALIGN_32\r
-lenLimit_reach:\r
-\r
- mov delta_r, cur\r
- sub delta_r, diff\r
- lea delta1_r, [delta_r - 1]\r
-\r
- mov t0_x, [cp_r]\r
- mov [ptr1], t0_x\r
- mov t0_x, [cp_r + 4]\r
- mov [ptr0], t0_x\r
-\r
- mov [d], lenLimit_x\r
- mov [d + 4], delta1_x\r
- add d, 8\r
-\r
- ; _distances[-1] = (UInt32)(d - _distances);\r
- mov t0, distances\r
- mov t1, d\r
- sub t1, t0\r
- shr t1_x, 2\r
- mov [t0 - 4], t1_x\r
-\r
- mov hash, hash_VAR\r
- mov hash_lim, size_VAR\r
-\r
- inc pos\r
- mov cp_x, cycPos_VAR\r
- inc cp_x\r
-\r
- mov d_lim, limit_VAR\r
- mov cycSize, cycSize_VAR\r
- ; if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)\r
- ; break;\r
- cmp hash, hash_lim\r
- je fin\r
- cmp d, d_lim\r
- jae fin\r
- cmp delta_x, [hash]\r
- jne main_loop\r
- movzx t0_x, BYTE PTR [diff]\r
- cmp [cur], t0_L\r
- jne main_loop\r
-\r
- ; jmp main_loop ; bypass for debug\r
- \r
- mov cycPos_VAR, cp_x\r
- shl len, 3 ; cycSize * 8\r
- sub diff, cur ; restore diff\r
- xor t0_x, t0_x\r
- cmp cp_x, delta_x ; cmp (cycPos_VAR, delta)\r
- lea cp_r, [son + 8 * cp_r] ; dest\r
- lea src, [cp_r + 8 * diff]\r
- cmovb t0, len ; t0 = (cycPos_VAR < delta ? cycSize * 8 : 0)\r
- add src, t0\r
- add len, son ; len = son + cycSize * 8\r
-\r
- \r
-MY_ALIGN_32\r
-long_loop:\r
- add hash, 4\r
- \r
- ; *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];\r
- \r
- mov t0, [src]\r
- add src, 8\r
- mov [cp_r], t0\r
- add cp_r, 8\r
- cmp src, len\r
- cmove src, son ; if end of (son) buffer is reached, we wrap to begin\r
-\r
- mov DWORD PTR [d], 2\r
- mov [d + 4], lenLimit_x\r
- mov [d + 8], delta1_x\r
- add d, 12\r
-\r
- inc cur\r
-\r
- cmp hash, hash_lim\r
- je long_footer\r
- cmp delta_x, [hash]\r
- jne long_footer\r
- movzx t0_x, BYTE PTR [diff + 1 * cur]\r
- cmp [cur], t0_L\r
- jne long_footer\r
- cmp d, d_lim\r
- jb long_loop\r
-\r
-long_footer:\r
- sub cp_r, son\r
- shr cp_r, 3\r
- add pos, cp_x\r
- sub pos, cycPos_VAR\r
- mov cycSize, cycSize_VAR\r
- \r
- cmp d, d_lim\r
- jae fin\r
- cmp hash, hash_lim\r
- jne main_loop\r
- jmp fin\r
-\r
-\r
-\r
-fin_error:\r
- xor d, d\r
- \r
-fin:\r
- mov RSP, Old_RSP\r
- mov t0, [r4 + posRes_OFFS]\r
- mov [t0], pos\r
- mov r0, d\r
-\r
-MY_POP_PRESERVED_ABI_REGS\r
-MY_ENDP\r
-\r
-_TEXT$LZFINDOPT ENDS\r
-\r
-end\r
+++ /dev/null
-; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions\r
-; 2022-04-17 : Igor Pavlov : Public domain\r
-\r
-include 7zAsm.asm\r
-\r
-MY_ASM_START\r
-\r
-; .data\r
-; public K\r
-\r
-; we can use external SHA256_K_ARRAY defined in Sha256.c\r
-; but we must guarantee that SHA256_K_ARRAY is aligned for 16-bytes\r
-\r
-COMMENT @\r
-ifdef x64\r
-K_CONST equ SHA256_K_ARRAY\r
-else\r
-K_CONST equ _SHA256_K_ARRAY\r
-endif\r
-EXTRN K_CONST:xmmword\r
-@\r
-\r
-CONST SEGMENT\r
-\r
-align 16\r
-Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12\r
-\r
-; COMMENT @\r
-align 16\r
-K_CONST \\r
-DD 0428a2f98H, 071374491H, 0b5c0fbcfH, 0e9b5dba5H\r
-DD 03956c25bH, 059f111f1H, 0923f82a4H, 0ab1c5ed5H\r
-DD 0d807aa98H, 012835b01H, 0243185beH, 0550c7dc3H\r
-DD 072be5d74H, 080deb1feH, 09bdc06a7H, 0c19bf174H\r
-DD 0e49b69c1H, 0efbe4786H, 00fc19dc6H, 0240ca1ccH\r
-DD 02de92c6fH, 04a7484aaH, 05cb0a9dcH, 076f988daH\r
-DD 0983e5152H, 0a831c66dH, 0b00327c8H, 0bf597fc7H\r
-DD 0c6e00bf3H, 0d5a79147H, 006ca6351H, 014292967H\r
-DD 027b70a85H, 02e1b2138H, 04d2c6dfcH, 053380d13H\r
-DD 0650a7354H, 0766a0abbH, 081c2c92eH, 092722c85H\r
-DD 0a2bfe8a1H, 0a81a664bH, 0c24b8b70H, 0c76c51a3H\r
-DD 0d192e819H, 0d6990624H, 0f40e3585H, 0106aa070H\r
-DD 019a4c116H, 01e376c08H, 02748774cH, 034b0bcb5H\r
-DD 0391c0cb3H, 04ed8aa4aH, 05b9cca4fH, 0682e6ff3H\r
-DD 0748f82eeH, 078a5636fH, 084c87814H, 08cc70208H\r
-DD 090befffaH, 0a4506cebH, 0bef9a3f7H, 0c67178f2H\r
-; @\r
-\r
-CONST ENDS\r
-\r
-; _TEXT$SHA256OPT SEGMENT 'CODE'\r
-\r
-ifndef x64\r
- .686\r
- .xmm\r
-endif\r
- \r
-; jwasm-based assemblers for linux and linker from new versions of binutils\r
-; can generate incorrect code for load [ARRAY + offset] instructions.\r
-; 22.00: we load K_CONST offset to (rTable) register to avoid jwasm+binutils problem \r
- rTable equ r0\r
- ; rTable equ K_CONST\r
- \r
-ifdef x64\r
- rNum equ REG_ABI_PARAM_2\r
- if (IS_LINUX eq 0)\r
- LOCAL_SIZE equ (16 * 2)\r
- endif\r
-else\r
- rNum equ r3\r
- LOCAL_SIZE equ (16 * 1)\r
-endif\r
-\r
-rState equ REG_ABI_PARAM_0\r
-rData equ REG_ABI_PARAM_1\r
-\r
-\r
-\r
-\r
-\r
-\r
-MY_SHA_INSTR macro cmd, a1, a2\r
- db 0fH, 038H, cmd, (0c0H + a1 * 8 + a2)\r
-endm\r
-\r
-cmd_sha256rnds2 equ 0cbH\r
-cmd_sha256msg1 equ 0ccH\r
-cmd_sha256msg2 equ 0cdH\r
-\r
-MY_sha256rnds2 macro a1, a2\r
- MY_SHA_INSTR cmd_sha256rnds2, a1, a2\r
-endm\r
-\r
-MY_sha256msg1 macro a1, a2\r
- MY_SHA_INSTR cmd_sha256msg1, a1, a2\r
-endm\r
-\r
-MY_sha256msg2 macro a1, a2\r
- MY_SHA_INSTR cmd_sha256msg2, a1, a2\r
-endm\r
-\r
-MY_PROLOG macro\r
- ifdef x64\r
- if (IS_LINUX eq 0)\r
- movdqa [r4 + 8], xmm6\r
- movdqa [r4 + 8 + 16], xmm7\r
- sub r4, LOCAL_SIZE + 8\r
- movdqa [r4 ], xmm8\r
- movdqa [r4 + 16], xmm9\r
- endif\r
- else ; x86\r
- push r3\r
- push r5\r
- mov r5, r4\r
- NUM_PUSH_REGS equ 2\r
- PARAM_OFFSET equ (REG_SIZE * (1 + NUM_PUSH_REGS))\r
- if (IS_CDECL gt 0)\r
- mov rState, [r4 + PARAM_OFFSET]\r
- mov rData, [r4 + PARAM_OFFSET + REG_SIZE * 1]\r
- mov rNum, [r4 + PARAM_OFFSET + REG_SIZE * 2]\r
- else ; fastcall\r
- mov rNum, [r4 + PARAM_OFFSET]\r
- endif\r
- and r4, -16\r
- sub r4, LOCAL_SIZE\r
- endif\r
-endm\r
-\r
-MY_EPILOG macro\r
- ifdef x64\r
- if (IS_LINUX eq 0)\r
- movdqa xmm8, [r4]\r
- movdqa xmm9, [r4 + 16]\r
- add r4, LOCAL_SIZE + 8\r
- movdqa xmm6, [r4 + 8]\r
- movdqa xmm7, [r4 + 8 + 16]\r
- endif\r
- else ; x86\r
- mov r4, r5\r
- pop r5\r
- pop r3\r
- endif\r
- MY_ENDP\r
-endm\r
-\r
-\r
-msg equ xmm0\r
-tmp equ xmm0\r
-state0_N equ 2\r
-state1_N equ 3\r
-w_regs equ 4\r
-\r
-\r
-state1_save equ xmm1\r
-state0 equ @CatStr(xmm, %state0_N)\r
-state1 equ @CatStr(xmm, %state1_N)\r
-\r
-\r
-ifdef x64\r
- state0_save equ xmm8\r
- mask2 equ xmm9\r
-else\r
- state0_save equ [r4]\r
- mask2 equ xmm0\r
-endif\r
-\r
-LOAD_MASK macro\r
- movdqa mask2, XMMWORD PTR Reverse_Endian_Mask\r
-endm\r
-\r
-LOAD_W macro k:req\r
- movdqu @CatStr(xmm, %(w_regs + k)), [rData + (16 * (k))]\r
- pshufb @CatStr(xmm, %(w_regs + k)), mask2\r
-endm\r
-\r
-\r
-; pre1 <= 4 && pre2 >= 1 && pre1 > pre2 && (pre1 - pre2) <= 1\r
-pre1 equ 3\r
-pre2 equ 2\r
- \r
-\r
-\r
-RND4 macro k\r
- movdqa msg, xmmword ptr [rTable + (k) * 16]\r
- paddd msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4)))\r
- MY_sha256rnds2 state0_N, state1_N\r
- pshufd msg, msg, 0eH\r
- \r
- if (k GE (4 - pre1)) AND (k LT (16 - pre1))\r
- ; w4[0] = msg1(w4[-4], w4[-3])\r
- MY_sha256msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4))\r
- endif\r
- \r
- MY_sha256rnds2 state1_N, state0_N\r
-\r
- if (k GE (4 - pre2)) AND (k LT (16 - pre2))\r
- movdqa tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 1) mod 4)))\r
- palignr tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4))), 4\r
- paddd @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), tmp\r
- ; w4[0] = msg2(w4[0], w4[-1])\r
- MY_sha256msg2 %(w_regs + ((k + pre2) mod 4)), %(w_regs + ((k + pre2 - 1) mod 4))\r
- endif\r
-endm\r
-\r
-\r
-\r
-\r
-\r
-REVERSE_STATE macro\r
- ; state0 ; dcba\r
- ; state1 ; hgfe\r
- pshufd tmp, state0, 01bH ; abcd\r
- pshufd state0, state1, 01bH ; efgh\r
- movdqa state1, state0 ; efgh\r
- punpcklqdq state0, tmp ; cdgh\r
- punpckhqdq state1, tmp ; abef\r
-endm\r
-\r
-\r
-MY_PROC Sha256_UpdateBlocks_HW, 3\r
- MY_PROLOG\r
-\r
- lea rTable, [K_CONST]\r
-\r
- cmp rNum, 0\r
- je end_c\r
-\r
- movdqu state0, [rState] ; dcba\r
- movdqu state1, [rState + 16] ; hgfe\r
-\r
- REVERSE_STATE\r
- \r
- ifdef x64\r
- LOAD_MASK\r
- endif\r
-\r
- align 16\r
- nextBlock:\r
- movdqa state0_save, state0\r
- movdqa state1_save, state1\r
- \r
- ifndef x64\r
- LOAD_MASK\r
- endif\r
- \r
- LOAD_W 0\r
- LOAD_W 1\r
- LOAD_W 2\r
- LOAD_W 3\r
-\r
- \r
- k = 0\r
- rept 16\r
- RND4 k\r
- k = k + 1\r
- endm\r
-\r
- paddd state0, state0_save\r
- paddd state1, state1_save\r
-\r
- add rData, 64\r
- sub rNum, 1\r
- jnz nextBlock\r
- \r
- REVERSE_STATE\r
-\r
- movdqu [rState], state0\r
- movdqu [rState + 16], state1\r
- \r
- end_c:\r
-MY_EPILOG\r
-\r
-; _TEXT$SHA256OPT ENDS\r
-\r
-end\r
+++ /dev/null
-; XzCrc64Opt.asm -- CRC64 calculation : optimized version\r
-; 2021-02-06 : Igor Pavlov : Public domain\r
-\r
-include 7zAsm.asm\r
-\r
-MY_ASM_START\r
-\r
-ifdef x64\r
-\r
-rD equ r9\r
-rN equ r10\r
-rT equ r5\r
-num_VAR equ r8\r
-\r
-SRCDAT4 equ dword ptr [rD + rN * 1]\r
- \r
-CRC_XOR macro dest:req, src:req, t:req\r
- xor dest, QWORD PTR [rT + src * 8 + 0800h * t]\r
-endm\r
-\r
-CRC1b macro\r
- movzx x6, BYTE PTR [rD]\r
- inc rD\r
- movzx x3, x0_L\r
- xor x6, x3\r
- shr r0, 8\r
- CRC_XOR r0, r6, 0\r
- dec rN\r
-endm\r
-\r
-MY_PROLOG macro crc_end:req\r
- ifdef ABI_LINUX\r
- MY_PUSH_2_REGS\r
- else\r
- MY_PUSH_4_REGS\r
- endif\r
- mov r0, REG_ABI_PARAM_0\r
- mov rN, REG_ABI_PARAM_2\r
- mov rT, REG_ABI_PARAM_3\r
- mov rD, REG_ABI_PARAM_1\r
- test rN, rN\r
- jz crc_end\r
- @@:\r
- test rD, 3\r
- jz @F\r
- CRC1b\r
- jnz @B\r
- @@:\r
- cmp rN, 8\r
- jb crc_end\r
- add rN, rD\r
- mov num_VAR, rN\r
- sub rN, 4\r
- and rN, NOT 3\r
- sub rD, rN\r
- mov x1, SRCDAT4\r
- xor r0, r1\r
- add rN, 4\r
-endm\r
-\r
-MY_EPILOG macro crc_end:req\r
- sub rN, 4\r
- mov x1, SRCDAT4\r
- xor r0, r1\r
- mov rD, rN\r
- mov rN, num_VAR\r
- sub rN, rD\r
- crc_end:\r
- test rN, rN\r
- jz @F\r
- CRC1b\r
- jmp crc_end\r
- @@:\r
- ifdef ABI_LINUX\r
- MY_POP_2_REGS\r
- else\r
- MY_POP_4_REGS\r
- endif\r
-endm\r
-\r
-MY_PROC XzCrc64UpdateT4, 4\r
- MY_PROLOG crc_end_4\r
- align 16\r
- main_loop_4:\r
- mov x1, SRCDAT4\r
- movzx x2, x0_L\r
- movzx x3, x0_H\r
- shr r0, 16\r
- movzx x6, x0_L\r
- movzx x7, x0_H\r
- shr r0, 16\r
- CRC_XOR r1, r2, 3\r
- CRC_XOR r0, r3, 2\r
- CRC_XOR r1, r6, 1\r
- CRC_XOR r0, r7, 0\r
- xor r0, r1\r
-\r
- add rD, 4\r
- jnz main_loop_4\r
-\r
- MY_EPILOG crc_end_4\r
-MY_ENDP\r
-\r
-else\r
-; x86 (32-bit)\r
-\r
-rD equ r1\r
-rN equ r7\r
-rT equ r5\r
-\r
-crc_OFFS equ (REG_SIZE * 5)\r
-\r
-if (IS_CDECL gt 0) or (IS_LINUX gt 0)\r
- ; cdecl or (GNU fastcall) stack:\r
- ; (UInt32 *) table\r
- ; size_t size\r
- ; void * data\r
- ; (UInt64) crc\r
- ; ret-ip <-(r4)\r
- data_OFFS equ (8 + crc_OFFS)\r
- size_OFFS equ (REG_SIZE + data_OFFS)\r
- table_OFFS equ (REG_SIZE + size_OFFS)\r
- num_VAR equ [r4 + size_OFFS]\r
- table_VAR equ [r4 + table_OFFS]\r
-else\r
- ; Windows fastcall:\r
- ; r1 = data, r2 = size\r
- ; stack:\r
- ; (UInt32 *) table\r
- ; (UInt64) crc\r
- ; ret-ip <-(r4)\r
- table_OFFS equ (8 + crc_OFFS)\r
- table_VAR equ [r4 + table_OFFS]\r
- num_VAR equ table_VAR\r
-endif\r
-\r
-SRCDAT4 equ dword ptr [rD + rN * 1]\r
-\r
-CRC macro op0:req, op1:req, dest0:req, dest1:req, src:req, t:req\r
- op0 dest0, DWORD PTR [rT + src * 8 + 0800h * t]\r
- op1 dest1, DWORD PTR [rT + src * 8 + 0800h * t + 4]\r
-endm\r
-\r
-CRC_XOR macro dest0:req, dest1:req, src:req, t:req\r
- CRC xor, xor, dest0, dest1, src, t\r
-endm\r
-\r
-\r
-CRC1b macro\r
- movzx x6, BYTE PTR [rD]\r
- inc rD\r
- movzx x3, x0_L\r
- xor x6, x3\r
- shrd r0, r2, 8\r
- shr r2, 8\r
- CRC_XOR r0, r2, r6, 0\r
- dec rN\r
-endm\r
-\r
-MY_PROLOG macro crc_end:req\r
- MY_PUSH_4_REGS\r
-\r
- if (IS_CDECL gt 0) or (IS_LINUX gt 0)\r
- proc_numParams = proc_numParams + 2 ; for ABI_LINUX\r
- mov rN, [r4 + size_OFFS]\r
- mov rD, [r4 + data_OFFS]\r
- else\r
- mov rN, r2\r
- endif\r
-\r
- mov x0, [r4 + crc_OFFS]\r
- mov x2, [r4 + crc_OFFS + 4]\r
- mov rT, table_VAR\r
- test rN, rN\r
- jz crc_end\r
- @@:\r
- test rD, 3\r
- jz @F\r
- CRC1b\r
- jnz @B\r
- @@:\r
- cmp rN, 8\r
- jb crc_end\r
- add rN, rD\r
-\r
- mov num_VAR, rN\r
-\r
- sub rN, 4\r
- and rN, NOT 3\r
- sub rD, rN\r
- xor r0, SRCDAT4\r
- add rN, 4\r
-endm\r
-\r
-MY_EPILOG macro crc_end:req\r
- sub rN, 4\r
- xor r0, SRCDAT4\r
-\r
- mov rD, rN\r
- mov rN, num_VAR\r
- sub rN, rD\r
- crc_end:\r
- test rN, rN\r
- jz @F\r
- CRC1b\r
- jmp crc_end\r
- @@:\r
- MY_POP_4_REGS\r
-endm\r
-\r
-MY_PROC XzCrc64UpdateT4, 5\r
- MY_PROLOG crc_end_4\r
- movzx x6, x0_L\r
- align 16\r
- main_loop_4:\r
- mov r3, SRCDAT4\r
- xor r3, r2\r
-\r
- CRC xor, mov, r3, r2, r6, 3\r
- movzx x6, x0_H\r
- shr r0, 16\r
- CRC_XOR r3, r2, r6, 2\r
-\r
- movzx x6, x0_L\r
- movzx x0, x0_H\r
- CRC_XOR r3, r2, r6, 1\r
- CRC_XOR r3, r2, r0, 0\r
- movzx x6, x3_L\r
- mov r0, r3\r
-\r
- add rD, 4\r
- jnz main_loop_4\r
-\r
- MY_EPILOG crc_end_4\r
-MY_ENDP\r
-\r
-endif ; ! x64\r
-\r
-end\r
+++ /dev/null
-/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)\r
-2021-02-09 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-#include "Bcj2.h"\r
-#include "CpuArch.h"\r
-\r
-#define CProb UInt16\r
-\r
-#define kTopValue ((UInt32)1 << 24)\r
-#define kNumModelBits 11\r
-#define kBitModelTotal (1 << kNumModelBits)\r
-#define kNumMoveBits 5\r
-\r
-#define _IF_BIT_0 ttt = *prob; bound = (p->range >> kNumModelBits) * ttt; if (p->code < bound)\r
-#define _UPDATE_0 p->range = bound; *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));\r
-#define _UPDATE_1 p->range -= bound; p->code -= bound; *prob = (CProb)(ttt - (ttt >> kNumMoveBits));\r
-\r
-void Bcj2Dec_Init(CBcj2Dec *p)\r
-{\r
- unsigned i;\r
-\r
- p->state = BCJ2_DEC_STATE_OK;\r
- p->ip = 0;\r
- p->temp[3] = 0;\r
- p->range = 0;\r
- p->code = 0;\r
- for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)\r
- p->probs[i] = kBitModelTotal >> 1;\r
-}\r
-\r
-SRes Bcj2Dec_Decode(CBcj2Dec *p)\r
-{\r
- if (p->range <= 5)\r
- {\r
- p->state = BCJ2_DEC_STATE_OK;\r
- for (; p->range != 5; p->range++)\r
- {\r
- if (p->range == 1 && p->code != 0)\r
- return SZ_ERROR_DATA;\r
- \r
- if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])\r
- {\r
- p->state = BCJ2_STREAM_RC;\r
- return SZ_OK;\r
- }\r
-\r
- p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;\r
- }\r
- \r
- if (p->code == 0xFFFFFFFF)\r
- return SZ_ERROR_DATA;\r
- \r
- p->range = 0xFFFFFFFF;\r
- }\r
- else if (p->state >= BCJ2_DEC_STATE_ORIG_0)\r
- {\r
- while (p->state <= BCJ2_DEC_STATE_ORIG_3)\r
- {\r
- Byte *dest = p->dest;\r
- if (dest == p->destLim)\r
- return SZ_OK;\r
- *dest = p->temp[(size_t)p->state - BCJ2_DEC_STATE_ORIG_0];\r
- p->state++;\r
- p->dest = dest + 1;\r
- }\r
- }\r
-\r
- /*\r
- if (BCJ2_IS_32BIT_STREAM(p->state))\r
- {\r
- const Byte *cur = p->bufs[p->state];\r
- if (cur == p->lims[p->state])\r
- return SZ_OK;\r
- p->bufs[p->state] = cur + 4;\r
- \r
- {\r
- UInt32 val;\r
- Byte *dest;\r
- SizeT rem;\r
- \r
- p->ip += 4;\r
- val = GetBe32(cur) - p->ip;\r
- dest = p->dest;\r
- rem = p->destLim - dest;\r
- if (rem < 4)\r
- {\r
- SizeT i;\r
- SetUi32(p->temp, val);\r
- for (i = 0; i < rem; i++)\r
- dest[i] = p->temp[i];\r
- p->dest = dest + rem;\r
- p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;\r
- return SZ_OK;\r
- }\r
- SetUi32(dest, val);\r
- p->temp[3] = (Byte)(val >> 24);\r
- p->dest = dest + 4;\r
- p->state = BCJ2_DEC_STATE_OK;\r
- }\r
- }\r
- */\r
-\r
- for (;;)\r
- {\r
- if (BCJ2_IS_32BIT_STREAM(p->state))\r
- p->state = BCJ2_DEC_STATE_OK;\r
- else\r
- {\r
- if (p->range < kTopValue)\r
- {\r
- if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])\r
- {\r
- p->state = BCJ2_STREAM_RC;\r
- return SZ_OK;\r
- }\r
- p->range <<= 8;\r
- p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;\r
- }\r
-\r
- {\r
- const Byte *src = p->bufs[BCJ2_STREAM_MAIN];\r
- const Byte *srcLim;\r
- Byte *dest;\r
- SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);\r
- \r
- if (num == 0)\r
- {\r
- p->state = BCJ2_STREAM_MAIN;\r
- return SZ_OK;\r
- }\r
- \r
- dest = p->dest;\r
- if (num > (SizeT)(p->destLim - dest))\r
- {\r
- num = (SizeT)(p->destLim - dest);\r
- if (num == 0)\r
- {\r
- p->state = BCJ2_DEC_STATE_ORIG;\r
- return SZ_OK;\r
- }\r
- }\r
- \r
- srcLim = src + num;\r
-\r
- if (p->temp[3] == 0x0F && (src[0] & 0xF0) == 0x80)\r
- *dest = src[0];\r
- else for (;;)\r
- {\r
- Byte b = *src;\r
- *dest = b;\r
- if (b != 0x0F)\r
- {\r
- if ((b & 0xFE) == 0xE8)\r
- break;\r
- dest++;\r
- if (++src != srcLim)\r
- continue;\r
- break;\r
- }\r
- dest++;\r
- if (++src == srcLim)\r
- break;\r
- if ((*src & 0xF0) != 0x80)\r
- continue;\r
- *dest = *src;\r
- break;\r
- }\r
- \r
- num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);\r
- \r
- if (src == srcLim)\r
- {\r
- p->temp[3] = src[-1];\r
- p->bufs[BCJ2_STREAM_MAIN] = src;\r
- p->ip += (UInt32)num;\r
- p->dest += num;\r
- p->state =\r
- p->bufs[BCJ2_STREAM_MAIN] ==\r
- p->lims[BCJ2_STREAM_MAIN] ?\r
- (unsigned)BCJ2_STREAM_MAIN :\r
- (unsigned)BCJ2_DEC_STATE_ORIG;\r
- return SZ_OK;\r
- }\r
- \r
- {\r
- UInt32 bound, ttt;\r
- CProb *prob;\r
- Byte b = src[0];\r
- Byte prev = (Byte)(num == 0 ? p->temp[3] : src[-1]);\r
- \r
- p->temp[3] = b;\r
- p->bufs[BCJ2_STREAM_MAIN] = src + 1;\r
- num++;\r
- p->ip += (UInt32)num;\r
- p->dest += num;\r
- \r
- prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)prev : (b == 0xE9 ? 1 : 0));\r
- \r
- _IF_BIT_0\r
- {\r
- _UPDATE_0\r
- continue;\r
- }\r
- _UPDATE_1\r
- \r
- }\r
- }\r
- }\r
-\r
- {\r
- UInt32 val;\r
- unsigned cj = (p->temp[3] == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;\r
- const Byte *cur = p->bufs[cj];\r
- Byte *dest;\r
- SizeT rem;\r
- \r
- if (cur == p->lims[cj])\r
- {\r
- p->state = cj;\r
- break;\r
- }\r
- \r
- val = GetBe32(cur);\r
- p->bufs[cj] = cur + 4;\r
-\r
- p->ip += 4;\r
- val -= p->ip;\r
- dest = p->dest;\r
- rem = (SizeT)(p->destLim - dest);\r
- \r
- if (rem < 4)\r
- {\r
- p->temp[0] = (Byte)val; if (rem > 0) dest[0] = (Byte)val; val >>= 8;\r
- p->temp[1] = (Byte)val; if (rem > 1) dest[1] = (Byte)val; val >>= 8;\r
- p->temp[2] = (Byte)val; if (rem > 2) dest[2] = (Byte)val; val >>= 8;\r
- p->temp[3] = (Byte)val;\r
- p->dest = dest + rem;\r
- p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;\r
- break;\r
- }\r
- \r
- SetUi32(dest, val);\r
- p->temp[3] = (Byte)(val >> 24);\r
- p->dest = dest + 4;\r
- }\r
- }\r
-\r
- if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC])\r
- {\r
- p->range <<= 8;\r
- p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;\r
- }\r
-\r
- return SZ_OK;\r
-}\r
+++ /dev/null
-/* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code)\r
-2021-02-09 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-/* #define SHOW_STAT */\r
-\r
-#ifdef SHOW_STAT\r
-#include <stdio.h>\r
-#define PRF(x) x\r
-#else\r
-#define PRF(x)\r
-#endif\r
-\r
-#include <string.h>\r
-\r
-#include "Bcj2.h"\r
-#include "CpuArch.h"\r
-\r
-#define CProb UInt16\r
-\r
-#define kTopValue ((UInt32)1 << 24)\r
-#define kNumModelBits 11\r
-#define kBitModelTotal (1 << kNumModelBits)\r
-#define kNumMoveBits 5\r
-\r
-void Bcj2Enc_Init(CBcj2Enc *p)\r
-{\r
- unsigned i;\r
-\r
- p->state = BCJ2_ENC_STATE_OK;\r
- p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;\r
-\r
- p->prevByte = 0;\r
-\r
- p->cache = 0;\r
- p->range = 0xFFFFFFFF;\r
- p->low = 0;\r
- p->cacheSize = 1;\r
-\r
- p->ip = 0;\r
-\r
- p->fileIp = 0;\r
- p->fileSize = 0;\r
- p->relatLimit = BCJ2_RELAT_LIMIT;\r
-\r
- p->tempPos = 0;\r
-\r
- p->flushPos = 0;\r
-\r
- for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)\r
- p->probs[i] = kBitModelTotal >> 1;\r
-}\r
-\r
-static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p)\r
-{\r
- if ((UInt32)p->low < (UInt32)0xFF000000 || (UInt32)(p->low >> 32) != 0)\r
- {\r
- Byte *buf = p->bufs[BCJ2_STREAM_RC];\r
- do\r
- {\r
- if (buf == p->lims[BCJ2_STREAM_RC])\r
- {\r
- p->state = BCJ2_STREAM_RC;\r
- p->bufs[BCJ2_STREAM_RC] = buf;\r
- return True;\r
- }\r
- *buf++ = (Byte)(p->cache + (Byte)(p->low >> 32));\r
- p->cache = 0xFF;\r
- }\r
- while (--p->cacheSize);\r
- p->bufs[BCJ2_STREAM_RC] = buf;\r
- p->cache = (Byte)((UInt32)p->low >> 24);\r
- }\r
- p->cacheSize++;\r
- p->low = (UInt32)p->low << 8;\r
- return False;\r
-}\r
-\r
-static void Bcj2Enc_Encode_2(CBcj2Enc *p)\r
-{\r
- if (BCJ2_IS_32BIT_STREAM(p->state))\r
- {\r
- Byte *cur = p->bufs[p->state];\r
- if (cur == p->lims[p->state])\r
- return;\r
- SetBe32(cur, p->tempTarget);\r
- p->bufs[p->state] = cur + 4;\r
- }\r
-\r
- p->state = BCJ2_ENC_STATE_ORIG;\r
-\r
- for (;;)\r
- {\r
- if (p->range < kTopValue)\r
- {\r
- if (RangeEnc_ShiftLow(p))\r
- return;\r
- p->range <<= 8;\r
- }\r
-\r
- {\r
- {\r
- const Byte *src = p->src;\r
- const Byte *srcLim;\r
- Byte *dest;\r
- SizeT num = (SizeT)(p->srcLim - src);\r
-\r
- if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)\r
- {\r
- if (num <= 4)\r
- return;\r
- num -= 4;\r
- }\r
- else if (num == 0)\r
- break;\r
-\r
- dest = p->bufs[BCJ2_STREAM_MAIN];\r
- if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest))\r
- {\r
- num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest);\r
- if (num == 0)\r
- {\r
- p->state = BCJ2_STREAM_MAIN;\r
- return;\r
- }\r
- }\r
- \r
- srcLim = src + num;\r
-\r
- if (p->prevByte == 0x0F && (src[0] & 0xF0) == 0x80)\r
- *dest = src[0];\r
- else for (;;)\r
- {\r
- Byte b = *src;\r
- *dest = b;\r
- if (b != 0x0F)\r
- {\r
- if ((b & 0xFE) == 0xE8)\r
- break;\r
- dest++;\r
- if (++src != srcLim)\r
- continue;\r
- break;\r
- }\r
- dest++;\r
- if (++src == srcLim)\r
- break;\r
- if ((*src & 0xF0) != 0x80)\r
- continue;\r
- *dest = *src;\r
- break;\r
- }\r
- \r
- num = (SizeT)(src - p->src);\r
- \r
- if (src == srcLim)\r
- {\r
- p->prevByte = src[-1];\r
- p->bufs[BCJ2_STREAM_MAIN] = dest;\r
- p->src = src;\r
- p->ip += (UInt32)num;\r
- continue;\r
- }\r
- \r
- {\r
- Byte context = (Byte)(num == 0 ? p->prevByte : src[-1]);\r
- BoolInt needConvert;\r
-\r
- p->bufs[BCJ2_STREAM_MAIN] = dest + 1;\r
- p->ip += (UInt32)num + 1;\r
- src++;\r
- \r
- needConvert = False;\r
-\r
- if ((SizeT)(p->srcLim - src) >= 4)\r
- {\r
- UInt32 relatVal = GetUi32(src);\r
- if ((p->fileSize == 0 || (UInt32)(p->ip + 4 + relatVal - p->fileIp) < p->fileSize)\r
- && ((relatVal + p->relatLimit) >> 1) < p->relatLimit)\r
- needConvert = True;\r
- }\r
-\r
- {\r
- UInt32 bound;\r
- unsigned ttt;\r
- Byte b = src[-1];\r
- CProb *prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)context : (b == 0xE9 ? 1 : 0));\r
-\r
- ttt = *prob;\r
- bound = (p->range >> kNumModelBits) * ttt;\r
- \r
- if (!needConvert)\r
- {\r
- p->range = bound;\r
- *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));\r
- p->src = src;\r
- p->prevByte = b;\r
- continue;\r
- }\r
- \r
- p->low += bound;\r
- p->range -= bound;\r
- *prob = (CProb)(ttt - (ttt >> kNumMoveBits));\r
-\r
- {\r
- UInt32 relatVal = GetUi32(src);\r
- UInt32 absVal;\r
- p->ip += 4;\r
- absVal = p->ip + relatVal;\r
- p->prevByte = src[3];\r
- src += 4;\r
- p->src = src;\r
- {\r
- unsigned cj = (b == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;\r
- Byte *cur = p->bufs[cj];\r
- if (cur == p->lims[cj])\r
- {\r
- p->state = cj;\r
- p->tempTarget = absVal;\r
- return;\r
- }\r
- SetBe32(cur, absVal);\r
- p->bufs[cj] = cur + 4;\r
- }\r
- }\r
- }\r
- }\r
- }\r
- }\r
- }\r
-\r
- if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)\r
- return;\r
-\r
- for (; p->flushPos < 5; p->flushPos++)\r
- if (RangeEnc_ShiftLow(p))\r
- return;\r
- p->state = BCJ2_ENC_STATE_OK;\r
-}\r
-\r
-\r
-void Bcj2Enc_Encode(CBcj2Enc *p)\r
-{\r
- PRF(printf("\n"));\r
- PRF(printf("---- ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));\r
-\r
- if (p->tempPos != 0)\r
- {\r
- unsigned extra = 0;\r
- \r
- for (;;)\r
- {\r
- const Byte *src = p->src;\r
- const Byte *srcLim = p->srcLim;\r
- EBcj2Enc_FinishMode finishMode = p->finishMode;\r
- \r
- p->src = p->temp;\r
- p->srcLim = p->temp + p->tempPos;\r
- if (src != srcLim)\r
- p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;\r
- \r
- PRF(printf(" ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));\r
-\r
- Bcj2Enc_Encode_2(p);\r
- \r
- {\r
- unsigned num = (unsigned)(p->src - p->temp);\r
- unsigned tempPos = p->tempPos - num;\r
- unsigned i;\r
- p->tempPos = tempPos;\r
- for (i = 0; i < tempPos; i++)\r
- p->temp[i] = p->temp[(size_t)i + num];\r
- \r
- p->src = src;\r
- p->srcLim = srcLim;\r
- p->finishMode = finishMode;\r
- \r
- if (p->state != BCJ2_ENC_STATE_ORIG || src == srcLim)\r
- return;\r
- \r
- if (extra >= tempPos)\r
- {\r
- p->src = src - tempPos;\r
- p->tempPos = 0;\r
- break;\r
- }\r
- \r
- p->temp[tempPos] = src[0];\r
- p->tempPos = tempPos + 1;\r
- p->src = src + 1;\r
- extra++;\r
- }\r
- }\r
- }\r
-\r
- PRF(printf("++++ ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));\r
-\r
- Bcj2Enc_Encode_2(p);\r
- \r
- if (p->state == BCJ2_ENC_STATE_ORIG)\r
- {\r
- const Byte *src = p->src;\r
- unsigned rem = (unsigned)(p->srcLim - src);\r
- unsigned i;\r
- for (i = 0; i < rem; i++)\r
- p->temp[i] = src[i];\r
- p->tempPos = rem;\r
- p->src = src + rem;\r
- }\r
-}\r
+++ /dev/null
-/* Bra.c -- Converters for RISC code\r
-2021-02-09 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-#include "CpuArch.h"\r
-#include "Bra.h"\r
-\r
-SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)\r
-{\r
- Byte *p;\r
- const Byte *lim;\r
- size &= ~(size_t)3;\r
- ip += 4;\r
- p = data;\r
- lim = data + size;\r
-\r
- if (encoding)\r
-\r
- for (;;)\r
- {\r
- for (;;)\r
- {\r
- if (p >= lim)\r
- return (SizeT)(p - data);\r
- p += 4;\r
- if (p[-1] == 0xEB)\r
- break;\r
- }\r
- {\r
- UInt32 v = GetUi32(p - 4);\r
- v <<= 2;\r
- v += ip + (UInt32)(p - data);\r
- v >>= 2;\r
- v &= 0x00FFFFFF;\r
- v |= 0xEB000000;\r
- SetUi32(p - 4, v);\r
- }\r
- }\r
-\r
- for (;;)\r
- {\r
- for (;;)\r
- {\r
- if (p >= lim)\r
- return (SizeT)(p - data);\r
- p += 4;\r
- if (p[-1] == 0xEB)\r
- break;\r
- }\r
- {\r
- UInt32 v = GetUi32(p - 4);\r
- v <<= 2;\r
- v -= ip + (UInt32)(p - data);\r
- v >>= 2;\r
- v &= 0x00FFFFFF;\r
- v |= 0xEB000000;\r
- SetUi32(p - 4, v);\r
- }\r
- }\r
-}\r
-\r
-\r
-SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)\r
-{\r
- Byte *p;\r
- const Byte *lim;\r
- size &= ~(size_t)1;\r
- p = data;\r
- lim = data + size - 4;\r
-\r
- if (encoding)\r
- \r
- for (;;)\r
- {\r
- UInt32 b1;\r
- for (;;)\r
- {\r
- UInt32 b3;\r
- if (p > lim)\r
- return (SizeT)(p - data);\r
- b1 = p[1];\r
- b3 = p[3];\r
- p += 2;\r
- b1 ^= 8;\r
- if ((b3 & b1) >= 0xF8)\r
- break;\r
- }\r
- {\r
- UInt32 v =\r
- ((UInt32)b1 << 19)\r
- + (((UInt32)p[1] & 0x7) << 8)\r
- + (((UInt32)p[-2] << 11))\r
- + (p[0]);\r
-\r
- p += 2;\r
- {\r
- UInt32 cur = (ip + (UInt32)(p - data)) >> 1;\r
- v += cur;\r
- }\r
-\r
- p[-4] = (Byte)(v >> 11);\r
- p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));\r
- p[-2] = (Byte)v;\r
- p[-1] = (Byte)(0xF8 | (v >> 8));\r
- }\r
- }\r
- \r
- for (;;)\r
- {\r
- UInt32 b1;\r
- for (;;)\r
- {\r
- UInt32 b3;\r
- if (p > lim)\r
- return (SizeT)(p - data);\r
- b1 = p[1];\r
- b3 = p[3];\r
- p += 2;\r
- b1 ^= 8;\r
- if ((b3 & b1) >= 0xF8)\r
- break;\r
- }\r
- {\r
- UInt32 v =\r
- ((UInt32)b1 << 19)\r
- + (((UInt32)p[1] & 0x7) << 8)\r
- + (((UInt32)p[-2] << 11))\r
- + (p[0]);\r
-\r
- p += 2;\r
- {\r
- UInt32 cur = (ip + (UInt32)(p - data)) >> 1;\r
- v -= cur;\r
- }\r
-\r
- /*\r
- SetUi16(p - 4, (UInt16)(((v >> 11) & 0x7FF) | 0xF000));\r
- SetUi16(p - 2, (UInt16)(v | 0xF800));\r
- */\r
- \r
- p[-4] = (Byte)(v >> 11);\r
- p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));\r
- p[-2] = (Byte)v;\r
- p[-1] = (Byte)(0xF8 | (v >> 8));\r
- }\r
- }\r
-}\r
-\r
-\r
-SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)\r
-{\r
- Byte *p;\r
- const Byte *lim;\r
- size &= ~(size_t)3;\r
- ip -= 4;\r
- p = data;\r
- lim = data + size;\r
-\r
- for (;;)\r
- {\r
- for (;;)\r
- {\r
- if (p >= lim)\r
- return (SizeT)(p - data);\r
- p += 4;\r
- /* if ((v & 0xFC000003) == 0x48000001) */\r
- if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1)\r
- break;\r
- }\r
- {\r
- UInt32 v = GetBe32(p - 4);\r
- if (encoding)\r
- v += ip + (UInt32)(p - data);\r
- else\r
- v -= ip + (UInt32)(p - data);\r
- v &= 0x03FFFFFF;\r
- v |= 0x48000000;\r
- SetBe32(p - 4, v);\r
- }\r
- }\r
-}\r
-\r
-\r
-SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)\r
-{\r
- Byte *p;\r
- const Byte *lim;\r
- size &= ~(size_t)3;\r
- ip -= 4;\r
- p = data;\r
- lim = data + size;\r
-\r
- for (;;)\r
- {\r
- for (;;)\r
- {\r
- if (p >= lim)\r
- return (SizeT)(p - data);\r
- /*\r
- v = GetBe32(p);\r
- p += 4;\r
- m = v + ((UInt32)5 << 29);\r
- m ^= (UInt32)7 << 29;\r
- m += (UInt32)1 << 22;\r
- if ((m & ((UInt32)0x1FF << 23)) == 0)\r
- break;\r
- */\r
- p += 4;\r
- if ((p[-4] == 0x40 && (p[-3] & 0xC0) == 0) ||\r
- (p[-4] == 0x7F && (p[-3] >= 0xC0)))\r
- break;\r
- }\r
- {\r
- UInt32 v = GetBe32(p - 4);\r
- v <<= 2;\r
- if (encoding)\r
- v += ip + (UInt32)(p - data);\r
- else\r
- v -= ip + (UInt32)(p - data);\r
- \r
- v &= 0x01FFFFFF;\r
- v -= (UInt32)1 << 24;\r
- v ^= 0xFF000000;\r
- v >>= 2;\r
- v |= 0x40000000;\r
- SetBe32(p - 4, v);\r
- }\r
- }\r
-}\r
+++ /dev/null
-/* Bra86.c -- Converter for x86 code (BCJ)\r
-2021-02-09 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-#include "Bra.h"\r
-\r
-#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0)\r
-\r
-SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding)\r
-{\r
- SizeT pos = 0;\r
- UInt32 mask = *state & 7;\r
- if (size < 5)\r
- return 0;\r
- size -= 4;\r
- ip += 5;\r
-\r
- for (;;)\r
- {\r
- Byte *p = data + pos;\r
- const Byte *limit = data + size;\r
- for (; p < limit; p++)\r
- if ((*p & 0xFE) == 0xE8)\r
- break;\r
-\r
- {\r
- SizeT d = (SizeT)(p - data) - pos;\r
- pos = (SizeT)(p - data);\r
- if (p >= limit)\r
- {\r
- *state = (d > 2 ? 0 : mask >> (unsigned)d);\r
- return pos;\r
- }\r
- if (d > 2)\r
- mask = 0;\r
- else\r
- {\r
- mask >>= (unsigned)d;\r
- if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1])))\r
- {\r
- mask = (mask >> 1) | 4;\r
- pos++;\r
- continue;\r
- }\r
- }\r
- }\r
-\r
- if (Test86MSByte(p[4]))\r
- {\r
- UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]);\r
- UInt32 cur = ip + (UInt32)pos;\r
- pos += 5;\r
- if (encoding)\r
- v += cur;\r
- else\r
- v -= cur;\r
- if (mask != 0)\r
- {\r
- unsigned sh = (mask & 6) << 2;\r
- if (Test86MSByte((Byte)(v >> sh)))\r
- {\r
- v ^= (((UInt32)0x100 << sh) - 1);\r
- if (encoding)\r
- v += cur;\r
- else\r
- v -= cur;\r
- }\r
- mask = 0;\r
- }\r
- p[1] = (Byte)v;\r
- p[2] = (Byte)(v >> 8);\r
- p[3] = (Byte)(v >> 16);\r
- p[4] = (Byte)(0 - ((v >> 24) & 1));\r
- }\r
- else\r
- {\r
- mask = (mask >> 1) | 4;\r
- pos++;\r
- }\r
- }\r
-}\r
+++ /dev/null
-/* BraIA64.c -- Converter for IA-64 code\r
-2017-01-26 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-#include "CpuArch.h"\r
-#include "Bra.h"\r
-\r
-SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)\r
-{\r
- SizeT i;\r
- if (size < 16)\r
- return 0;\r
- size -= 16;\r
- i = 0;\r
- do\r
- {\r
- unsigned m = ((UInt32)0x334B0000 >> (data[i] & 0x1E)) & 3;\r
- if (m)\r
- {\r
- m++;\r
- do\r
- {\r
- Byte *p = data + (i + (size_t)m * 5 - 8);\r
- if (((p[3] >> m) & 15) == 5\r
- && (((p[-1] | ((UInt32)p[0] << 8)) >> m) & 0x70) == 0)\r
- {\r
- unsigned raw = GetUi32(p);\r
- unsigned v = raw >> m;\r
- v = (v & 0xFFFFF) | ((v & (1 << 23)) >> 3);\r
- \r
- v <<= 4;\r
- if (encoding)\r
- v += ip + (UInt32)i;\r
- else\r
- v -= ip + (UInt32)i;\r
- v >>= 4;\r
- \r
- v &= 0x1FFFFF;\r
- v += 0x700000;\r
- v &= 0x8FFFFF;\r
- raw &= ~((UInt32)0x8FFFFF << m);\r
- raw |= (v << m);\r
- SetUi32(p, raw);\r
- }\r
- }\r
- while (++m <= 4);\r
- }\r
- i += 16;\r
- }\r
- while (i <= size);\r
- return i;\r
-}\r
+++ /dev/null
-/* CpuArch.c -- CPU specific code\r
-2021-07-13 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-#include "CpuArch.h"\r
-\r
-#ifdef MY_CPU_X86_OR_AMD64\r
-\r
-#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__)\r
-#define USE_ASM\r
-#endif\r
-\r
-#if !defined(USE_ASM) && _MSC_VER >= 1500\r
-#include <intrin.h>\r
-#endif\r
-\r
-#if defined(USE_ASM) && !defined(MY_CPU_AMD64)\r
-static UInt32 CheckFlag(UInt32 flag)\r
-{\r
- #ifdef _MSC_VER\r
- __asm pushfd;\r
- __asm pop EAX;\r
- __asm mov EDX, EAX;\r
- __asm xor EAX, flag;\r
- __asm push EAX;\r
- __asm popfd;\r
- __asm pushfd;\r
- __asm pop EAX;\r
- __asm xor EAX, EDX;\r
- __asm push EDX;\r
- __asm popfd;\r
- __asm and flag, EAX;\r
- #else\r
- __asm__ __volatile__ (\r
- "pushf\n\t"\r
- "pop %%EAX\n\t"\r
- "movl %%EAX,%%EDX\n\t"\r
- "xorl %0,%%EAX\n\t"\r
- "push %%EAX\n\t"\r
- "popf\n\t"\r
- "pushf\n\t"\r
- "pop %%EAX\n\t"\r
- "xorl %%EDX,%%EAX\n\t"\r
- "push %%EDX\n\t"\r
- "popf\n\t"\r
- "andl %%EAX, %0\n\t":\r
- "=c" (flag) : "c" (flag) :\r
- "%eax", "%edx");\r
- #endif\r
- return flag;\r
-}\r
-#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False;\r
-#else\r
-#define CHECK_CPUID_IS_SUPPORTED\r
-#endif\r
-\r
-#ifndef USE_ASM\r
- #ifdef _MSC_VER\r
- #if _MSC_VER >= 1600\r
- #define MY__cpuidex __cpuidex\r
- #else\r
-\r
-/*\r
- __cpuid (function == 4) requires subfunction number in ECX.\r
- MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.\r
- __cpuid() in new MSVC clears ECX.\r
- __cpuid() in old MSVC (14.00) doesn't clear ECX\r
- We still can use __cpuid for low (function) values that don't require ECX,\r
- but __cpuid() in old MSVC will be incorrect for some function values: (function == 4).\r
- So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,\r
- where ECX value is first parameter for FAST_CALL / NO_INLINE function,\r
- So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and\r
- old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.\r
- \r
- DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!\r
-*/\r
-\r
-static\r
-MY_NO_INLINE\r
-void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)\r
-{\r
- UNUSED_VAR(subFunction);\r
- __cpuid(CPUInfo, function);\r
-}\r
-\r
- #define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func)\r
- #pragma message("======== MY__cpuidex_HACK WAS USED ========")\r
- #endif\r
- #else\r
- #define MY__cpuidex(info, func, func2) __cpuid(info, func)\r
- #pragma message("======== (INCORRECT ?) cpuid WAS USED ========")\r
- #endif\r
-#endif\r
-\r
-\r
-\r
-\r
-void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)\r
-{\r
- #ifdef USE_ASM\r
-\r
- #ifdef _MSC_VER\r
-\r
- UInt32 a2, b2, c2, d2;\r
- __asm xor EBX, EBX;\r
- __asm xor ECX, ECX;\r
- __asm xor EDX, EDX;\r
- __asm mov EAX, function;\r
- __asm cpuid;\r
- __asm mov a2, EAX;\r
- __asm mov b2, EBX;\r
- __asm mov c2, ECX;\r
- __asm mov d2, EDX;\r
-\r
- *a = a2;\r
- *b = b2;\r
- *c = c2;\r
- *d = d2;\r
-\r
- #else\r
-\r
- __asm__ __volatile__ (\r
- #if defined(MY_CPU_AMD64) && defined(__PIC__)\r
- "mov %%rbx, %%rdi;"\r
- "cpuid;"\r
- "xchg %%rbx, %%rdi;"\r
- : "=a" (*a) ,\r
- "=D" (*b) ,\r
- #elif defined(MY_CPU_X86) && defined(__PIC__)\r
- "mov %%ebx, %%edi;"\r
- "cpuid;"\r
- "xchgl %%ebx, %%edi;"\r
- : "=a" (*a) ,\r
- "=D" (*b) ,\r
- #else\r
- "cpuid"\r
- : "=a" (*a) ,\r
- "=b" (*b) ,\r
- #endif\r
- "=c" (*c) ,\r
- "=d" (*d)\r
- : "0" (function), "c"(0) ) ;\r
-\r
- #endif\r
- \r
- #else\r
-\r
- int CPUInfo[4];\r
-\r
- MY__cpuidex(CPUInfo, (int)function, 0);\r
-\r
- *a = (UInt32)CPUInfo[0];\r
- *b = (UInt32)CPUInfo[1];\r
- *c = (UInt32)CPUInfo[2];\r
- *d = (UInt32)CPUInfo[3];\r
-\r
- #endif\r
-}\r
-\r
-BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)\r
-{\r
- CHECK_CPUID_IS_SUPPORTED\r
- MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]);\r
- MyCPUID(1, &p->ver, &p->b, &p->c, &p->d);\r
- return True;\r
-}\r
-\r
-static const UInt32 kVendors[][3] =\r
-{\r
- { 0x756E6547, 0x49656E69, 0x6C65746E},\r
- { 0x68747541, 0x69746E65, 0x444D4163},\r
- { 0x746E6543, 0x48727561, 0x736C7561}\r
-};\r
-\r
-int x86cpuid_GetFirm(const Cx86cpuid *p)\r
-{\r
- unsigned i;\r
- for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++)\r
- {\r
- const UInt32 *v = kVendors[i];\r
- if (v[0] == p->vendor[0] &&\r
- v[1] == p->vendor[1] &&\r
- v[2] == p->vendor[2])\r
- return (int)i;\r
- }\r
- return -1;\r
-}\r
-\r
-BoolInt CPU_Is_InOrder()\r
-{\r
- Cx86cpuid p;\r
- int firm;\r
- UInt32 family, model;\r
- if (!x86cpuid_CheckAndRead(&p))\r
- return True;\r
-\r
- family = x86cpuid_GetFamily(p.ver);\r
- model = x86cpuid_GetModel(p.ver);\r
- \r
- firm = x86cpuid_GetFirm(&p);\r
-\r
- switch (firm)\r
- {\r
- case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (\r
- /* In-Order Atom CPU */\r
- model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */\r
- || model == 0x26 /* 45 nm, Z6xx */\r
- || model == 0x27 /* 32 nm, Z2460 */\r
- || model == 0x35 /* 32 nm, Z2760 */\r
- || model == 0x36 /* 32 nm, N2xxx, D2xxx */\r
- )));\r
- case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));\r
- case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));\r
- }\r
- return True;\r
-}\r
-\r
-#if !defined(MY_CPU_AMD64) && defined(_WIN32)\r
-#include <windows.h>\r
-static BoolInt CPU_Sys_Is_SSE_Supported()\r
-{\r
- OSVERSIONINFO vi;\r
- vi.dwOSVersionInfoSize = sizeof(vi);\r
- if (!GetVersionEx(&vi))\r
- return False;\r
- return (vi.dwMajorVersion >= 5);\r
-}\r
-#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;\r
-#else\r
-#define CHECK_SYS_SSE_SUPPORT\r
-#endif\r
-\r
-\r
-static UInt32 X86_CPUID_ECX_Get_Flags()\r
-{\r
- Cx86cpuid p;\r
- CHECK_SYS_SSE_SUPPORT\r
- if (!x86cpuid_CheckAndRead(&p))\r
- return 0;\r
- return p.c;\r
-}\r
-\r
-BoolInt CPU_IsSupported_AES()\r
-{\r
- return (X86_CPUID_ECX_Get_Flags() >> 25) & 1;\r
-}\r
-\r
-BoolInt CPU_IsSupported_SSSE3()\r
-{\r
- return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;\r
-}\r
-\r
-BoolInt CPU_IsSupported_SSE41()\r
-{\r
- return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;\r
-}\r
-\r
-BoolInt CPU_IsSupported_SHA()\r
-{\r
- Cx86cpuid p;\r
- CHECK_SYS_SSE_SUPPORT\r
- if (!x86cpuid_CheckAndRead(&p))\r
- return False;\r
-\r
- if (p.maxFunc < 7)\r
- return False;\r
- {\r
- UInt32 d[4] = { 0 };\r
- MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);\r
- return (d[1] >> 29) & 1;\r
- }\r
-}\r
-\r
-// #include <stdio.h>\r
-\r
-#ifdef _WIN32\r
-#include <windows.h>\r
-#endif\r
-\r
-BoolInt CPU_IsSupported_AVX2()\r
-{\r
- Cx86cpuid p;\r
- CHECK_SYS_SSE_SUPPORT\r
-\r
- #ifdef _WIN32\r
- #define MY__PF_XSAVE_ENABLED 17\r
- if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))\r
- return False;\r
- #endif\r
-\r
- if (!x86cpuid_CheckAndRead(&p))\r
- return False;\r
- if (p.maxFunc < 7)\r
- return False;\r
- {\r
- UInt32 d[4] = { 0 };\r
- MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);\r
- // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);\r
- return 1\r
- & (d[1] >> 5); // avx2\r
- }\r
-}\r
-\r
-BoolInt CPU_IsSupported_VAES_AVX2()\r
-{\r
- Cx86cpuid p;\r
- CHECK_SYS_SSE_SUPPORT\r
-\r
- #ifdef _WIN32\r
- #define MY__PF_XSAVE_ENABLED 17\r
- if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))\r
- return False;\r
- #endif\r
-\r
- if (!x86cpuid_CheckAndRead(&p))\r
- return False;\r
- if (p.maxFunc < 7)\r
- return False;\r
- {\r
- UInt32 d[4] = { 0 };\r
- MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);\r
- // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);\r
- return 1\r
- & (d[1] >> 5) // avx2\r
- // & (d[1] >> 31) // avx512vl\r
- & (d[2] >> 9); // vaes // VEX-256/EVEX\r
- }\r
-}\r
-\r
-BoolInt CPU_IsSupported_PageGB()\r
-{\r
- Cx86cpuid cpuid;\r
- if (!x86cpuid_CheckAndRead(&cpuid))\r
- return False;\r
- {\r
- UInt32 d[4] = { 0 };\r
- MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]);\r
- if (d[0] < 0x80000001)\r
- return False;\r
- }\r
- {\r
- UInt32 d[4] = { 0 };\r
- MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]);\r
- return (d[3] >> 26) & 1;\r
- }\r
-}\r
-\r
-\r
-#elif defined(MY_CPU_ARM_OR_ARM64)\r
-\r
-#ifdef _WIN32\r
-\r
-#include <windows.h>\r
-\r
-BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }\r
-BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }\r
-BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }\r
-\r
-#else\r
-\r
-#if defined(__APPLE__)\r
-\r
-/*\r
-#include <stdio.h>\r
-#include <string.h>\r
-static void Print_sysctlbyname(const char *name)\r
-{\r
- size_t bufSize = 256;\r
- char buf[256];\r
- int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);\r
- {\r
- int i;\r
- printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);\r
- for (i = 0; i < 20; i++)\r
- printf(" %2x", (unsigned)(Byte)buf[i]);\r
-\r
- }\r
-}\r
-*/\r
-\r
-static BoolInt My_sysctlbyname_Get_BoolInt(const char *name)\r
-{\r
- UInt32 val = 0;\r
- if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)\r
- return 1;\r
- return 0;\r
-}\r
-\r
- /*\r
- Print_sysctlbyname("hw.pagesize");\r
- Print_sysctlbyname("machdep.cpu.brand_string");\r
- */\r
-\r
-BoolInt CPU_IsSupported_CRC32(void)\r
-{\r
- return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");\r
-}\r
-\r
-BoolInt CPU_IsSupported_NEON(void)\r
-{\r
- return My_sysctlbyname_Get_BoolInt("hw.optional.neon");\r
-}\r
-\r
-#ifdef MY_CPU_ARM64\r
-#define APPLE_CRYPTO_SUPPORT_VAL 1\r
-#else\r
-#define APPLE_CRYPTO_SUPPORT_VAL 0\r
-#endif\r
-\r
-BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }\r
-BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }\r
-BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }\r
-\r
-\r
-#else // __APPLE__\r
-\r
-#if defined(__SWITCH__) || defined(__vita__)\r
-\r
-BoolInt CPU_IsSupported_CRC32(void) { return 0; }\r
-BoolInt CPU_IsSupported_NEON(void) { return 1; }\r
-BoolInt CPU_IsSupported_SHA1(void) { return 0; }\r
-BoolInt CPU_IsSupported_SHA2(void) { return 0; }\r
-BoolInt CPU_IsSupported_AES (void) { return 0; }\r
-\r
-#else\r
-\r
-#if (defined(__ANDROID_API__) && __ANDROID_API__ < 18) || \\r
- defined(_MIYOO) || defined(_3DS) || defined(NO_HWCAP)\r
-// no getauxval/AT_HWCAP\r
-#else\r
-#define USE_HWCAP\r
-#endif\r
-\r
-#ifdef USE_HWCAP\r
-\r
-#include <asm/hwcap.h>\r
-#include <sys/auxv.h>\r
-\r
- #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \\r
- BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; }\r
-\r
-#ifdef MY_CPU_ARM64\r
- #define MY_HWCAP_CHECK_FUNC(name) \\r
- MY_HWCAP_CHECK_FUNC_2(name, name)\r
- MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)\r
-// MY_HWCAP_CHECK_FUNC (ASIMD)\r
-#elif defined(MY_CPU_ARM)\r
- #define MY_HWCAP_CHECK_FUNC(name) \\r
- BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }\r
- MY_HWCAP_CHECK_FUNC_2(NEON, NEON)\r
-#endif\r
-\r
-#else // USE_HWCAP\r
-\r
- #define MY_HWCAP_CHECK_FUNC(name) \\r
- BoolInt CPU_IsSupported_ ## name() { return 0; }\r
- MY_HWCAP_CHECK_FUNC(NEON)\r
-\r
-#endif // USE_HWCAP\r
-\r
-MY_HWCAP_CHECK_FUNC (CRC32)\r
-MY_HWCAP_CHECK_FUNC (SHA1)\r
-MY_HWCAP_CHECK_FUNC (SHA2)\r
-MY_HWCAP_CHECK_FUNC (AES)\r
-\r
-#endif\r
-#endif // __APPLE__\r
-#endif // _WIN32\r
-\r
-#endif // MY_CPU_ARM_OR_ARM64\r
-\r
-\r
-\r
-#ifdef __APPLE__\r
-\r
-#include <sys/sysctl.h>\r
-\r
-int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)\r
-{\r
- return sysctlbyname(name, buf, bufSize, NULL, 0);\r
-}\r
-\r
-int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)\r
-{\r
- size_t bufSize = sizeof(*val);\r
- int res = My_sysctlbyname_Get(name, val, &bufSize);\r
- if (res == 0 && bufSize != sizeof(*val))\r
- return EFAULT;\r
- return res;\r
-}\r
-\r
-#endif\r
+++ /dev/null
-/* LzFindMt.c -- multithreaded Match finder for LZ algorithms\r
-2021-12-21 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-// #include <stdio.h>\r
-\r
-#include "CpuArch.h"\r
-\r
-#include "LzHash.h"\r
-#include "LzFindMt.h"\r
-\r
-// #define LOG_ITERS\r
-\r
-// #define LOG_THREAD\r
-\r
-#ifdef LOG_THREAD\r
-#include <stdio.h>\r
-#define PRF(x) x\r
-#else\r
-#define PRF(x)\r
-#endif\r
-\r
-#ifdef LOG_ITERS\r
-#include <stdio.h>\r
-extern UInt64 g_NumIters_Tree;\r
-extern UInt64 g_NumIters_Loop;\r
-extern UInt64 g_NumIters_Bytes;\r
-#define LOG_ITER(x) x\r
-#else\r
-#define LOG_ITER(x)\r
-#endif\r
-\r
-#define kMtHashBlockSize ((UInt32)1 << 17)\r
-#define kMtHashNumBlocks (1 << 1)\r
-\r
-#define GET_HASH_BLOCK_OFFSET(i) (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize)\r
-\r
-#define kMtBtBlockSize ((UInt32)1 << 16)\r
-#define kMtBtNumBlocks (1 << 4)\r
-\r
-#define GET_BT_BLOCK_OFFSET(i) (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize)\r
-\r
-/*\r
- HASH functions:\r
- We use raw 8/16 bits from a[1] and a[2],\r
- xored with crc(a[0]) and crc(a[3]).\r
- We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches.\r
- our crc() function provides one-to-one correspondence for low 8-bit values:\r
- (crc[0...0xFF] & 0xFF) <-> [0...0xFF]\r
-*/\r
-\r
-#define MF(mt) ((mt)->MatchFinder)\r
-#define MF_CRC (p->crc)\r
-\r
-// #define MF(mt) (&(mt)->MatchFinder)\r
-// #define MF_CRC (p->MatchFinder.crc)\r
-\r
-#define MT_HASH2_CALC \\r
- h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1);\r
-\r
-#define MT_HASH3_CALC { \\r
- UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \\r
- h2 = temp & (kHash2Size - 1); \\r
- h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }\r
-\r
-/*\r
-#define MT_HASH3_CALC__NO_2 { \\r
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \\r
- h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }\r
-\r
-#define __MT_HASH4_CALC { \\r
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \\r
- h2 = temp & (kHash2Size - 1); \\r
- temp ^= ((UInt32)cur[2] << 8); \\r
- h3 = temp & (kHash3Size - 1); \\r
- h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }\r
- // (kHash4Size - 1);\r
-*/\r
-\r
-\r
-MY_NO_INLINE\r
-static void MtSync_Construct(CMtSync *p)\r
-{\r
- p->affinity = 0;\r
- p->wasCreated = False;\r
- p->csWasInitialized = False;\r
- p->csWasEntered = False;\r
- Thread_Construct(&p->thread);\r
- Event_Construct(&p->canStart);\r
- Event_Construct(&p->wasStopped);\r
- Semaphore_Construct(&p->freeSemaphore);\r
- Semaphore_Construct(&p->filledSemaphore);\r
-}\r
-\r
-\r
-#define DEBUG_BUFFER_LOCK // define it to debug lock state\r
-\r
-#ifdef DEBUG_BUFFER_LOCK\r
-#include <stdlib.h>\r
-#define BUFFER_MUST_BE_LOCKED(p) if (!(p)->csWasEntered) exit(1);\r
-#define BUFFER_MUST_BE_UNLOCKED(p) if ( (p)->csWasEntered) exit(1);\r
-#else\r
-#define BUFFER_MUST_BE_LOCKED(p)\r
-#define BUFFER_MUST_BE_UNLOCKED(p)\r
-#endif\r
-\r
-#define LOCK_BUFFER(p) { \\r
- BUFFER_MUST_BE_UNLOCKED(p); \\r
- CriticalSection_Enter(&(p)->cs); \\r
- (p)->csWasEntered = True; }\r
-\r
-#define UNLOCK_BUFFER(p) { \\r
- BUFFER_MUST_BE_LOCKED(p); \\r
- CriticalSection_Leave(&(p)->cs); \\r
- (p)->csWasEntered = False; }\r
-\r
-\r
-MY_NO_INLINE\r
-static UInt32 MtSync_GetNextBlock(CMtSync *p)\r
-{\r
- UInt32 numBlocks = 0;\r
- if (p->needStart)\r
- {\r
- BUFFER_MUST_BE_UNLOCKED(p)\r
- p->numProcessedBlocks = 1;\r
- p->needStart = False;\r
- p->stopWriting = False;\r
- p->exit = False;\r
- Event_Reset(&p->wasStopped);\r
- Event_Set(&p->canStart);\r
- }\r
- else\r
- {\r
- UNLOCK_BUFFER(p)\r
- // we free current block\r
- numBlocks = p->numProcessedBlocks++;\r
- Semaphore_Release1(&p->freeSemaphore);\r
- }\r
-\r
- // buffer is UNLOCKED here\r
- Semaphore_Wait(&p->filledSemaphore);\r
- LOCK_BUFFER(p);\r
- return numBlocks;\r
-}\r
-\r
-\r
-/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */\r
-\r
-MY_NO_INLINE\r
-static void MtSync_StopWriting(CMtSync *p)\r
-{\r
- if (!Thread_WasCreated(&p->thread) || p->needStart)\r
- return;\r
-\r
- PRF(printf("\nMtSync_StopWriting %p\n", p));\r
-\r
- if (p->csWasEntered)\r
- {\r
- /* we don't use buffer in this thread after StopWriting().\r
- So we UNLOCK buffer.\r
- And we restore default UNLOCKED state for stopped thread */\r
- UNLOCK_BUFFER(p)\r
- }\r
-\r
- /* We send (p->stopWriting) message and release freeSemaphore\r
- to free current block.\r
- So the thread will see (p->stopWriting) at some\r
- iteration after Wait(freeSemaphore).\r
- The thread doesn't need to fill all avail free blocks,\r
- so we can get fast thread stop.\r
- */\r
-\r
- p->stopWriting = True;\r
- Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!!\r
-\r
- PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p));\r
- Event_Wait(&p->wasStopped);\r
- PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finsihed\n", p));\r
-\r
- /* 21.03 : we don't restore samaphore counters here.\r
- We will recreate and reinit samaphores in next start */\r
-\r
- p->needStart = True;\r
-}\r
-\r
-\r
-MY_NO_INLINE\r
-static void MtSync_Destruct(CMtSync *p)\r
-{\r
- PRF(printf("\nMtSync_Destruct %p\n", p));\r
- \r
- if (Thread_WasCreated(&p->thread))\r
- {\r
- /* we want thread to be in Stopped state before sending EXIT command.\r
- note: stop(btSync) will stop (htSync) also */\r
- MtSync_StopWriting(p);\r
- /* thread in Stopped state here : (p->needStart == true) */\r
- p->exit = True;\r
- // if (p->needStart) // it's (true)\r
- Event_Set(&p->canStart); // we send EXIT command to thread\r
- Thread_Wait_Close(&p->thread); // we wait thread finishing\r
- }\r
-\r
- if (p->csWasInitialized)\r
- {\r
- CriticalSection_Delete(&p->cs);\r
- p->csWasInitialized = False;\r
- }\r
- p->csWasEntered = False;\r
-\r
- Event_Close(&p->canStart);\r
- Event_Close(&p->wasStopped);\r
- Semaphore_Close(&p->freeSemaphore);\r
- Semaphore_Close(&p->filledSemaphore);\r
-\r
- p->wasCreated = False;\r
-}\r
-\r
-\r
-// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }\r
-// we want to get real system error codes here instead of SZ_ERROR_THREAD\r
-#define RINOK_THREAD(x) RINOK(x)\r
-\r
-\r
-// call it before each new file (when new starting is required):\r
-MY_NO_INLINE\r
-static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)\r
-{\r
- WRes wres;\r
- // BUFFER_MUST_BE_UNLOCKED(p)\r
- if (!p->needStart || p->csWasEntered)\r
- return SZ_ERROR_FAIL;\r
- wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks);\r
- if (wres == 0)\r
- wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks);\r
- return MY_SRes_HRESULT_FROM_WRes(wres);\r
-}\r
-\r
-\r
-static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)\r
-{\r
- WRes wres;\r
-\r
- if (p->wasCreated)\r
- return SZ_OK;\r
-\r
- RINOK_THREAD(CriticalSection_Init(&p->cs));\r
- p->csWasInitialized = True;\r
- p->csWasEntered = False;\r
-\r
- RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart));\r
- RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped));\r
-\r
- p->needStart = True;\r
- p->exit = True; /* p->exit is unused before (canStart) Event.\r
- But in case of some unexpected code failure we will get fast exit from thread */\r
-\r
- // return ERROR_TOO_MANY_POSTS; // for debug\r
- // return EINVAL; // for debug\r
-\r
- if (p->affinity != 0)\r
- wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);\r
- else\r
- wres = Thread_Create(&p->thread, startAddress, obj);\r
-\r
- RINOK_THREAD(wres);\r
- p->wasCreated = True;\r
- return SZ_OK;\r
-}\r
-\r
-\r
-MY_NO_INLINE\r
-static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)\r
-{\r
- const WRes wres = MtSync_Create_WRes(p, startAddress, obj);\r
- if (wres == 0)\r
- return 0;\r
- MtSync_Destruct(p);\r
- return MY_SRes_HRESULT_FROM_WRes(wres);\r
-}\r
-\r
-\r
-// ---------- HASH THREAD ----------\r
-\r
-#define kMtMaxValForNormalize 0xFFFFFFFF\r
-// #define kMtMaxValForNormalize ((1 << 21)) // for debug\r
-// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses\r
-\r
-#ifdef MY_CPU_LE_UNALIGN\r
- #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)\r
-#else\r
- #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))\r
-#endif\r
-\r
-#define GetHeads_DECL(name) \\r
- static void GetHeads ## name(const Byte *p, UInt32 pos, \\r
- UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)\r
-\r
-#define GetHeads_LOOP(v) \\r
- for (; numHeads != 0; numHeads--) { \\r
- const UInt32 value = (v); \\r
- p++; \\r
- *heads++ = pos - hash[value]; \\r
- hash[value] = pos++; }\r
-\r
-#define DEF_GetHeads2(name, v, action) \\r
- GetHeads_DECL(name) { action \\r
- GetHeads_LOOP(v) }\r
- \r
-#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)\r
-\r
-DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )\r
-DEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)\r
-DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )\r
-// BT3 is not good for crc collisions for big hashMask values.\r
-\r
-/*\r
-GetHeads_DECL(3b)\r
-{\r
- UNUSED_VAR(hashMask);\r
- UNUSED_VAR(crc);\r
- {\r
- const Byte *pLim = p + numHeads;\r
- if (numHeads == 0)\r
- return;\r
- pLim--;\r
- while (p < pLim)\r
- {\r
- UInt32 v1 = GetUi32(p);\r
- UInt32 v0 = v1 & 0xFFFFFF;\r
- UInt32 h0, h1;\r
- p += 2;\r
- v1 >>= 8;\r
- h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++;\r
- h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++;\r
- heads += 2;\r
- }\r
- if (p == pLim)\r
- {\r
- UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16);\r
- *heads = pos - hash[v0];\r
- hash[v0] = pos;\r
- }\r
- }\r
-}\r
-*/\r
-\r
-/*\r
-GetHeads_DECL(4)\r
-{\r
- unsigned sh = 0;\r
- UNUSED_VAR(crc)\r
- while ((hashMask & 0x80000000) == 0)\r
- {\r
- hashMask <<= 1;\r
- sh++;\r
- }\r
- GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh)\r
-}\r
-#define GetHeads4b GetHeads4\r
-*/\r
-\r
-#define USE_GetHeads_LOCAL_CRC\r
-\r
-#ifdef USE_GetHeads_LOCAL_CRC\r
-\r
-GetHeads_DECL(4)\r
-{\r
- UInt32 crc0[256];\r
- UInt32 crc1[256];\r
- {\r
- unsigned i;\r
- for (i = 0; i < 256; i++)\r
- {\r
- UInt32 v = crc[i];\r
- crc0[i] = v & hashMask;\r
- crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;\r
- // crc1[i] = rotlFixed(v, 8) & hashMask;\r
- }\r
- }\r
- GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1))\r
-}\r
-\r
-GetHeads_DECL(4b)\r
-{\r
- UInt32 crc0[256];\r
- {\r
- unsigned i;\r
- for (i = 0; i < 256; i++)\r
- crc0[i] = crc[i] & hashMask;\r
- }\r
- GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p))\r
-}\r
-\r
-GetHeads_DECL(5)\r
-{\r
- UInt32 crc0[256];\r
- UInt32 crc1[256];\r
- UInt32 crc2[256];\r
- {\r
- unsigned i;\r
- for (i = 0; i < 256; i++)\r
- {\r
- UInt32 v = crc[i];\r
- crc0[i] = v & hashMask;\r
- crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;\r
- crc2[i] = (v << kLzHash_CrcShift_2) & hashMask;\r
- }\r
- }\r
- GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1))\r
-}\r
-\r
-GetHeads_DECL(5b)\r
-{\r
- UInt32 crc0[256];\r
- UInt32 crc1[256];\r
- {\r
- unsigned i;\r
- for (i = 0; i < 256; i++)\r
- {\r
- UInt32 v = crc[i];\r
- crc0[i] = v & hashMask;\r
- crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;\r
- }\r
- }\r
- GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p))\r
-}\r
-\r
-#else\r
-\r
-DEF_GetHeads(4, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask)\r
-DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask)\r
-DEF_GetHeads(5, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask)\r
-DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask)\r
-\r
-#endif\r
- \r
-\r
-static void HashThreadFunc(CMatchFinderMt *mt)\r
-{\r
- CMtSync *p = &mt->hashSync;\r
- PRF(printf("\nHashThreadFunc\n"));\r
- \r
- for (;;)\r
- {\r
- UInt32 blockIndex = 0;\r
- PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n"));\r
- Event_Wait(&p->canStart);\r
- PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n"));\r
- if (p->exit)\r
- {\r
- PRF(printf("\nHashThreadFunc : exit \n"));\r
- return;\r
- }\r
-\r
- MatchFinder_Init_HighHash(MF(mt));\r
-\r
- for (;;)\r
- {\r
- PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos));\r
-\r
- {\r
- CMatchFinder *mf = MF(mt);\r
- if (MatchFinder_NeedMove(mf))\r
- {\r
- CriticalSection_Enter(&mt->btSync.cs);\r
- CriticalSection_Enter(&mt->hashSync.cs);\r
- {\r
- const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf);\r
- ptrdiff_t offset;\r
- MatchFinder_MoveBlock(mf);\r
- offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf);\r
- mt->pointerToCurPos -= offset;\r
- mt->buffer -= offset;\r
- }\r
- CriticalSection_Leave(&mt->hashSync.cs);\r
- CriticalSection_Leave(&mt->btSync.cs);\r
- continue;\r
- }\r
-\r
- Semaphore_Wait(&p->freeSemaphore);\r
-\r
- if (p->exit) // exit is unexpected here. But we check it here for some failure case\r
- return;\r
-\r
- // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)\r
- if (p->stopWriting)\r
- break;\r
-\r
- MatchFinder_ReadIfRequired(mf);\r
- {\r
- UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++);\r
- UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf);\r
- heads[0] = 2;\r
- heads[1] = num;\r
-\r
- /* heads[1] contains the number of avail bytes:\r
- if (avail < mf->numHashBytes) :\r
- {\r
- it means that stream was finished\r
- HASH_THREAD and BT_TREAD must move position for heads[1] (avail) bytes.\r
- HASH_THREAD doesn't stop,\r
- HASH_THREAD fills only the header (2 numbers) for all next blocks:\r
- {2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0}\r
- }\r
- else\r
- {\r
- HASH_THREAD and BT_TREAD must move position for (heads[0] - 2) bytes;\r
- }\r
- */\r
-\r
- if (num >= mf->numHashBytes)\r
- {\r
- num = num - mf->numHashBytes + 1;\r
- if (num > kMtHashBlockSize - 2)\r
- num = kMtHashBlockSize - 2;\r
-\r
- if (mf->pos > (UInt32)kMtMaxValForNormalize - num)\r
- {\r
- const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);\r
- Inline_MatchFinder_ReduceOffsets(mf, subValue);\r
- MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);\r
- }\r
-\r
- heads[0] = 2 + num;\r
- mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);\r
- }\r
-\r
- mf->pos += num; // wrap over zero is allowed at the end of stream\r
- mf->buffer += num;\r
- }\r
- }\r
-\r
- Semaphore_Release1(&p->filledSemaphore);\r
- } // for() processing end\r
-\r
- // p->numBlocks_Sent = blockIndex;\r
- Event_Set(&p->wasStopped);\r
- } // for() thread end\r
-}\r
-\r
-\r
-\r
-\r
-// ---------- BT THREAD ----------\r
-\r
-/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap.\r
- here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */\r
-#define CYC_TO_POS_OFFSET 0\r
-// #define CYC_TO_POS_OFFSET 1 // for debug\r
-\r
-#define MFMT_GM_INLINE\r
-\r
-#ifdef MFMT_GM_INLINE\r
-\r
-/*\r
- we use size_t for (pos) instead of UInt32\r
- to eliminate "movsx" BUG in old MSVC x64 compiler.\r
-*/\r
-\r
-\r
-UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,\r
- UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,\r
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,\r
- UInt32 *posRes);\r
-\r
-#endif\r
-\r
-\r
-static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)\r
-{\r
- UInt32 numProcessed = 0;\r
- UInt32 curPos = 2;\r
- \r
- /* GetMatchesSpec() functions don't create (len = 1)\r
- in [len, dist] match pairs, if (p->numHashBytes >= 2)\r
- Also we suppose here that (matchMaxLen >= 2).\r
- So the following code for (reserve) is not required\r
- UInt32 reserve = (p->matchMaxLen * 2);\r
- const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX\r
- if (reserve < kNumHashBytes_Max - 1)\r
- reserve = kNumHashBytes_Max - 1;\r
- const UInt32 limit = kMtBtBlockSize - (reserve);\r
- */\r
-\r
- const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);\r
-\r
- d[1] = p->hashNumAvail;\r
-\r
- if (p->failure_BT)\r
- {\r
- // printf("\n == 1 BtGetMatches() p->failure_BT\n");\r
- d[0] = 0;\r
- // d[1] = 0;\r
- return;\r
- }\r
- \r
- while (curPos < limit)\r
- {\r
- if (p->hashBufPos == p->hashBufPosLimit)\r
- {\r
- // MatchFinderMt_GetNextBlock_Hash(p);\r
- UInt32 avail;\r
- {\r
- const UInt32 bi = MtSync_GetNextBlock(&p->hashSync);\r
- const UInt32 k = GET_HASH_BLOCK_OFFSET(bi);\r
- const UInt32 *h = p->hashBuf + k;\r
- avail = h[1];\r
- p->hashBufPosLimit = k + h[0];\r
- p->hashNumAvail = avail;\r
- p->hashBufPos = k + 2;\r
- }\r
-\r
- {\r
- /* we must prevent UInt32 overflow for avail total value,\r
- if avail was increased with new hash block */\r
- UInt32 availSum = numProcessed + avail;\r
- if (availSum < numProcessed)\r
- availSum = (UInt32)(Int32)-1;\r
- d[1] = availSum;\r
- }\r
-\r
- if (avail >= p->numHashBytes)\r
- continue;\r
-\r
- // if (p->hashBufPos != p->hashBufPosLimit) exit(1);\r
-\r
- /* (avail < p->numHashBytes)\r
- It means that stream was finished.\r
- And (avail) - is a number of remaining bytes,\r
- we fill (d) for (avail) bytes for LZ_THREAD (receiver).\r
- but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */\r
-\r
- /* here we suppose that we have space enough:\r
- (kMtBtBlockSize - curPos >= p->hashNumAvail) */\r
- p->hashNumAvail = 0;\r
- d[0] = curPos + avail;\r
- d += curPos;\r
- for (; avail != 0; avail--)\r
- *d++ = 0;\r
- return;\r
- }\r
- {\r
- UInt32 size = p->hashBufPosLimit - p->hashBufPos;\r
- UInt32 pos = p->pos;\r
- UInt32 cyclicBufferPos = p->cyclicBufferPos;\r
- UInt32 lenLimit = p->matchMaxLen;\r
- if (lenLimit >= p->hashNumAvail)\r
- lenLimit = p->hashNumAvail;\r
- {\r
- UInt32 size2 = p->hashNumAvail - lenLimit + 1;\r
- if (size2 < size)\r
- size = size2;\r
- size2 = p->cyclicBufferSize - cyclicBufferPos;\r
- if (size2 < size)\r
- size = size2;\r
- }\r
- \r
- if (pos > (UInt32)kMtMaxValForNormalize - size)\r
- {\r
- const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1);\r
- pos -= subValue;\r
- p->pos = pos;\r
- MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);\r
- }\r
-\r
- #ifndef MFMT_GM_INLINE\r
- while (curPos < limit && size-- != 0)\r
- {\r
- UInt32 *startDistances = d + curPos;\r
- UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],\r
- pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,\r
- startDistances + 1, p->numHashBytes - 1) - startDistances);\r
- *startDistances = num - 1;\r
- curPos += num;\r
- cyclicBufferPos++;\r
- pos++;\r
- p->buffer++;\r
- }\r
- #else\r
- {\r
- UInt32 posRes = pos;\r
- const UInt32 *d_end;\r
- {\r
- d_end = GetMatchesSpecN_2(\r
- p->buffer + lenLimit - 1,\r
- pos, p->buffer, p->son, p->cutValue, d + curPos,\r
- p->numHashBytes - 1, p->hashBuf + p->hashBufPos,\r
- d + limit, p->hashBuf + p->hashBufPos + size,\r
- cyclicBufferPos, p->cyclicBufferSize,\r
- &posRes);\r
- }\r
- {\r
- if (!d_end)\r
- {\r
- // printf("\n == 2 BtGetMatches() p->failure_BT\n");\r
- // internal data failure\r
- p->failure_BT = True;\r
- d[0] = 0;\r
- // d[1] = 0;\r
- return;\r
- }\r
- }\r
- curPos = (UInt32)(d_end - d);\r
- {\r
- const UInt32 processed = posRes - pos;\r
- pos = posRes;\r
- p->hashBufPos += processed;\r
- cyclicBufferPos += processed;\r
- p->buffer += processed;\r
- }\r
- }\r
- #endif\r
-\r
- {\r
- const UInt32 processed = pos - p->pos;\r
- numProcessed += processed;\r
- p->hashNumAvail -= processed;\r
- p->pos = pos;\r
- }\r
- if (cyclicBufferPos == p->cyclicBufferSize)\r
- cyclicBufferPos = 0;\r
- p->cyclicBufferPos = cyclicBufferPos;\r
- }\r
- }\r
- \r
- d[0] = curPos;\r
-}\r
-\r
-\r
-static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)\r
-{\r
- CMtSync *sync = &p->hashSync;\r
- \r
- BUFFER_MUST_BE_UNLOCKED(sync)\r
- \r
- if (!sync->needStart)\r
- {\r
- LOCK_BUFFER(sync)\r
- }\r
- \r
- BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex));\r
- \r
- /* We suppose that we have called GetNextBlock() from start.\r
- So buffer is LOCKED */\r
-\r
- UNLOCK_BUFFER(sync)\r
-}\r
-\r
-\r
-MY_NO_INLINE\r
-static void BtThreadFunc(CMatchFinderMt *mt)\r
-{\r
- CMtSync *p = &mt->btSync;\r
- for (;;)\r
- {\r
- UInt32 blockIndex = 0;\r
- Event_Wait(&p->canStart);\r
-\r
- for (;;)\r
- {\r
- PRF(printf(" BT thread block = %d pos = %d\n", (unsigned)blockIndex, mt->pos));\r
- /* (p->exit == true) is possible after (p->canStart) at first loop iteration\r
- and is unexpected after more Wait(freeSemaphore) iterations */\r
- if (p->exit)\r
- return;\r
-\r
- Semaphore_Wait(&p->freeSemaphore);\r
- \r
- // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)\r
- if (p->stopWriting)\r
- break;\r
-\r
- BtFillBlock(mt, blockIndex++);\r
- \r
- Semaphore_Release1(&p->filledSemaphore);\r
- }\r
-\r
- // we stop HASH_THREAD here\r
- MtSync_StopWriting(&mt->hashSync);\r
-\r
- // p->numBlocks_Sent = blockIndex;\r
- Event_Set(&p->wasStopped);\r
- }\r
-}\r
-\r
-\r
-void MatchFinderMt_Construct(CMatchFinderMt *p)\r
-{\r
- p->hashBuf = NULL;\r
- MtSync_Construct(&p->hashSync);\r
- MtSync_Construct(&p->btSync);\r
-}\r
-\r
-static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)\r
-{\r
- ISzAlloc_Free(alloc, p->hashBuf);\r
- p->hashBuf = NULL;\r
-}\r
-\r
-void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)\r
-{\r
- /*\r
- HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs.\r
- So we must be sure that HASH_THREAD will not use CriticalSection(s)\r
- after deleting CriticalSection here.\r
-\r
- we call ReleaseStream(p)\r
- that calls StopWriting(btSync)\r
- that calls StopWriting(hashSync), if it's required to stop HASH_THREAD.\r
- after StopWriting() it's safe to destruct MtSync(s) in any order */\r
-\r
- MatchFinderMt_ReleaseStream(p);\r
-\r
- MtSync_Destruct(&p->btSync);\r
- MtSync_Destruct(&p->hashSync);\r
-\r
- LOG_ITER(\r
- printf("\nTree %9d * %7d iter = %9d = sum : bytes = %9d\n",\r
- (UInt32)(g_NumIters_Tree / 1000),\r
- (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),\r
- (UInt32)(g_NumIters_Loop / 1000),\r
- (UInt32)(g_NumIters_Bytes / 1000)\r
- ));\r
-\r
- MatchFinderMt_FreeMem(p, alloc);\r
-}\r
-\r
-\r
-#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)\r
-#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)\r
-\r
-\r
-static THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; }\r
-static THREAD_FUNC_DECL BtThreadFunc2(void *p)\r
-{\r
- Byte allocaDummy[0x180];\r
- unsigned i = 0;\r
- for (i = 0; i < 16; i++)\r
- allocaDummy[i] = (Byte)0;\r
- if (allocaDummy[0] == 0)\r
- BtThreadFunc((CMatchFinderMt *)p);\r
- return 0;\r
-}\r
-\r
-\r
-SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,\r
- UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)\r
-{\r
- CMatchFinder *mf = MF(p);\r
- p->historySize = historySize;\r
- if (kMtBtBlockSize <= matchMaxLen * 4)\r
- return SZ_ERROR_PARAM;\r
- if (!p->hashBuf)\r
- {\r
- p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32));\r
- if (!p->hashBuf)\r
- return SZ_ERROR_MEM;\r
- p->btBuf = p->hashBuf + kHashBufferSize;\r
- }\r
- keepAddBufferBefore += (kHashBufferSize + kBtBufferSize);\r
- keepAddBufferAfter += kMtHashBlockSize;\r
- if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))\r
- return SZ_ERROR_MEM;\r
-\r
- RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p));\r
- RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p));\r
- return SZ_OK;\r
-}\r
-\r
-\r
-SRes MatchFinderMt_InitMt(CMatchFinderMt *p)\r
-{\r
- RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks));\r
- return MtSync_Init(&p->btSync, kMtBtNumBlocks);\r
-}\r
-\r
-\r
-static void MatchFinderMt_Init(CMatchFinderMt *p)\r
-{\r
- CMatchFinder *mf = MF(p);\r
- \r
- p->btBufPos =\r
- p->btBufPosLimit = NULL;\r
- p->hashBufPos =\r
- p->hashBufPosLimit = 0;\r
- p->hashNumAvail = 0; // 21.03\r
- \r
- p->failure_BT = False;\r
-\r
- /* Init without data reading. We don't want to read data in this thread */\r
- MatchFinder_Init_4(mf);\r
-\r
- MatchFinder_Init_LowHash(mf);\r
- \r
- p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);\r
- p->btNumAvailBytes = 0;\r
- p->failure_LZ_BT = False;\r
- // p->failure_LZ_LZ = False;\r
- \r
- p->lzPos =\r
- 1; // optimal smallest value\r
- // 0; // for debug: ignores match to start\r
- // kNormalizeAlign; // for debug\r
-\r
- p->hash = mf->hash;\r
- p->fixedHashSize = mf->fixedHashSize;\r
- // p->hash4Mask = mf->hash4Mask;\r
- p->crc = mf->crc;\r
- // memcpy(p->crc, mf->crc, sizeof(mf->crc));\r
-\r
- p->son = mf->son;\r
- p->matchMaxLen = mf->matchMaxLen;\r
- p->numHashBytes = mf->numHashBytes;\r
- \r
- /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */\r
- // mf->streamPos = mf->pos = 1; // optimal smallest value\r
- // 0; // for debug: ignores match to start\r
- // kNormalizeAlign; // for debug\r
-\r
- /* we must init (p->pos = mf->pos) for BT, because\r
- BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */\r
- p->pos = mf->pos; // do not change it\r
- \r
- p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET);\r
- p->cyclicBufferSize = mf->cyclicBufferSize;\r
- p->buffer = mf->buffer;\r
- p->cutValue = mf->cutValue;\r
- // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses.\r
-}\r
-\r
-\r
-/* ReleaseStream is required to finish multithreading */\r
-void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)\r
-{\r
- // Sleep(1); // for debug\r
- MtSync_StopWriting(&p->btSync);\r
- // Sleep(200); // for debug\r
- /* p->MatchFinder->ReleaseStream(); */\r
-}\r
-\r
-\r
-MY_NO_INLINE\r
-static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)\r
-{\r
- if (p->failure_LZ_BT)\r
- p->btBufPos = p->failureBuf;\r
- else\r
- {\r
- const UInt32 bi = MtSync_GetNextBlock(&p->btSync);\r
- const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi);\r
- {\r
- const UInt32 numItems = bt[0];\r
- p->btBufPosLimit = bt + numItems;\r
- p->btNumAvailBytes = bt[1];\r
- p->btBufPos = bt + 2;\r
- if (numItems < 2 || numItems > kMtBtBlockSize)\r
- {\r
- p->failureBuf[0] = 0;\r
- p->btBufPos = p->failureBuf;\r
- p->btBufPosLimit = p->failureBuf + 1;\r
- p->failure_LZ_BT = True;\r
- // p->btNumAvailBytes = 0;\r
- /* we don't want to decrease AvailBytes, that was load before.\r
- that can be unxepected for the code that have loaded anopther value before */\r
- }\r
- }\r
- \r
- if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize)\r
- {\r
- /* we don't check (lzPos) over exact avail bytes in (btBuf).\r
- (fixedHashSize) is small, so normalization is fast */\r
- const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);\r
- p->lzPos -= subValue;\r
- MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize);\r
- }\r
- }\r
- return p->btNumAvailBytes;\r
-}\r
-\r
-\r
-\r
-static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)\r
-{\r
- return p->pointerToCurPos;\r
-}\r
-\r
-\r
-#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);\r
-\r
-\r
-static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)\r
-{\r
- if (p->btBufPos != p->btBufPosLimit)\r
- return p->btNumAvailBytes;\r
- return MatchFinderMt_GetNextBlock_Bt(p);\r
-}\r
-\r
-\r
-// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True; return d; }\r
-#define CHECK_FAILURE_LZ(_match_, _pos_)\r
-\r
-static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)\r
-{\r
- UInt32 h2, c2;\r
- UInt32 *hash = p->hash;\r
- const Byte *cur = p->pointerToCurPos;\r
- const UInt32 m = p->lzPos;\r
- MT_HASH2_CALC\r
- \r
- c2 = hash[h2];\r
- hash[h2] = m;\r
-\r
- if (c2 >= matchMinPos)\r
- {\r
- CHECK_FAILURE_LZ(c2, m)\r
- if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])\r
- {\r
- *d++ = 2;\r
- *d++ = m - c2 - 1;\r
- }\r
- }\r
- \r
- return d;\r
-}\r
-\r
-static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)\r
-{\r
- UInt32 h2, h3, c2, c3;\r
- UInt32 *hash = p->hash;\r
- const Byte *cur = p->pointerToCurPos;\r
- const UInt32 m = p->lzPos;\r
- MT_HASH3_CALC\r
-\r
- c2 = hash[h2];\r
- c3 = (hash + kFix3HashSize)[h3];\r
- \r
- hash[h2] = m;\r
- (hash + kFix3HashSize)[h3] = m;\r
-\r
- if (c2 >= matchMinPos)\r
- {\r
- CHECK_FAILURE_LZ(c2, m)\r
- if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])\r
- {\r
- d[1] = m - c2 - 1;\r
- if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])\r
- {\r
- d[0] = 3;\r
- return d + 2;\r
- }\r
- d[0] = 2;\r
- d += 2;\r
- }\r
- }\r
- \r
- if (c3 >= matchMinPos)\r
- {\r
- CHECK_FAILURE_LZ(c3, m)\r
- if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])\r
- {\r
- *d++ = 3;\r
- *d++ = m - c3 - 1;\r
- }\r
- }\r
- \r
- return d;\r
-}\r
-\r
-\r
-#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;\r
-\r
-/*\r
-static\r
-UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)\r
-{\r
- const UInt32 *bt = p->btBufPos;\r
- const UInt32 len = *bt++;\r
- const UInt32 *btLim = bt + len;\r
- UInt32 matchMinPos;\r
- UInt32 avail = p->btNumAvailBytes - 1;\r
- p->btBufPos = btLim;\r
-\r
- {\r
- p->btNumAvailBytes = avail;\r
-\r
- #define BT_HASH_BYTES_MAX 5\r
- \r
- matchMinPos = p->lzPos;\r
-\r
- if (len != 0)\r
- matchMinPos -= bt[1];\r
- else if (avail < (BT_HASH_BYTES_MAX - 1) - 1)\r
- {\r
- INCREASE_LZ_POS\r
- return d;\r
- }\r
- else\r
- {\r
- const UInt32 hs = p->historySize;\r
- if (matchMinPos > hs)\r
- matchMinPos -= hs;\r
- else\r
- matchMinPos = 1;\r
- }\r
- }\r
-\r
- for (;;)\r
- {\r
- \r
- UInt32 h2, h3, c2, c3;\r
- UInt32 *hash = p->hash;\r
- const Byte *cur = p->pointerToCurPos;\r
- UInt32 m = p->lzPos;\r
- MT_HASH3_CALC\r
-\r
- c2 = hash[h2];\r
- c3 = (hash + kFix3HashSize)[h3];\r
- \r
- hash[h2] = m;\r
- (hash + kFix3HashSize)[h3] = m;\r
-\r
- if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])\r
- {\r
- d[1] = m - c2 - 1;\r
- if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])\r
- {\r
- d[0] = 3;\r
- d += 2;\r
- break;\r
- }\r
- // else\r
- {\r
- d[0] = 2;\r
- d += 2;\r
- }\r
- }\r
- if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])\r
- {\r
- *d++ = 3;\r
- *d++ = m - c3 - 1;\r
- }\r
- break;\r
- }\r
-\r
- if (len != 0)\r
- {\r
- do\r
- {\r
- const UInt32 v0 = bt[0];\r
- const UInt32 v1 = bt[1];\r
- bt += 2;\r
- d[0] = v0;\r
- d[1] = v1;\r
- d += 2;\r
- }\r
- while (bt != btLim);\r
- }\r
- INCREASE_LZ_POS\r
- return d;\r
-}\r
-*/\r
-\r
-\r
-static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)\r
-{\r
- UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;\r
- UInt32 *hash = p->hash;\r
- const Byte *cur = p->pointerToCurPos;\r
- const UInt32 m = p->lzPos;\r
- MT_HASH3_CALC\r
- // MT_HASH4_CALC\r
- c2 = hash[h2];\r
- c3 = (hash + kFix3HashSize)[h3];\r
- // c4 = (hash + kFix4HashSize)[h4];\r
- \r
- hash[h2] = m;\r
- (hash + kFix3HashSize)[h3] = m;\r
- // (hash + kFix4HashSize)[h4] = m;\r
-\r
- #define _USE_H2\r
-\r
- #ifdef _USE_H2\r
- if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])\r
- {\r
- d[1] = m - c2 - 1;\r
- if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])\r
- {\r
- // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3;\r
- // return d + 2;\r
-\r
- if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3])\r
- {\r
- d[0] = 4;\r
- return d + 2;\r
- }\r
- d[0] = 3;\r
- d += 2;\r
- \r
- #ifdef _USE_H4\r
- if (c4 >= matchMinPos)\r
- if (\r
- cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&\r
- cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]\r
- )\r
- {\r
- *d++ = 4;\r
- *d++ = m - c4 - 1;\r
- }\r
- #endif\r
- return d;\r
- }\r
- d[0] = 2;\r
- d += 2;\r
- }\r
- #endif\r
- \r
- if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])\r
- {\r
- d[1] = m - c3 - 1;\r
- if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3])\r
- {\r
- d[0] = 4;\r
- return d + 2;\r
- }\r
- d[0] = 3;\r
- d += 2;\r
- }\r
-\r
- #ifdef _USE_H4\r
- if (c4 >= matchMinPos)\r
- if (\r
- cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&\r
- cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]\r
- )\r
- {\r
- *d++ = 4;\r
- *d++ = m - c4 - 1;\r
- }\r
- #endif\r
- \r
- return d;\r
-}\r
-\r
-\r
-static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)\r
-{\r
- const UInt32 *bt = p->btBufPos;\r
- const UInt32 len = *bt++;\r
- const UInt32 *btLim = bt + len;\r
- p->btBufPos = btLim;\r
- p->btNumAvailBytes--;\r
- INCREASE_LZ_POS\r
- {\r
- while (bt != btLim)\r
- {\r
- const UInt32 v0 = bt[0];\r
- const UInt32 v1 = bt[1];\r
- bt += 2;\r
- d[0] = v0;\r
- d[1] = v1;\r
- d += 2;\r
- }\r
- }\r
- return d;\r
-}\r
-\r
-\r
-\r
-static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)\r
-{\r
- const UInt32 *bt = p->btBufPos;\r
- UInt32 len = *bt++;\r
- const UInt32 avail = p->btNumAvailBytes - 1;\r
- p->btNumAvailBytes = avail;\r
- p->btBufPos = bt + len;\r
- if (len == 0)\r
- {\r
- #define BT_HASH_BYTES_MAX 5\r
- if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)\r
- {\r
- UInt32 m = p->lzPos;\r
- if (m > p->historySize)\r
- m -= p->historySize;\r
- else\r
- m = 1;\r
- d = p->MixMatchesFunc(p, m, d);\r
- }\r
- }\r
- else\r
- {\r
- /*\r
- first match pair from BinTree: (match_len, match_dist),\r
- (match_len >= numHashBytes).\r
- MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)\r
- */\r
- d = p->MixMatchesFunc(p, p->lzPos - bt[1], d);\r
- // if (d) // check for failure\r
- do\r
- {\r
- const UInt32 v0 = bt[0];\r
- const UInt32 v1 = bt[1];\r
- bt += 2;\r
- d[0] = v0;\r
- d[1] = v1;\r
- d += 2;\r
- }\r
- while (len -= 2);\r
- }\r
- INCREASE_LZ_POS\r
- return d;\r
-}\r
-\r
-#define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED\r
-#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;\r
-#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);\r
-\r
-static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)\r
-{\r
- SKIP_HEADER2_MT { p->btNumAvailBytes--;\r
- SKIP_FOOTER_MT\r
-}\r
-\r
-static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)\r
-{\r
- SKIP_HEADER_MT(2)\r
- UInt32 h2;\r
- MT_HASH2_CALC\r
- hash[h2] = p->lzPos;\r
- SKIP_FOOTER_MT\r
-}\r
-\r
-static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)\r
-{\r
- SKIP_HEADER_MT(3)\r
- UInt32 h2, h3;\r
- MT_HASH3_CALC\r
- (hash + kFix3HashSize)[h3] =\r
- hash[ h2] =\r
- p->lzPos;\r
- SKIP_FOOTER_MT\r
-}\r
-\r
-/*\r
-// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip().\r
-// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream.\r
-\r
-static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)\r
-{\r
- SKIP_HEADER_MT(4)\r
- UInt32 h2, h3; // h4\r
- MT_HASH3_CALC\r
- // MT_HASH4_CALC\r
- // (hash + kFix4HashSize)[h4] =\r
- (hash + kFix3HashSize)[h3] =\r
- hash[ h2] =\r
- p->lzPos;\r
- SKIP_FOOTER_MT\r
-}\r
-*/\r
-\r
-void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)\r
-{\r
- vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;\r
- vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;\r
- vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;\r
- vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;\r
- \r
- switch (MF(p)->numHashBytes)\r
- {\r
- case 2:\r
- p->GetHeadsFunc = GetHeads2;\r
- p->MixMatchesFunc = (Mf_Mix_Matches)NULL;\r
- vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;\r
- vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;\r
- break;\r
- case 3:\r
- p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;\r
- p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;\r
- vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;\r
- break;\r
- case 4:\r
- p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;\r
-\r
- // it's fast inline version of GetMatches()\r
- // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;\r
-\r
- p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;\r
- vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;\r
- break;\r
- default:\r
- p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;\r
- p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;\r
- vTable->Skip =\r
- (Mf_Skip_Func)MatchFinderMt3_Skip;\r
- // (Mf_Skip_Func)MatchFinderMt4_Skip;\r
- break;\r
- }\r
-}\r
+++ /dev/null
-/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms\r
-2021-07-13 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-#include "CpuArch.h"\r
-#include "LzFind.h"\r
-\r
-// #include "LzFindMt.h"\r
-\r
-// #define LOG_ITERS\r
-\r
-// #define LOG_THREAD\r
-\r
-#ifdef LOG_THREAD\r
-#include <stdio.h>\r
-#define PRF(x) x\r
-#else\r
-// #define PRF(x)\r
-#endif\r
-\r
-#ifdef LOG_ITERS\r
-#include <stdio.h>\r
-UInt64 g_NumIters_Tree;\r
-UInt64 g_NumIters_Loop;\r
-UInt64 g_NumIters_Bytes;\r
-#define LOG_ITER(x) x\r
-#else\r
-#define LOG_ITER(x)\r
-#endif\r
-\r
-// ---------- BT THREAD ----------\r
-\r
-#define USE_SON_PREFETCH\r
-#define USE_LONG_MATCH_OPT\r
-\r
-#define kEmptyHashValue 0\r
-\r
-// #define CYC_TO_POS_OFFSET 0\r
-\r
-// #define CYC_TO_POS_OFFSET 1 // for debug\r
-\r
-/*\r
-MY_NO_INLINE\r
-UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,\r
- UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)\r
-{\r
- do\r
- {\r
- UInt32 delta;\r
- if (hash == size)\r
- break;\r
- delta = *hash++;\r
-\r
- if (delta == 0 || delta > (UInt32)pos)\r
- return NULL;\r
-\r
- lenLimit++;\r
-\r
- if (delta == (UInt32)pos)\r
- {\r
- CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;\r
- *d++ = 0;\r
- ptr1[0] = kEmptyHashValue;\r
- ptr1[1] = kEmptyHashValue;\r
- }\r
-else\r
-{\r
- UInt32 *_distances = ++d;\r
-\r
- CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;\r
- CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;\r
-\r
- const Byte *len0 = cur, *len1 = cur;\r
- UInt32 cutValue = _cutValue;\r
- const Byte *maxLen = cur + _maxLen;\r
-\r
- for (LOG_ITER(g_NumIters_Tree++);;)\r
- {\r
- LOG_ITER(g_NumIters_Loop++);\r
- {\r
- const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;\r
- CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);\r
- const Byte *len = (len0 < len1 ? len0 : len1);\r
-\r
- #ifdef USE_SON_PREFETCH\r
- const UInt32 pair0 = *pair;\r
- #endif\r
-\r
- if (len[diff] == len[0])\r
- {\r
- if (++len != lenLimit && len[diff] == len[0])\r
- while (++len != lenLimit)\r
- {\r
- LOG_ITER(g_NumIters_Bytes++);\r
- if (len[diff] != len[0])\r
- break;\r
- }\r
- if (maxLen < len)\r
- {\r
- maxLen = len;\r
- *d++ = (UInt32)(len - cur);\r
- *d++ = delta - 1;\r
- \r
- if (len == lenLimit)\r
- {\r
- const UInt32 pair1 = pair[1];\r
- *ptr1 =\r
- #ifdef USE_SON_PREFETCH\r
- pair0;\r
- #else\r
- pair[0];\r
- #endif\r
- *ptr0 = pair1;\r
-\r
- _distances[-1] = (UInt32)(d - _distances);\r
-\r
- #ifdef USE_LONG_MATCH_OPT\r
-\r
- if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)\r
- break;\r
-\r
- {\r
- for (;;)\r
- {\r
- hash++;\r
- pos++;\r
- cur++;\r
- lenLimit++;\r
- {\r
- CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;\r
- #if 0\r
- *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];\r
- #else\r
- const UInt32 p0 = ptr[0 + (diff * 2)];\r
- const UInt32 p1 = ptr[1 + (diff * 2)];\r
- ptr[0] = p0;\r
- ptr[1] = p1;\r
- // ptr[0] = ptr[0 + (diff * 2)];\r
- // ptr[1] = ptr[1 + (diff * 2)];\r
- #endif\r
- }\r
- // PrintSon(son + 2, pos - 1);\r
- // printf("\npos = %x delta = %x\n", pos, delta);\r
- len++;\r
- *d++ = 2;\r
- *d++ = (UInt32)(len - cur);\r
- *d++ = delta - 1;\r
- if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)\r
- break;\r
- }\r
- }\r
- #endif\r
-\r
- break;\r
- }\r
- }\r
- }\r
-\r
- {\r
- const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);\r
- if (len[diff] < len[0])\r
- {\r
- delta = pair[1];\r
- if (delta >= curMatch)\r
- return NULL;\r
- *ptr1 = curMatch;\r
- ptr1 = pair + 1;\r
- len1 = len;\r
- }\r
- else\r
- {\r
- delta = *pair;\r
- if (delta >= curMatch)\r
- return NULL;\r
- *ptr0 = curMatch;\r
- ptr0 = pair;\r
- len0 = len;\r
- }\r
-\r
- delta = (UInt32)pos - delta;\r
- \r
- if (--cutValue == 0 || delta >= pos)\r
- {\r
- *ptr0 = *ptr1 = kEmptyHashValue;\r
- _distances[-1] = (UInt32)(d - _distances);\r
- break;\r
- }\r
- }\r
- }\r
- } // for (tree iterations)\r
-}\r
- pos++;\r
- cur++;\r
- }\r
- while (d < limit);\r
- *posRes = (UInt32)pos;\r
- return d;\r
-}\r
-*/\r
-\r
-/* define cbs if you use 2 functions.\r
- GetMatchesSpecN_1() : (pos < _cyclicBufferSize)\r
- GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)\r
-\r
- do not define cbs if you use 1 function:\r
- GetMatchesSpecN_2()\r
-*/\r
-\r
-// #define cbs _cyclicBufferSize\r
-\r
-/*\r
- we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32\r
- to eliminate "movsx" BUG in old MSVC x64 compiler.\r
-*/\r
-\r
-UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,\r
- UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,\r
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,\r
- UInt32 *posRes);\r
-\r
-MY_NO_INLINE\r
-UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,\r
- UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,\r
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,\r
- UInt32 *posRes)\r
-{\r
- do // while (hash != size)\r
- {\r
- UInt32 delta;\r
- \r
- #ifndef cbs\r
- UInt32 cbs;\r
- #endif\r
-\r
- if (hash == size)\r
- break;\r
-\r
- delta = *hash++;\r
-\r
- if (delta == 0)\r
- return NULL;\r
-\r
- lenLimit++;\r
-\r
- #ifndef cbs\r
- cbs = _cyclicBufferSize;\r
- if ((UInt32)pos < cbs)\r
- {\r
- if (delta > (UInt32)pos)\r
- return NULL;\r
- cbs = (UInt32)pos;\r
- }\r
- #endif\r
-\r
- if (delta >= cbs)\r
- {\r
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);\r
- *d++ = 0;\r
- ptr1[0] = kEmptyHashValue;\r
- ptr1[1] = kEmptyHashValue;\r
- }\r
-else\r
-{\r
- UInt32 *_distances = ++d;\r
-\r
- CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;\r
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);\r
-\r
- UInt32 cutValue = _cutValue;\r
- const Byte *len0 = cur, *len1 = cur;\r
- const Byte *maxLen = cur + _maxLen;\r
-\r
- // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else\r
- for (LOG_ITER(g_NumIters_Tree++);;)\r
- {\r
- LOG_ITER(g_NumIters_Loop++);\r
- {\r
- // SPEC code\r
- CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta\r
- + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)\r
- ) << 1);\r
-\r
- const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;\r
- const Byte *len = (len0 < len1 ? len0 : len1);\r
-\r
- #ifdef USE_SON_PREFETCH\r
- const UInt32 pair0 = *pair;\r
- #endif\r
-\r
- if (len[diff] == len[0])\r
- {\r
- if (++len != lenLimit && len[diff] == len[0])\r
- while (++len != lenLimit)\r
- {\r
- LOG_ITER(g_NumIters_Bytes++);\r
- if (len[diff] != len[0])\r
- break;\r
- }\r
- if (maxLen < len)\r
- {\r
- maxLen = len;\r
- *d++ = (UInt32)(len - cur);\r
- *d++ = delta - 1;\r
- \r
- if (len == lenLimit)\r
- {\r
- const UInt32 pair1 = pair[1];\r
- *ptr1 =\r
- #ifdef USE_SON_PREFETCH\r
- pair0;\r
- #else\r
- pair[0];\r
- #endif\r
- *ptr0 = pair1;\r
-\r
- _distances[-1] = (UInt32)(d - _distances);\r
-\r
- #ifdef USE_LONG_MATCH_OPT\r
-\r
- if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)\r
- break;\r
-\r
- {\r
- for (;;)\r
- {\r
- *d++ = 2;\r
- *d++ = (UInt32)(lenLimit - cur);\r
- *d++ = delta - 1;\r
- cur++;\r
- lenLimit++;\r
- // SPEC\r
- _cyclicBufferPos++;\r
- {\r
- // SPEC code\r
- CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);\r
- const CLzRef *src = dest + ((diff\r
- + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);\r
- // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;\r
- #if 0\r
- *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);\r
- #else\r
- const UInt32 p0 = src[0];\r
- const UInt32 p1 = src[1];\r
- dest[0] = p0;\r
- dest[1] = p1;\r
- #endif\r
- }\r
- pos++;\r
- hash++;\r
- if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)\r
- break;\r
- } // for() end for long matches\r
- }\r
- #endif\r
-\r
- break; // break from TREE iterations\r
- }\r
- }\r
- }\r
- {\r
- const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);\r
- if (len[diff] < len[0])\r
- {\r
- delta = pair[1];\r
- *ptr1 = curMatch;\r
- ptr1 = pair + 1;\r
- len1 = len;\r
- if (delta >= curMatch)\r
- return NULL;\r
- }\r
- else\r
- {\r
- delta = *pair;\r
- *ptr0 = curMatch;\r
- ptr0 = pair;\r
- len0 = len;\r
- if (delta >= curMatch)\r
- return NULL;\r
- }\r
- delta = (UInt32)pos - delta;\r
- \r
- if (--cutValue == 0 || delta >= cbs)\r
- {\r
- *ptr0 = *ptr1 = kEmptyHashValue;\r
- _distances[-1] = (UInt32)(d - _distances);\r
- break;\r
- }\r
- }\r
- }\r
- } // for (tree iterations)\r
-}\r
- pos++;\r
- _cyclicBufferPos++;\r
- cur++;\r
- }\r
- while (d < limit);\r
- *posRes = (UInt32)pos;\r
- return d;\r
-}\r
-\r
-\r
-\r
-/*\r
-typedef UInt32 uint32plus; // size_t\r
-\r
-UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,\r
- UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,\r
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,\r
- UInt32 *posRes)\r
-{\r
- do // while (hash != size)\r
- {\r
- UInt32 delta;\r
-\r
- #ifndef cbs\r
- UInt32 cbs;\r
- #endif\r
-\r
- if (hash == size)\r
- break;\r
-\r
- delta = *hash++;\r
-\r
- if (delta == 0)\r
- return NULL;\r
-\r
- #ifndef cbs\r
- cbs = _cyclicBufferSize;\r
- if ((UInt32)pos < cbs)\r
- {\r
- if (delta > (UInt32)pos)\r
- return NULL;\r
- cbs = (UInt32)pos;\r
- }\r
- #endif\r
- \r
- if (delta >= cbs)\r
- {\r
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);\r
- *d++ = 0;\r
- ptr1[0] = kEmptyHashValue;\r
- ptr1[1] = kEmptyHashValue;\r
- }\r
-else\r
-{\r
- CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;\r
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);\r
- UInt32 *_distances = ++d;\r
- uint32plus len0 = 0, len1 = 0;\r
- UInt32 cutValue = _cutValue;\r
- uint32plus maxLen = _maxLen;\r
- // lenLimit++; // const Byte *lenLimit = cur + _lenLimit;\r
-\r
- for (LOG_ITER(g_NumIters_Tree++);;)\r
- {\r
- LOG_ITER(g_NumIters_Loop++);\r
- {\r
- // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;\r
- CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta\r
- + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)\r
- ) << 1);\r
- const Byte *pb = cur - delta;\r
- uint32plus len = (len0 < len1 ? len0 : len1);\r
-\r
- #ifdef USE_SON_PREFETCH\r
- const UInt32 pair0 = *pair;\r
- #endif\r
-\r
- if (pb[len] == cur[len])\r
- {\r
- if (++len != lenLimit && pb[len] == cur[len])\r
- while (++len != lenLimit)\r
- if (pb[len] != cur[len])\r
- break;\r
- if (maxLen < len)\r
- {\r
- maxLen = len;\r
- *d++ = (UInt32)len;\r
- *d++ = delta - 1;\r
- if (len == lenLimit)\r
- {\r
- {\r
- const UInt32 pair1 = pair[1];\r
- *ptr0 = pair1;\r
- *ptr1 =\r
- #ifdef USE_SON_PREFETCH\r
- pair0;\r
- #else\r
- pair[0];\r
- #endif\r
- }\r
-\r
- _distances[-1] = (UInt32)(d - _distances);\r
-\r
- #ifdef USE_LONG_MATCH_OPT\r
-\r
- if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)\r
- break;\r
-\r
- {\r
- const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;\r
- for (;;)\r
- {\r
- *d++ = 2;\r
- *d++ = (UInt32)lenLimit;\r
- *d++ = delta - 1;\r
- _cyclicBufferPos++;\r
- {\r
- CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);\r
- const CLzRef *src = dest + ((diff +\r
- (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);\r
- #if 0\r
- *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);\r
- #else\r
- const UInt32 p0 = src[0];\r
- const UInt32 p1 = src[1];\r
- dest[0] = p0;\r
- dest[1] = p1;\r
- #endif\r
- }\r
- hash++;\r
- pos++;\r
- cur++;\r
- pb++;\r
- if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)\r
- break;\r
- }\r
- }\r
- #endif\r
-\r
- break;\r
- }\r
- }\r
- }\r
- {\r
- const UInt32 curMatch = (UInt32)pos - delta;\r
- if (pb[len] < cur[len])\r
- {\r
- delta = pair[1];\r
- *ptr1 = curMatch;\r
- ptr1 = pair + 1;\r
- len1 = len;\r
- }\r
- else\r
- {\r
- delta = *pair;\r
- *ptr0 = curMatch;\r
- ptr0 = pair;\r
- len0 = len;\r
- }\r
-\r
- {\r
- if (delta >= curMatch)\r
- return NULL;\r
- delta = (UInt32)pos - delta;\r
- if (delta >= cbs\r
- // delta >= _cyclicBufferSize || delta >= pos\r
- || --cutValue == 0)\r
- {\r
- *ptr0 = *ptr1 = kEmptyHashValue;\r
- _distances[-1] = (UInt32)(d - _distances);\r
- break;\r
- }\r
- }\r
- }\r
- }\r
- } // for (tree iterations)\r
-}\r
- pos++;\r
- _cyclicBufferPos++;\r
- cur++;\r
- }\r
- while (d < limit);\r
- *posRes = (UInt32)pos;\r
- return d;\r
-}\r
-*/\r
+++ /dev/null
-/* Lzma86Dec.c -- LZMA + x86 (BCJ) Filter Decoder\r
-2016-05-16 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-#include "Lzma86.h"\r
-\r
-#include "Alloc.h"\r
-#include "Bra.h"\r
-#include "LzmaDec.h"\r
-\r
-SRes Lzma86_GetUnpackSize(const Byte *src, SizeT srcLen, UInt64 *unpackSize)\r
-{\r
- unsigned i;\r
- if (srcLen < LZMA86_HEADER_SIZE)\r
- return SZ_ERROR_INPUT_EOF;\r
- *unpackSize = 0;\r
- for (i = 0; i < sizeof(UInt64); i++)\r
- *unpackSize += ((UInt64)src[LZMA86_SIZE_OFFSET + i]) << (8 * i);\r
- return SZ_OK;\r
-}\r
-\r
-SRes Lzma86_Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen)\r
-{\r
- SRes res;\r
- int useFilter;\r
- SizeT inSizePure;\r
- ELzmaStatus status;\r
-\r
- if (*srcLen < LZMA86_HEADER_SIZE)\r
- return SZ_ERROR_INPUT_EOF;\r
-\r
- useFilter = src[0];\r
-\r
- if (useFilter > 1)\r
- {\r
- *destLen = 0;\r
- return SZ_ERROR_UNSUPPORTED;\r
- }\r
-\r
- inSizePure = *srcLen - LZMA86_HEADER_SIZE;\r
- res = LzmaDecode(dest, destLen, src + LZMA86_HEADER_SIZE, &inSizePure,\r
- src + 1, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status, &g_Alloc);\r
- *srcLen = inSizePure + LZMA86_HEADER_SIZE;\r
- if (res != SZ_OK)\r
- return res;\r
- if (useFilter == 1)\r
- {\r
- UInt32 x86State;\r
- x86_Convert_Init(x86State);\r
- x86_Convert(dest, *destLen, 0, &x86State, 0);\r
- }\r
- return SZ_OK;\r
-}\r
+++ /dev/null
-/* Lzma86Enc.c -- LZMA + x86 (BCJ) Filter Encoder\r
-2018-07-04 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-#include <string.h>\r
-\r
-#include "Lzma86.h"\r
-\r
-#include "Alloc.h"\r
-#include "Bra.h"\r
-#include "LzmaEnc.h"\r
-\r
-int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,\r
- int level, UInt32 dictSize, int filterMode)\r
-{\r
- size_t outSize2 = *destLen;\r
- Byte *filteredStream;\r
- BoolInt useFilter;\r
- int mainResult = SZ_ERROR_OUTPUT_EOF;\r
- CLzmaEncProps props;\r
- LzmaEncProps_Init(&props);\r
- props.level = level;\r
- props.dictSize = dictSize;\r
- \r
- *destLen = 0;\r
- if (outSize2 < LZMA86_HEADER_SIZE)\r
- return SZ_ERROR_OUTPUT_EOF;\r
-\r
- {\r
- int i;\r
- UInt64 t = srcLen;\r
- for (i = 0; i < 8; i++, t >>= 8)\r
- dest[LZMA86_SIZE_OFFSET + i] = (Byte)t;\r
- }\r
-\r
- filteredStream = 0;\r
- useFilter = (filterMode != SZ_FILTER_NO);\r
- if (useFilter)\r
- {\r
- if (srcLen != 0)\r
- {\r
- filteredStream = (Byte *)MyAlloc(srcLen);\r
- if (filteredStream == 0)\r
- return SZ_ERROR_MEM;\r
- memcpy(filteredStream, src, srcLen);\r
- }\r
- {\r
- UInt32 x86State;\r
- x86_Convert_Init(x86State);\r
- x86_Convert(filteredStream, srcLen, 0, &x86State, 1);\r
- }\r
- }\r
-\r
- {\r
- size_t minSize = 0;\r
- BoolInt bestIsFiltered = False;\r
-\r
- /* passes for SZ_FILTER_AUTO:\r
- 0 - BCJ + LZMA\r
- 1 - LZMA\r
- 2 - BCJ + LZMA agaian, if pass 0 (BCJ + LZMA) is better.\r
- */\r
- int numPasses = (filterMode == SZ_FILTER_AUTO) ? 3 : 1;\r
-\r
- int i;\r
- for (i = 0; i < numPasses; i++)\r
- {\r
- size_t outSizeProcessed = outSize2 - LZMA86_HEADER_SIZE;\r
- size_t outPropsSize = 5;\r
- SRes curRes;\r
- BoolInt curModeIsFiltered = (numPasses > 1 && i == numPasses - 1);\r
- if (curModeIsFiltered && !bestIsFiltered)\r
- break;\r
- if (useFilter && i == 0)\r
- curModeIsFiltered = True;\r
- \r
- curRes = LzmaEncode(dest + LZMA86_HEADER_SIZE, &outSizeProcessed,\r
- curModeIsFiltered ? filteredStream : src, srcLen,\r
- &props, dest + 1, &outPropsSize, 0,\r
- NULL, &g_Alloc, &g_Alloc);\r
- \r
- if (curRes != SZ_ERROR_OUTPUT_EOF)\r
- {\r
- if (curRes != SZ_OK)\r
- {\r
- mainResult = curRes;\r
- break;\r
- }\r
- if (outSizeProcessed <= minSize || mainResult != SZ_OK)\r
- {\r
- minSize = outSizeProcessed;\r
- bestIsFiltered = curModeIsFiltered;\r
- mainResult = SZ_OK;\r
- }\r
- }\r
- }\r
- dest[0] = (Byte)(bestIsFiltered ? 1 : 0);\r
- *destLen = LZMA86_HEADER_SIZE + minSize;\r
- }\r
- if (useFilter)\r
- MyFree(filteredStream);\r
- return mainResult;\r
-}\r
+++ /dev/null
-/* LzmaLib.c -- LZMA library wrapper\r
-2015-06-13 : Igor Pavlov : Public domain */\r
-\r
-#include "Alloc.h"\r
-#include "LzmaDec.h"\r
-#include "LzmaEnc.h"\r
-#include "LzmaLib.h"\r
-\r
-MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,\r
- unsigned char *outProps, size_t *outPropsSize,\r
- int level, /* 0 <= level <= 9, default = 5 */\r
- unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */\r
- int lc, /* 0 <= lc <= 8, default = 3 */\r
- int lp, /* 0 <= lp <= 4, default = 0 */\r
- int pb, /* 0 <= pb <= 4, default = 2 */\r
- int fb, /* 5 <= fb <= 273, default = 32 */\r
- int numThreads /* 1 or 2, default = 2 */\r
-)\r
-{\r
- CLzmaEncProps props;\r
- LzmaEncProps_Init(&props);\r
- props.level = level;\r
- props.dictSize = dictSize;\r
- props.lc = lc;\r
- props.lp = lp;\r
- props.pb = pb;\r
- props.fb = fb;\r
- props.numThreads = numThreads;\r
-\r
- return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0,\r
- NULL, &g_Alloc, &g_Alloc);\r
-}\r
-\r
-\r
-MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,\r
- const unsigned char *props, size_t propsSize)\r
-{\r
- ELzmaStatus status;\r
- return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc);\r
-}\r
+++ /dev/null
-/* MtCoder.c -- Multi-thread Coder\r
-2021-12-21 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-#include "MtCoder.h"\r
-\r
-#ifndef _7ZIP_ST\r
-\r
-static SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize)\r
-{\r
- CMtProgressThunk *thunk = CONTAINER_FROM_VTBL(pp, CMtProgressThunk, vt);\r
- UInt64 inSize2 = 0;\r
- UInt64 outSize2 = 0;\r
- if (inSize != (UInt64)(Int64)-1)\r
- {\r
- inSize2 = inSize - thunk->inSize;\r
- thunk->inSize = inSize;\r
- }\r
- if (outSize != (UInt64)(Int64)-1)\r
- {\r
- outSize2 = outSize - thunk->outSize;\r
- thunk->outSize = outSize;\r
- }\r
- return MtProgress_ProgressAdd(thunk->mtProgress, inSize2, outSize2);\r
-}\r
-\r
-\r
-void MtProgressThunk_CreateVTable(CMtProgressThunk *p)\r
-{\r
- p->vt.Progress = MtProgressThunk_Progress;\r
-}\r
-\r
-\r
-\r
-#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }\r
-\r
-\r
-static WRes ArEvent_OptCreate_And_Reset(CEvent *p)\r
-{\r
- if (Event_IsCreated(p))\r
- return Event_Reset(p);\r
- return AutoResetEvent_CreateNotSignaled(p);\r
-}\r
-\r
-\r
-static THREAD_FUNC_DECL ThreadFunc(void *pp);\r
-\r
-\r
-static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t)\r
-{\r
- WRes wres = ArEvent_OptCreate_And_Reset(&t->startEvent);\r
- if (wres == 0)\r
- {\r
- t->stop = False;\r
- if (!Thread_WasCreated(&t->thread))\r
- wres = Thread_Create(&t->thread, ThreadFunc, t);\r
- if (wres == 0)\r
- wres = Event_Set(&t->startEvent);\r
- }\r
- if (wres == 0)\r
- return SZ_OK;\r
- return MY_SRes_HRESULT_FROM_WRes(wres);\r
-}\r
-\r
-\r
-static void MtCoderThread_Destruct(CMtCoderThread *t)\r
-{\r
- if (Thread_WasCreated(&t->thread))\r
- {\r
- t->stop = 1;\r
- Event_Set(&t->startEvent);\r
- Thread_Wait_Close(&t->thread);\r
- }\r
-\r
- Event_Close(&t->startEvent);\r
-\r
- if (t->inBuf)\r
- {\r
- ISzAlloc_Free(t->mtCoder->allocBig, t->inBuf);\r
- t->inBuf = NULL;\r
- }\r
-}\r
-\r
-\r
-\r
-static SRes FullRead(ISeqInStream *stream, Byte *data, size_t *processedSize)\r
-{\r
- size_t size = *processedSize;\r
- *processedSize = 0;\r
- while (size != 0)\r
- {\r
- size_t cur = size;\r
- SRes res = ISeqInStream_Read(stream, data, &cur);\r
- *processedSize += cur;\r
- data += cur;\r
- size -= cur;\r
- RINOK(res);\r
- if (cur == 0)\r
- return SZ_OK;\r
- }\r
- return SZ_OK;\r
-}\r
-\r
-\r
-/*\r
- ThreadFunc2() returns:\r
- SZ_OK - in all normal cases (even for stream error or memory allocation error)\r
- SZ_ERROR_THREAD - in case of failure in system synch function\r
-*/\r
-\r
-static SRes ThreadFunc2(CMtCoderThread *t)\r
-{\r
- CMtCoder *mtc = t->mtCoder;\r
-\r
- for (;;)\r
- {\r
- unsigned bi;\r
- SRes res;\r
- SRes res2;\r
- BoolInt finished;\r
- unsigned bufIndex;\r
- size_t size;\r
- const Byte *inData;\r
- UInt64 readProcessed = 0;\r
- \r
- RINOK_THREAD(Event_Wait(&mtc->readEvent))\r
-\r
- /* after Event_Wait(&mtc->readEvent) we must call Event_Set(&mtc->readEvent) in any case to unlock another threads */\r
-\r
- if (mtc->stopReading)\r
- {\r
- return Event_Set(&mtc->readEvent) == 0 ? SZ_OK : SZ_ERROR_THREAD;\r
- }\r
-\r
- res = MtProgress_GetError(&mtc->mtProgress);\r
- \r
- size = 0;\r
- inData = NULL;\r
- finished = True;\r
-\r
- if (res == SZ_OK)\r
- {\r
- size = mtc->blockSize;\r
- if (mtc->inStream)\r
- {\r
- if (!t->inBuf)\r
- {\r
- t->inBuf = (Byte *)ISzAlloc_Alloc(mtc->allocBig, mtc->blockSize);\r
- if (!t->inBuf)\r
- res = SZ_ERROR_MEM;\r
- }\r
- if (res == SZ_OK)\r
- {\r
- res = FullRead(mtc->inStream, t->inBuf, &size);\r
- readProcessed = mtc->readProcessed + size;\r
- mtc->readProcessed = readProcessed;\r
- }\r
- if (res != SZ_OK)\r
- {\r
- mtc->readRes = res;\r
- /* after reading error - we can stop encoding of previous blocks */\r
- MtProgress_SetError(&mtc->mtProgress, res);\r
- }\r
- else\r
- finished = (size != mtc->blockSize);\r
- }\r
- else\r
- {\r
- size_t rem;\r
- readProcessed = mtc->readProcessed;\r
- rem = mtc->inDataSize - (size_t)readProcessed;\r
- if (size > rem)\r
- size = rem;\r
- inData = mtc->inData + (size_t)readProcessed;\r
- readProcessed += size;\r
- mtc->readProcessed = readProcessed;\r
- finished = (mtc->inDataSize == (size_t)readProcessed);\r
- }\r
- }\r
-\r
- /* we must get some block from blocksSemaphore before Event_Set(&mtc->readEvent) */\r
-\r
- res2 = SZ_OK;\r
-\r
- if (Semaphore_Wait(&mtc->blocksSemaphore) != 0)\r
- {\r
- res2 = SZ_ERROR_THREAD;\r
- if (res == SZ_OK)\r
- {\r
- res = res2;\r
- // MtProgress_SetError(&mtc->mtProgress, res);\r
- }\r
- }\r
-\r
- bi = mtc->blockIndex;\r
-\r
- if (++mtc->blockIndex >= mtc->numBlocksMax)\r
- mtc->blockIndex = 0;\r
-\r
- bufIndex = (unsigned)(int)-1;\r
-\r
- if (res == SZ_OK)\r
- res = MtProgress_GetError(&mtc->mtProgress);\r
-\r
- if (res != SZ_OK)\r
- finished = True;\r
-\r
- if (!finished)\r
- {\r
- if (mtc->numStartedThreads < mtc->numStartedThreadsLimit\r
- && mtc->expectedDataSize != readProcessed)\r
- {\r
- res = MtCoderThread_CreateAndStart(&mtc->threads[mtc->numStartedThreads]);\r
- if (res == SZ_OK)\r
- mtc->numStartedThreads++;\r
- else\r
- {\r
- MtProgress_SetError(&mtc->mtProgress, res);\r
- finished = True;\r
- }\r
- }\r
- }\r
-\r
- if (finished)\r
- mtc->stopReading = True;\r
-\r
- RINOK_THREAD(Event_Set(&mtc->readEvent))\r
-\r
- if (res2 != SZ_OK)\r
- return res2;\r
-\r
- if (res == SZ_OK)\r
- {\r
- CriticalSection_Enter(&mtc->cs);\r
- bufIndex = mtc->freeBlockHead;\r
- mtc->freeBlockHead = mtc->freeBlockList[bufIndex];\r
- CriticalSection_Leave(&mtc->cs);\r
- \r
- res = mtc->mtCallback->Code(mtc->mtCallbackObject, t->index, bufIndex,\r
- mtc->inStream ? t->inBuf : inData, size, finished);\r
- \r
- // MtProgress_Reinit(&mtc->mtProgress, t->index);\r
-\r
- if (res != SZ_OK)\r
- MtProgress_SetError(&mtc->mtProgress, res);\r
- }\r
-\r
- {\r
- CMtCoderBlock *block = &mtc->blocks[bi];\r
- block->res = res;\r
- block->bufIndex = bufIndex;\r
- block->finished = finished;\r
- }\r
- \r
- #ifdef MTCODER__USE_WRITE_THREAD\r
- RINOK_THREAD(Event_Set(&mtc->writeEvents[bi]))\r
- #else\r
- {\r
- unsigned wi;\r
- {\r
- CriticalSection_Enter(&mtc->cs);\r
- wi = mtc->writeIndex;\r
- if (wi == bi)\r
- mtc->writeIndex = (unsigned)(int)-1;\r
- else\r
- mtc->ReadyBlocks[bi] = True;\r
- CriticalSection_Leave(&mtc->cs);\r
- }\r
-\r
- if (wi != bi)\r
- {\r
- if (res != SZ_OK || finished)\r
- return 0;\r
- continue;\r
- }\r
-\r
- if (mtc->writeRes != SZ_OK)\r
- res = mtc->writeRes;\r
-\r
- for (;;)\r
- {\r
- if (res == SZ_OK && bufIndex != (unsigned)(int)-1)\r
- {\r
- res = mtc->mtCallback->Write(mtc->mtCallbackObject, bufIndex);\r
- if (res != SZ_OK)\r
- {\r
- mtc->writeRes = res;\r
- MtProgress_SetError(&mtc->mtProgress, res);\r
- }\r
- }\r
-\r
- if (++wi >= mtc->numBlocksMax)\r
- wi = 0;\r
- {\r
- BoolInt isReady;\r
-\r
- CriticalSection_Enter(&mtc->cs);\r
- \r
- if (bufIndex != (unsigned)(int)-1)\r
- {\r
- mtc->freeBlockList[bufIndex] = mtc->freeBlockHead;\r
- mtc->freeBlockHead = bufIndex;\r
- }\r
- \r
- isReady = mtc->ReadyBlocks[wi];\r
- \r
- if (isReady)\r
- mtc->ReadyBlocks[wi] = False;\r
- else\r
- mtc->writeIndex = wi;\r
- \r
- CriticalSection_Leave(&mtc->cs);\r
-\r
- RINOK_THREAD(Semaphore_Release1(&mtc->blocksSemaphore))\r
-\r
- if (!isReady)\r
- break;\r
- }\r
-\r
- {\r
- CMtCoderBlock *block = &mtc->blocks[wi];\r
- if (res == SZ_OK && block->res != SZ_OK)\r
- res = block->res;\r
- bufIndex = block->bufIndex;\r
- finished = block->finished;\r
- }\r
- }\r
- }\r
- #endif\r
- \r
- if (finished || res != SZ_OK)\r
- return 0;\r
- }\r
-}\r
-\r
-\r
-static THREAD_FUNC_DECL ThreadFunc(void *pp)\r
-{\r
- CMtCoderThread *t = (CMtCoderThread *)pp;\r
- for (;;)\r
- {\r
- if (Event_Wait(&t->startEvent) != 0)\r
- return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD;\r
- if (t->stop)\r
- return 0;\r
- {\r
- SRes res = ThreadFunc2(t);\r
- CMtCoder *mtc = t->mtCoder;\r
- if (res != SZ_OK)\r
- {\r
- MtProgress_SetError(&mtc->mtProgress, res);\r
- }\r
- \r
- #ifndef MTCODER__USE_WRITE_THREAD\r
- {\r
- unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads);\r
- if (numFinished == mtc->numStartedThreads)\r
- if (Event_Set(&mtc->finishedEvent) != 0)\r
- return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD;\r
- }\r
- #endif\r
- }\r
- }\r
-}\r
-\r
-\r
-\r
-void MtCoder_Construct(CMtCoder *p)\r
-{\r
- unsigned i;\r
- \r
- p->blockSize = 0;\r
- p->numThreadsMax = 0;\r
- p->expectedDataSize = (UInt64)(Int64)-1;\r
-\r
- p->inStream = NULL;\r
- p->inData = NULL;\r
- p->inDataSize = 0;\r
-\r
- p->progress = NULL;\r
- p->allocBig = NULL;\r
-\r
- p->mtCallback = NULL;\r
- p->mtCallbackObject = NULL;\r
-\r
- p->allocatedBufsSize = 0;\r
-\r
- Event_Construct(&p->readEvent);\r
- Semaphore_Construct(&p->blocksSemaphore);\r
-\r
- for (i = 0; i < MTCODER__THREADS_MAX; i++)\r
- {\r
- CMtCoderThread *t = &p->threads[i];\r
- t->mtCoder = p;\r
- t->index = i;\r
- t->inBuf = NULL;\r
- t->stop = False;\r
- Event_Construct(&t->startEvent);\r
- Thread_Construct(&t->thread);\r
- }\r
-\r
- #ifdef MTCODER__USE_WRITE_THREAD\r
- for (i = 0; i < MTCODER__BLOCKS_MAX; i++)\r
- Event_Construct(&p->writeEvents[i]);\r
- #else\r
- Event_Construct(&p->finishedEvent);\r
- #endif\r
-\r
- CriticalSection_Init(&p->cs);\r
- CriticalSection_Init(&p->mtProgress.cs);\r
-}\r
-\r
-\r
-\r
-\r
-static void MtCoder_Free(CMtCoder *p)\r
-{\r
- unsigned i;\r
-\r
- /*\r
- p->stopReading = True;\r
- if (Event_IsCreated(&p->readEvent))\r
- Event_Set(&p->readEvent);\r
- */\r
-\r
- for (i = 0; i < MTCODER__THREADS_MAX; i++)\r
- MtCoderThread_Destruct(&p->threads[i]);\r
-\r
- Event_Close(&p->readEvent);\r
- Semaphore_Close(&p->blocksSemaphore);\r
-\r
- #ifdef MTCODER__USE_WRITE_THREAD\r
- for (i = 0; i < MTCODER__BLOCKS_MAX; i++)\r
- Event_Close(&p->writeEvents[i]);\r
- #else\r
- Event_Close(&p->finishedEvent);\r
- #endif\r
-}\r
-\r
-\r
-void MtCoder_Destruct(CMtCoder *p)\r
-{\r
- MtCoder_Free(p);\r
-\r
- CriticalSection_Delete(&p->cs);\r
- CriticalSection_Delete(&p->mtProgress.cs);\r
-}\r
-\r
-\r
-SRes MtCoder_Code(CMtCoder *p)\r
-{\r
- unsigned numThreads = p->numThreadsMax;\r
- unsigned numBlocksMax;\r
- unsigned i;\r
- SRes res = SZ_OK;\r
-\r
- if (numThreads > MTCODER__THREADS_MAX)\r
- numThreads = MTCODER__THREADS_MAX;\r
- numBlocksMax = MTCODER__GET_NUM_BLOCKS_FROM_THREADS(numThreads);\r
- \r
- if (p->blockSize < ((UInt32)1 << 26)) numBlocksMax++;\r
- if (p->blockSize < ((UInt32)1 << 24)) numBlocksMax++;\r
- if (p->blockSize < ((UInt32)1 << 22)) numBlocksMax++;\r
-\r
- if (numBlocksMax > MTCODER__BLOCKS_MAX)\r
- numBlocksMax = MTCODER__BLOCKS_MAX;\r
-\r
- if (p->blockSize != p->allocatedBufsSize)\r
- {\r
- for (i = 0; i < MTCODER__THREADS_MAX; i++)\r
- {\r
- CMtCoderThread *t = &p->threads[i];\r
- if (t->inBuf)\r
- {\r
- ISzAlloc_Free(p->allocBig, t->inBuf);\r
- t->inBuf = NULL;\r
- }\r
- }\r
- p->allocatedBufsSize = p->blockSize;\r
- }\r
-\r
- p->readRes = SZ_OK;\r
-\r
- MtProgress_Init(&p->mtProgress, p->progress);\r
-\r
- #ifdef MTCODER__USE_WRITE_THREAD\r
- for (i = 0; i < numBlocksMax; i++)\r
- {\r
- RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->writeEvents[i]));\r
- }\r
- #else\r
- RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->finishedEvent));\r
- #endif\r
-\r
- {\r
- RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->readEvent));\r
- RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, numBlocksMax, numBlocksMax));\r
- }\r
-\r
- for (i = 0; i < MTCODER__BLOCKS_MAX - 1; i++)\r
- p->freeBlockList[i] = i + 1;\r
- p->freeBlockList[MTCODER__BLOCKS_MAX - 1] = (unsigned)(int)-1;\r
- p->freeBlockHead = 0;\r
-\r
- p->readProcessed = 0;\r
- p->blockIndex = 0;\r
- p->numBlocksMax = numBlocksMax;\r
- p->stopReading = False;\r
-\r
- #ifndef MTCODER__USE_WRITE_THREAD\r
- p->writeIndex = 0;\r
- p->writeRes = SZ_OK;\r
- for (i = 0; i < MTCODER__BLOCKS_MAX; i++)\r
- p->ReadyBlocks[i] = False;\r
- p->numFinishedThreads = 0;\r
- #endif\r
-\r
- p->numStartedThreadsLimit = numThreads;\r
- p->numStartedThreads = 0;\r
-\r
- // for (i = 0; i < numThreads; i++)\r
- {\r
- CMtCoderThread *nextThread = &p->threads[p->numStartedThreads++];\r
- RINOK(MtCoderThread_CreateAndStart(nextThread));\r
- }\r
-\r
- RINOK_THREAD(Event_Set(&p->readEvent))\r
-\r
- #ifdef MTCODER__USE_WRITE_THREAD\r
- {\r
- unsigned bi = 0;\r
-\r
- for (;; bi++)\r
- {\r
- if (bi >= numBlocksMax)\r
- bi = 0;\r
-\r
- RINOK_THREAD(Event_Wait(&p->writeEvents[bi]))\r
-\r
- {\r
- const CMtCoderBlock *block = &p->blocks[bi];\r
- unsigned bufIndex = block->bufIndex;\r
- BoolInt finished = block->finished;\r
- if (res == SZ_OK && block->res != SZ_OK)\r
- res = block->res;\r
-\r
- if (bufIndex != (unsigned)(int)-1)\r
- {\r
- if (res == SZ_OK)\r
- {\r
- res = p->mtCallback->Write(p->mtCallbackObject, bufIndex);\r
- if (res != SZ_OK)\r
- MtProgress_SetError(&p->mtProgress, res);\r
- }\r
- \r
- CriticalSection_Enter(&p->cs);\r
- {\r
- p->freeBlockList[bufIndex] = p->freeBlockHead;\r
- p->freeBlockHead = bufIndex;\r
- }\r
- CriticalSection_Leave(&p->cs);\r
- }\r
- \r
- RINOK_THREAD(Semaphore_Release1(&p->blocksSemaphore))\r
-\r
- if (finished)\r
- break;\r
- }\r
- }\r
- }\r
- #else\r
- {\r
- WRes wres = Event_Wait(&p->finishedEvent);\r
- res = MY_SRes_HRESULT_FROM_WRes(wres);\r
- }\r
- #endif\r
-\r
- if (res == SZ_OK)\r
- res = p->readRes;\r
-\r
- if (res == SZ_OK)\r
- res = p->mtProgress.res;\r
-\r
- #ifndef MTCODER__USE_WRITE_THREAD\r
- if (res == SZ_OK)\r
- res = p->writeRes;\r
- #endif\r
-\r
- if (res != SZ_OK)\r
- MtCoder_Free(p);\r
- return res;\r
-}\r
-\r
-#endif\r
+++ /dev/null
-/* MtDec.c -- Multi-thread Decoder\r
-2021-12-21 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-// #define SHOW_DEBUG_INFO\r
-\r
-// #include <stdio.h>\r
-#include <string.h>\r
-\r
-#ifdef SHOW_DEBUG_INFO\r
-#include <stdio.h>\r
-#endif\r
-\r
-#include "MtDec.h"\r
-\r
-#ifndef _7ZIP_ST\r
-\r
-#ifdef SHOW_DEBUG_INFO\r
-#define PRF(x) x\r
-#else\r
-#define PRF(x)\r
-#endif\r
-\r
-#define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d))\r
-\r
-void MtProgress_Init(CMtProgress *p, ICompressProgress *progress)\r
-{\r
- p->progress = progress;\r
- p->res = SZ_OK;\r
- p->totalInSize = 0;\r
- p->totalOutSize = 0;\r
-}\r
-\r
-\r
-SRes MtProgress_Progress_ST(CMtProgress *p)\r
-{\r
- if (p->res == SZ_OK && p->progress)\r
- if (ICompressProgress_Progress(p->progress, p->totalInSize, p->totalOutSize) != SZ_OK)\r
- p->res = SZ_ERROR_PROGRESS;\r
- return p->res;\r
-}\r
-\r
-\r
-SRes MtProgress_ProgressAdd(CMtProgress *p, UInt64 inSize, UInt64 outSize)\r
-{\r
- SRes res;\r
- CriticalSection_Enter(&p->cs);\r
- \r
- p->totalInSize += inSize;\r
- p->totalOutSize += outSize;\r
- if (p->res == SZ_OK && p->progress)\r
- if (ICompressProgress_Progress(p->progress, p->totalInSize, p->totalOutSize) != SZ_OK)\r
- p->res = SZ_ERROR_PROGRESS;\r
- res = p->res;\r
- \r
- CriticalSection_Leave(&p->cs);\r
- return res;\r
-}\r
-\r
-\r
-SRes MtProgress_GetError(CMtProgress *p)\r
-{\r
- SRes res;\r
- CriticalSection_Enter(&p->cs);\r
- res = p->res;\r
- CriticalSection_Leave(&p->cs);\r
- return res;\r
-}\r
-\r
-\r
-void MtProgress_SetError(CMtProgress *p, SRes res)\r
-{\r
- CriticalSection_Enter(&p->cs);\r
- if (p->res == SZ_OK)\r
- p->res = res;\r
- CriticalSection_Leave(&p->cs);\r
-}\r
-\r
-\r
-#define RINOK_THREAD(x) RINOK_WRes(x)\r
-\r
-\r
-static WRes ArEvent_OptCreate_And_Reset(CEvent *p)\r
-{\r
- if (Event_IsCreated(p))\r
- return Event_Reset(p);\r
- return AutoResetEvent_CreateNotSignaled(p);\r
-}\r
-\r
-\r
-struct __CMtDecBufLink\r
-{\r
- struct __CMtDecBufLink *next;\r
- void *pad[3];\r
-};\r
-\r
-typedef struct __CMtDecBufLink CMtDecBufLink;\r
-\r
-#define MTDEC__LINK_DATA_OFFSET sizeof(CMtDecBufLink)\r
-#define MTDEC__DATA_PTR_FROM_LINK(link) ((Byte *)(link) + MTDEC__LINK_DATA_OFFSET)\r
-\r
-\r
-\r
-static THREAD_FUNC_DECL ThreadFunc(void *pp);\r
-\r
-\r
-static WRes MtDecThread_CreateEvents(CMtDecThread *t)\r
-{\r
- WRes wres = ArEvent_OptCreate_And_Reset(&t->canWrite);\r
- if (wres == 0)\r
- {\r
- wres = ArEvent_OptCreate_And_Reset(&t->canRead);\r
- if (wres == 0)\r
- return SZ_OK;\r
- }\r
- return wres;\r
-}\r
-\r
-\r
-static SRes MtDecThread_CreateAndStart(CMtDecThread *t)\r
-{\r
- WRes wres = MtDecThread_CreateEvents(t);\r
- // wres = 17; // for test\r
- if (wres == 0)\r
- {\r
- if (Thread_WasCreated(&t->thread))\r
- return SZ_OK;\r
- wres = Thread_Create(&t->thread, ThreadFunc, t);\r
- if (wres == 0)\r
- return SZ_OK;\r
- }\r
- return MY_SRes_HRESULT_FROM_WRes(wres);\r
-}\r
-\r
-\r
-void MtDecThread_FreeInBufs(CMtDecThread *t)\r
-{\r
- if (t->inBuf)\r
- {\r
- void *link = t->inBuf;\r
- t->inBuf = NULL;\r
- do\r
- {\r
- void *next = ((CMtDecBufLink *)link)->next;\r
- ISzAlloc_Free(t->mtDec->alloc, link);\r
- link = next;\r
- }\r
- while (link);\r
- }\r
-}\r
-\r
-\r
-static void MtDecThread_CloseThread(CMtDecThread *t)\r
-{\r
- if (Thread_WasCreated(&t->thread))\r
- {\r
- Event_Set(&t->canWrite); /* we can disable it. There are no threads waiting canWrite in normal cases */\r
- Event_Set(&t->canRead);\r
- Thread_Wait_Close(&t->thread);\r
- }\r
-\r
- Event_Close(&t->canRead);\r
- Event_Close(&t->canWrite);\r
-}\r
-\r
-static void MtDec_CloseThreads(CMtDec *p)\r
-{\r
- unsigned i;\r
- for (i = 0; i < MTDEC__THREADS_MAX; i++)\r
- MtDecThread_CloseThread(&p->threads[i]);\r
-}\r
-\r
-static void MtDecThread_Destruct(CMtDecThread *t)\r
-{\r
- MtDecThread_CloseThread(t);\r
- MtDecThread_FreeInBufs(t);\r
-}\r
-\r
-\r
-\r
-static SRes FullRead(ISeqInStream *stream, Byte *data, size_t *processedSize)\r
-{\r
- size_t size = *processedSize;\r
- *processedSize = 0;\r
- while (size != 0)\r
- {\r
- size_t cur = size;\r
- SRes res = ISeqInStream_Read(stream, data, &cur);\r
- *processedSize += cur;\r
- data += cur;\r
- size -= cur;\r
- RINOK(res);\r
- if (cur == 0)\r
- return SZ_OK;\r
- }\r
- return SZ_OK;\r
-}\r
-\r
-\r
-static SRes MtDec_GetError_Spec(CMtDec *p, UInt64 interruptIndex, BoolInt *wasInterrupted)\r
-{\r
- SRes res;\r
- CriticalSection_Enter(&p->mtProgress.cs);\r
- *wasInterrupted = (p->needInterrupt && interruptIndex > p->interruptIndex);\r
- res = p->mtProgress.res;\r
- CriticalSection_Leave(&p->mtProgress.cs);\r
- return res;\r
-}\r
-\r
-static SRes MtDec_Progress_GetError_Spec(CMtDec *p, UInt64 inSize, UInt64 outSize, UInt64 interruptIndex, BoolInt *wasInterrupted)\r
-{\r
- SRes res;\r
- CriticalSection_Enter(&p->mtProgress.cs);\r
-\r
- p->mtProgress.totalInSize += inSize;\r
- p->mtProgress.totalOutSize += outSize;\r
- if (p->mtProgress.res == SZ_OK && p->mtProgress.progress)\r
- if (ICompressProgress_Progress(p->mtProgress.progress, p->mtProgress.totalInSize, p->mtProgress.totalOutSize) != SZ_OK)\r
- p->mtProgress.res = SZ_ERROR_PROGRESS;\r
-\r
- *wasInterrupted = (p->needInterrupt && interruptIndex > p->interruptIndex);\r
- res = p->mtProgress.res;\r
- \r
- CriticalSection_Leave(&p->mtProgress.cs);\r
-\r
- return res;\r
-}\r
-\r
-static void MtDec_Interrupt(CMtDec *p, UInt64 interruptIndex)\r
-{\r
- CriticalSection_Enter(&p->mtProgress.cs);\r
- if (!p->needInterrupt || interruptIndex < p->interruptIndex)\r
- {\r
- p->interruptIndex = interruptIndex;\r
- p->needInterrupt = True;\r
- }\r
- CriticalSection_Leave(&p->mtProgress.cs);\r
-}\r
-\r
-Byte *MtDec_GetCrossBuff(CMtDec *p)\r
-{\r
- Byte *cr = p->crossBlock;\r
- if (!cr)\r
- {\r
- cr = (Byte *)ISzAlloc_Alloc(p->alloc, MTDEC__LINK_DATA_OFFSET + p->inBufSize);\r
- if (!cr)\r
- return NULL;\r
- p->crossBlock = cr;\r
- }\r
- return MTDEC__DATA_PTR_FROM_LINK(cr);\r
-}\r
-\r
-\r
-/*\r
- ThreadFunc2() returns:\r
- 0 - in all normal cases (even for stream error or memory allocation error)\r
- (!= 0) - WRes error return by system threading function\r
-*/\r
-\r
-// #define MTDEC_ProgessStep (1 << 22)\r
-#define MTDEC_ProgessStep (1 << 0)\r
-\r
-static WRes ThreadFunc2(CMtDecThread *t)\r
-{\r
- CMtDec *p = t->mtDec;\r
-\r
- PRF_STR_INT("ThreadFunc2", t->index);\r
-\r
- // SetThreadAffinityMask(GetCurrentThread(), 1 << t->index);\r
-\r
- for (;;)\r
- {\r
- SRes res, codeRes;\r
- BoolInt wasInterrupted, isAllocError, overflow, finish;\r
- SRes threadingErrorSRes;\r
- BoolInt needCode, needWrite, needContinue;\r
- \r
- size_t inDataSize_Start;\r
- UInt64 inDataSize;\r
- // UInt64 inDataSize_Full;\r
- \r
- UInt64 blockIndex;\r
-\r
- UInt64 inPrev = 0;\r
- UInt64 outPrev = 0;\r
- UInt64 inCodePos;\r
- UInt64 outCodePos;\r
- \r
- Byte *afterEndData = NULL;\r
- size_t afterEndData_Size = 0;\r
- BoolInt afterEndData_IsCross = False;\r
-\r
- BoolInt canCreateNewThread = False;\r
- // CMtDecCallbackInfo parse;\r
- CMtDecThread *nextThread;\r
-\r
- PRF_STR_INT("=============== Event_Wait(&t->canRead)", t->index);\r
-\r
- RINOK_THREAD(Event_Wait(&t->canRead));\r
- if (p->exitThread)\r
- return 0;\r
-\r
- PRF_STR_INT("after Event_Wait(&t->canRead)", t->index);\r
-\r
- // if (t->index == 3) return 19; // for test\r
-\r
- blockIndex = p->blockIndex++;\r
-\r
- // PRF(printf("\ncanRead\n"))\r
-\r
- res = MtDec_Progress_GetError_Spec(p, 0, 0, blockIndex, &wasInterrupted);\r
-\r
- finish = p->readWasFinished;\r
- needCode = False;\r
- needWrite = False;\r
- isAllocError = False;\r
- overflow = False;\r
-\r
- inDataSize_Start = 0;\r
- inDataSize = 0;\r
- // inDataSize_Full = 0;\r
-\r
- if (res == SZ_OK && !wasInterrupted)\r
- {\r
- // if (p->inStream)\r
- {\r
- CMtDecBufLink *prev = NULL;\r
- CMtDecBufLink *link = (CMtDecBufLink *)t->inBuf;\r
- size_t crossSize = p->crossEnd - p->crossStart;\r
-\r
- PRF(printf("\ncrossSize = %d\n", crossSize));\r
-\r
- for (;;)\r
- {\r
- if (!link)\r
- {\r
- link = (CMtDecBufLink *)ISzAlloc_Alloc(p->alloc, MTDEC__LINK_DATA_OFFSET + p->inBufSize);\r
- if (!link)\r
- {\r
- finish = True;\r
- // p->allocError_for_Read_BlockIndex = blockIndex;\r
- isAllocError = True;\r
- break;\r
- }\r
- link->next = NULL;\r
- if (prev)\r
- {\r
- // static unsigned g_num = 0;\r
- // printf("\n%6d : %x", ++g_num, (unsigned)(size_t)((Byte *)link - (Byte *)prev));\r
- prev->next = link;\r
- }\r
- else\r
- t->inBuf = (void *)link;\r
- }\r
-\r
- {\r
- Byte *data = MTDEC__DATA_PTR_FROM_LINK(link);\r
- Byte *parseData = data;\r
- size_t size;\r
-\r
- if (crossSize != 0)\r
- {\r
- inDataSize = crossSize;\r
- // inDataSize_Full = inDataSize;\r
- inDataSize_Start = crossSize;\r
- size = crossSize;\r
- parseData = MTDEC__DATA_PTR_FROM_LINK(p->crossBlock) + p->crossStart;\r
- PRF(printf("\ncross : crossStart = %7d crossEnd = %7d finish = %1d",\r
- (int)p->crossStart, (int)p->crossEnd, (int)finish));\r
- }\r
- else\r
- {\r
- size = p->inBufSize;\r
- \r
- res = FullRead(p->inStream, data, &size);\r
- \r
- // size = 10; // test\r
-\r
- inDataSize += size;\r
- // inDataSize_Full = inDataSize;\r
- if (!prev)\r
- inDataSize_Start = size;\r
-\r
- p->readProcessed += size;\r
- finish = (size != p->inBufSize);\r
- if (finish)\r
- p->readWasFinished = True;\r
- \r
- // res = E_INVALIDARG; // test\r
-\r
- if (res != SZ_OK)\r
- {\r
- // PRF(printf("\nRead error = %d\n", res))\r
- // we want to decode all data before error\r
- p->readRes = res;\r
- // p->readError_BlockIndex = blockIndex;\r
- p->readWasFinished = True;\r
- finish = True;\r
- res = SZ_OK;\r
- // break;\r
- }\r
-\r
- if (inDataSize - inPrev >= MTDEC_ProgessStep)\r
- {\r
- res = MtDec_Progress_GetError_Spec(p, 0, 0, blockIndex, &wasInterrupted);\r
- if (res != SZ_OK || wasInterrupted)\r
- break;\r
- inPrev = inDataSize;\r
- }\r
- }\r
-\r
- {\r
- CMtDecCallbackInfo parse;\r
-\r
- parse.startCall = (prev == NULL);\r
- parse.src = parseData;\r
- parse.srcSize = size;\r
- parse.srcFinished = finish;\r
- parse.canCreateNewThread = True;\r
-\r
- PRF(printf("\nParse size = %d\n", (unsigned)size));\r
-\r
- p->mtCallback->Parse(p->mtCallbackObject, t->index, &parse);\r
-\r
- PRF(printf(" Parse processed = %d, state = %d \n", (unsigned)parse.srcSize, (unsigned)parse.state));\r
-\r
- needWrite = True;\r
- canCreateNewThread = parse.canCreateNewThread;\r
-\r
- // printf("\n\n%12I64u %12I64u", (UInt64)p->mtProgress.totalInSize, (UInt64)p->mtProgress.totalOutSize);\r
- \r
- if (\r
- // parseRes != SZ_OK ||\r
- // inDataSize - (size - parse.srcSize) > p->inBlockMax\r
- // ||\r
- parse.state == MTDEC_PARSE_OVERFLOW\r
- // || wasInterrupted\r
- )\r
- {\r
- // Overflow or Parse error - switch from MT decoding to ST decoding\r
- finish = True;\r
- overflow = True;\r
-\r
- {\r
- PRF(printf("\n Overflow"));\r
- // PRF(printf("\nisBlockFinished = %d", (unsigned)parse.blockWasFinished));\r
- PRF(printf("\n inDataSize = %d", (unsigned)inDataSize));\r
- }\r
- \r
- if (crossSize != 0)\r
- memcpy(data, parseData, size);\r
- p->crossStart = 0;\r
- p->crossEnd = 0;\r
- break;\r
- }\r
-\r
- if (crossSize != 0)\r
- {\r
- memcpy(data, parseData, parse.srcSize);\r
- p->crossStart += parse.srcSize;\r
- }\r
-\r
- if (parse.state != MTDEC_PARSE_CONTINUE || finish)\r
- {\r
- // we don't need to parse in current thread anymore\r
-\r
- if (parse.state == MTDEC_PARSE_END)\r
- finish = True;\r
-\r
- needCode = True;\r
- // p->crossFinished = finish;\r
-\r
- if (parse.srcSize == size)\r
- {\r
- // full parsed - no cross transfer\r
- p->crossStart = 0;\r
- p->crossEnd = 0;\r
- break;\r
- }\r
-\r
- if (parse.state == MTDEC_PARSE_END)\r
- {\r
- afterEndData = parseData + parse.srcSize;\r
- afterEndData_Size = size - parse.srcSize;\r
- if (crossSize != 0)\r
- afterEndData_IsCross = True;\r
- // we reduce data size to required bytes (parsed only)\r
- inDataSize -= afterEndData_Size;\r
- if (!prev)\r
- inDataSize_Start = parse.srcSize;\r
- break;\r
- }\r
-\r
- {\r
- // partial parsed - need cross transfer\r
- if (crossSize != 0)\r
- inDataSize = parse.srcSize; // it's only parsed now\r
- else\r
- {\r
- // partial parsed - is not in initial cross block - we need to copy new data to cross block\r
- Byte *cr = MtDec_GetCrossBuff(p);\r
- if (!cr)\r
- {\r
- {\r
- PRF(printf("\ncross alloc error error\n"));\r
- // res = SZ_ERROR_MEM;\r
- finish = True;\r
- // p->allocError_for_Read_BlockIndex = blockIndex;\r
- isAllocError = True;\r
- break;\r
- }\r
- }\r
-\r
- {\r
- size_t crSize = size - parse.srcSize;\r
- inDataSize -= crSize;\r
- p->crossEnd = crSize;\r
- p->crossStart = 0;\r
- memcpy(cr, parseData + parse.srcSize, crSize);\r
- }\r
- }\r
-\r
- // inDataSize_Full = inDataSize;\r
- if (!prev)\r
- inDataSize_Start = parse.srcSize; // it's partial size (parsed only)\r
-\r
- finish = False;\r
- break;\r
- }\r
- }\r
-\r
- if (parse.srcSize != size)\r
- {\r
- res = SZ_ERROR_FAIL;\r
- PRF(printf("\nfinished error SZ_ERROR_FAIL = %d\n", res));\r
- break;\r
- }\r
- }\r
- }\r
- \r
- prev = link;\r
- link = link->next;\r
-\r
- if (crossSize != 0)\r
- {\r
- crossSize = 0;\r
- p->crossStart = 0;\r
- p->crossEnd = 0;\r
- }\r
- }\r
- }\r
-\r
- if (res == SZ_OK)\r
- res = MtDec_GetError_Spec(p, blockIndex, &wasInterrupted);\r
- }\r
-\r
- codeRes = SZ_OK;\r
-\r
- if (res == SZ_OK && needCode && !wasInterrupted)\r
- {\r
- codeRes = p->mtCallback->PreCode(p->mtCallbackObject, t->index);\r
- if (codeRes != SZ_OK)\r
- {\r
- needCode = False;\r
- finish = True;\r
- // SZ_ERROR_MEM is expected error here.\r
- // if (codeRes == SZ_ERROR_MEM) - we will try single-thread decoding later.\r
- // if (codeRes != SZ_ERROR_MEM) - we can stop decoding or try single-thread decoding.\r
- }\r
- }\r
- \r
- if (res != SZ_OK || wasInterrupted)\r
- finish = True;\r
- \r
- nextThread = NULL;\r
- threadingErrorSRes = SZ_OK;\r
-\r
- if (!finish)\r
- {\r
- if (p->numStartedThreads < p->numStartedThreads_Limit && canCreateNewThread)\r
- {\r
- SRes res2 = MtDecThread_CreateAndStart(&p->threads[p->numStartedThreads]);\r
- if (res2 == SZ_OK)\r
- {\r
- // if (p->numStartedThreads % 1000 == 0) PRF(printf("\n numStartedThreads=%d\n", p->numStartedThreads));\r
- p->numStartedThreads++;\r
- }\r
- else\r
- {\r
- PRF(printf("\nERROR: numStartedThreads=%d\n", p->numStartedThreads));\r
- if (p->numStartedThreads == 1)\r
- {\r
- // if only one thread is possible, we leave muti-threading code\r
- finish = True;\r
- needCode = False;\r
- threadingErrorSRes = res2;\r
- }\r
- else\r
- p->numStartedThreads_Limit = p->numStartedThreads;\r
- }\r
- }\r
- \r
- if (!finish)\r
- {\r
- unsigned nextIndex = t->index + 1;\r
- nextThread = &p->threads[nextIndex >= p->numStartedThreads ? 0 : nextIndex];\r
- RINOK_THREAD(Event_Set(&nextThread->canRead))\r
- // We have started executing for new iteration (with next thread)\r
- // And that next thread now is responsible for possible exit from decoding (threading_code)\r
- }\r
- }\r
-\r
- // each call of Event_Set(&nextThread->canRead) must be followed by call of Event_Set(&nextThread->canWrite)\r
- // if ( !finish ) we must call Event_Set(&nextThread->canWrite) in any case\r
- // if ( finish ) we switch to single-thread mode and there are 2 ways at the end of current iteration (current block):\r
- // - if (needContinue) after Write(&needContinue), we restore decoding with new iteration\r
- // - otherwise we stop decoding and exit from ThreadFunc2()\r
-\r
- // Don't change (finish) variable in the further code\r
-\r
-\r
- // ---------- CODE ----------\r
-\r
- inPrev = 0;\r
- outPrev = 0;\r
- inCodePos = 0;\r
- outCodePos = 0;\r
-\r
- if (res == SZ_OK && needCode && codeRes == SZ_OK)\r
- {\r
- BoolInt isStartBlock = True;\r
- CMtDecBufLink *link = (CMtDecBufLink *)t->inBuf;\r
-\r
- for (;;)\r
- {\r
- size_t inSize;\r
- int stop;\r
-\r
- if (isStartBlock)\r
- inSize = inDataSize_Start;\r
- else\r
- {\r
- UInt64 rem = inDataSize - inCodePos;\r
- inSize = p->inBufSize;\r
- if (inSize > rem)\r
- inSize = (size_t)rem;\r
- }\r
-\r
- inCodePos += inSize;\r
- stop = True;\r
-\r
- codeRes = p->mtCallback->Code(p->mtCallbackObject, t->index,\r
- (const Byte *)MTDEC__DATA_PTR_FROM_LINK(link), inSize,\r
- (inCodePos == inDataSize), // srcFinished\r
- &inCodePos, &outCodePos, &stop);\r
- \r
- if (codeRes != SZ_OK)\r
- {\r
- PRF(printf("\nCode Interrupt error = %x\n", codeRes));\r
- // we interrupt only later blocks\r
- MtDec_Interrupt(p, blockIndex);\r
- break;\r
- }\r
-\r
- if (stop || inCodePos == inDataSize)\r
- break;\r
- \r
- {\r
- const UInt64 inDelta = inCodePos - inPrev;\r
- const UInt64 outDelta = outCodePos - outPrev;\r
- if (inDelta >= MTDEC_ProgessStep || outDelta >= MTDEC_ProgessStep)\r
- {\r
- // Sleep(1);\r
- res = MtDec_Progress_GetError_Spec(p, inDelta, outDelta, blockIndex, &wasInterrupted);\r
- if (res != SZ_OK || wasInterrupted)\r
- break;\r
- inPrev = inCodePos;\r
- outPrev = outCodePos;\r
- }\r
- }\r
-\r
- link = link->next;\r
- isStartBlock = False;\r
- }\r
- }\r
-\r
-\r
- // ---------- WRITE ----------\r
- \r
- RINOK_THREAD(Event_Wait(&t->canWrite));\r
-\r
- {\r
- BoolInt isErrorMode = False;\r
- BoolInt canRecode = True;\r
- BoolInt needWriteToStream = needWrite;\r
-\r
- if (p->exitThread) return 0; // it's never executed in normal cases\r
-\r
- if (p->wasInterrupted)\r
- wasInterrupted = True;\r
- else\r
- {\r
- if (codeRes != SZ_OK) // || !needCode // check it !!!\r
- {\r
- p->wasInterrupted = True;\r
- p->codeRes = codeRes;\r
- if (codeRes == SZ_ERROR_MEM)\r
- isAllocError = True;\r
- }\r
- \r
- if (threadingErrorSRes)\r
- {\r
- p->wasInterrupted = True;\r
- p->threadingErrorSRes = threadingErrorSRes;\r
- needWriteToStream = False;\r
- }\r
- if (isAllocError)\r
- {\r
- p->wasInterrupted = True;\r
- p->isAllocError = True;\r
- needWriteToStream = False;\r
- }\r
- if (overflow)\r
- {\r
- p->wasInterrupted = True;\r
- p->overflow = True;\r
- needWriteToStream = False;\r
- }\r
- }\r
-\r
- if (needCode)\r
- {\r
- if (wasInterrupted)\r
- {\r
- inCodePos = 0;\r
- outCodePos = 0;\r
- }\r
- {\r
- const UInt64 inDelta = inCodePos - inPrev;\r
- const UInt64 outDelta = outCodePos - outPrev;\r
- // if (inDelta != 0 || outDelta != 0)\r
- res = MtProgress_ProgressAdd(&p->mtProgress, inDelta, outDelta);\r
- }\r
- }\r
-\r
- needContinue = (!finish);\r
-\r
- // if (res == SZ_OK && needWrite && !wasInterrupted)\r
- if (needWrite)\r
- {\r
- // p->inProcessed += inCodePos;\r
-\r
- PRF(printf("\n--Write afterSize = %d\n", (unsigned)afterEndData_Size));\r
-\r
- res = p->mtCallback->Write(p->mtCallbackObject, t->index,\r
- res == SZ_OK && needWriteToStream && !wasInterrupted, // needWrite\r
- afterEndData, afterEndData_Size, afterEndData_IsCross,\r
- &needContinue,\r
- &canRecode);\r
-\r
- // res = SZ_ERROR_FAIL; // for test\r
-\r
- PRF(printf("\nAfter Write needContinue = %d\n", (unsigned)needContinue));\r
- PRF(printf("\nprocessed = %d\n", (unsigned)p->inProcessed));\r
-\r
- if (res != SZ_OK)\r
- {\r
- PRF(printf("\nWrite error = %d\n", res));\r
- isErrorMode = True;\r
- p->wasInterrupted = True;\r
- }\r
- if (res != SZ_OK\r
- || (!needContinue && !finish))\r
- {\r
- PRF(printf("\nWrite Interrupt error = %x\n", res));\r
- MtDec_Interrupt(p, blockIndex);\r
- }\r
- }\r
-\r
- if (canRecode)\r
- if (!needCode\r
- || res != SZ_OK\r
- || p->wasInterrupted\r
- || codeRes != SZ_OK\r
- || wasInterrupted\r
- || p->numFilledThreads != 0\r
- || isErrorMode)\r
- {\r
- if (p->numFilledThreads == 0)\r
- p->filledThreadStart = t->index;\r
- if (inDataSize != 0 || !finish)\r
- {\r
- t->inDataSize_Start = inDataSize_Start;\r
- t->inDataSize = inDataSize;\r
- p->numFilledThreads++;\r
- }\r
- PRF(printf("\np->numFilledThreads = %d\n", p->numFilledThreads));\r
- PRF(printf("p->filledThreadStart = %d\n", p->filledThreadStart));\r
- }\r
-\r
- if (!finish)\r
- {\r
- RINOK_THREAD(Event_Set(&nextThread->canWrite));\r
- }\r
- else\r
- {\r
- if (needContinue)\r
- {\r
- // we restore decoding with new iteration\r
- RINOK_THREAD(Event_Set(&p->threads[0].canWrite));\r
- }\r
- else\r
- {\r
- // we exit from decoding\r
- if (t->index == 0)\r
- return SZ_OK;\r
- p->exitThread = True;\r
- }\r
- RINOK_THREAD(Event_Set(&p->threads[0].canRead));\r
- }\r
- }\r
- }\r
-}\r
-\r
-#ifdef _WIN32\r
-#define USE_ALLOCA\r
-#endif\r
-\r
-#ifdef USE_ALLOCA\r
-#ifdef _WIN32\r
-#include <malloc.h>\r
-#else\r
-#include <stdlib.h>\r
-#endif\r
-#endif\r
-\r
-\r
-static THREAD_FUNC_DECL ThreadFunc1(void *pp)\r
-{\r
- WRes res;\r
-\r
- CMtDecThread *t = (CMtDecThread *)pp;\r
- CMtDec *p;\r
-\r
- // fprintf(stdout, "\n%d = %p\n", t->index, &t);\r
-\r
- res = ThreadFunc2(t);\r
- p = t->mtDec;\r
- if (res == 0)\r
- return (THREAD_FUNC_RET_TYPE)(UINT_PTR)p->exitThreadWRes;\r
- {\r
- // it's unexpected situation for some threading function error\r
- if (p->exitThreadWRes == 0)\r
- p->exitThreadWRes = res;\r
- PRF(printf("\nthread exit error = %d\n", res));\r
- p->exitThread = True;\r
- Event_Set(&p->threads[0].canRead);\r
- Event_Set(&p->threads[0].canWrite);\r
- MtProgress_SetError(&p->mtProgress, MY_SRes_HRESULT_FROM_WRes(res));\r
- }\r
- return (THREAD_FUNC_RET_TYPE)(UINT_PTR)res;\r
-}\r
-\r
-static MY_NO_INLINE THREAD_FUNC_DECL ThreadFunc(void *pp)\r
-{\r
- #ifdef USE_ALLOCA\r
- CMtDecThread *t = (CMtDecThread *)pp;\r
- // fprintf(stderr, "\n%d = %p - before", t->index, &t);\r
- t->allocaPtr = alloca(t->index * 128);\r
- #endif\r
- return ThreadFunc1(pp);\r
-}\r
-\r
-\r
-int MtDec_PrepareRead(CMtDec *p)\r
-{\r
- if (p->crossBlock && p->crossStart == p->crossEnd)\r
- {\r
- ISzAlloc_Free(p->alloc, p->crossBlock);\r
- p->crossBlock = NULL;\r
- }\r
- \r
- {\r
- unsigned i;\r
- for (i = 0; i < MTDEC__THREADS_MAX; i++)\r
- if (i > p->numStartedThreads\r
- || p->numFilledThreads <=\r
- (i >= p->filledThreadStart ?\r
- i - p->filledThreadStart :\r
- i + p->numStartedThreads - p->filledThreadStart))\r
- MtDecThread_FreeInBufs(&p->threads[i]);\r
- }\r
-\r
- return (p->numFilledThreads != 0) || (p->crossStart != p->crossEnd);\r
-}\r
-\r
- \r
-const Byte *MtDec_Read(CMtDec *p, size_t *inLim)\r
-{\r
- while (p->numFilledThreads != 0)\r
- {\r
- CMtDecThread *t = &p->threads[p->filledThreadStart];\r
- \r
- if (*inLim != 0)\r
- {\r
- {\r
- void *link = t->inBuf;\r
- void *next = ((CMtDecBufLink *)link)->next;\r
- ISzAlloc_Free(p->alloc, link);\r
- t->inBuf = next;\r
- }\r
- \r
- if (t->inDataSize == 0)\r
- {\r
- MtDecThread_FreeInBufs(t);\r
- if (--p->numFilledThreads == 0)\r
- break;\r
- if (++p->filledThreadStart == p->numStartedThreads)\r
- p->filledThreadStart = 0;\r
- t = &p->threads[p->filledThreadStart];\r
- }\r
- }\r
- \r
- {\r
- size_t lim = t->inDataSize_Start;\r
- if (lim != 0)\r
- t->inDataSize_Start = 0;\r
- else\r
- {\r
- UInt64 rem = t->inDataSize;\r
- lim = p->inBufSize;\r
- if (lim > rem)\r
- lim = (size_t)rem;\r
- }\r
- t->inDataSize -= lim;\r
- *inLim = lim;\r
- return (const Byte *)MTDEC__DATA_PTR_FROM_LINK(t->inBuf);\r
- }\r
- }\r
-\r
- {\r
- size_t crossSize = p->crossEnd - p->crossStart;\r
- if (crossSize != 0)\r
- {\r
- const Byte *data = MTDEC__DATA_PTR_FROM_LINK(p->crossBlock) + p->crossStart;\r
- *inLim = crossSize;\r
- p->crossStart = 0;\r
- p->crossEnd = 0;\r
- return data;\r
- }\r
- *inLim = 0;\r
- if (p->crossBlock)\r
- {\r
- ISzAlloc_Free(p->alloc, p->crossBlock);\r
- p->crossBlock = NULL;\r
- }\r
- return NULL;\r
- }\r
-}\r
-\r
-\r
-void MtDec_Construct(CMtDec *p)\r
-{\r
- unsigned i;\r
- \r
- p->inBufSize = (size_t)1 << 18;\r
-\r
- p->numThreadsMax = 0;\r
-\r
- p->inStream = NULL;\r
- \r
- // p->inData = NULL;\r
- // p->inDataSize = 0;\r
-\r
- p->crossBlock = NULL;\r
- p->crossStart = 0;\r
- p->crossEnd = 0;\r
-\r
- p->numFilledThreads = 0;\r
-\r
- p->progress = NULL;\r
- p->alloc = NULL;\r
-\r
- p->mtCallback = NULL;\r
- p->mtCallbackObject = NULL;\r
-\r
- p->allocatedBufsSize = 0;\r
-\r
- for (i = 0; i < MTDEC__THREADS_MAX; i++)\r
- {\r
- CMtDecThread *t = &p->threads[i];\r
- t->mtDec = p;\r
- t->index = i;\r
- t->inBuf = NULL;\r
- Event_Construct(&t->canRead);\r
- Event_Construct(&t->canWrite);\r
- Thread_Construct(&t->thread);\r
- }\r
-\r
- // Event_Construct(&p->finishedEvent);\r
-\r
- CriticalSection_Init(&p->mtProgress.cs);\r
-}\r
-\r
-\r
-static void MtDec_Free(CMtDec *p)\r
-{\r
- unsigned i;\r
-\r
- p->exitThread = True;\r
-\r
- for (i = 0; i < MTDEC__THREADS_MAX; i++)\r
- MtDecThread_Destruct(&p->threads[i]);\r
-\r
- // Event_Close(&p->finishedEvent);\r
-\r
- if (p->crossBlock)\r
- {\r
- ISzAlloc_Free(p->alloc, p->crossBlock);\r
- p->crossBlock = NULL;\r
- }\r
-}\r
-\r
-\r
-void MtDec_Destruct(CMtDec *p)\r
-{\r
- MtDec_Free(p);\r
-\r
- CriticalSection_Delete(&p->mtProgress.cs);\r
-}\r
-\r
-\r
-SRes MtDec_Code(CMtDec *p)\r
-{\r
- unsigned i;\r
-\r
- p->inProcessed = 0;\r
-\r
- p->blockIndex = 1; // it must be larger than not_defined index (0)\r
- p->isAllocError = False;\r
- p->overflow = False;\r
- p->threadingErrorSRes = SZ_OK;\r
-\r
- p->needContinue = True;\r
-\r
- p->readWasFinished = False;\r
- p->needInterrupt = False;\r
- p->interruptIndex = (UInt64)(Int64)-1;\r
-\r
- p->readProcessed = 0;\r
- p->readRes = SZ_OK;\r
- p->codeRes = SZ_OK;\r
- p->wasInterrupted = False;\r
-\r
- p->crossStart = 0;\r
- p->crossEnd = 0;\r
-\r
- p->filledThreadStart = 0;\r
- p->numFilledThreads = 0;\r
-\r
- {\r
- unsigned numThreads = p->numThreadsMax;\r
- if (numThreads > MTDEC__THREADS_MAX)\r
- numThreads = MTDEC__THREADS_MAX;\r
- p->numStartedThreads_Limit = numThreads;\r
- p->numStartedThreads = 0;\r
- }\r
-\r
- if (p->inBufSize != p->allocatedBufsSize)\r
- {\r
- for (i = 0; i < MTDEC__THREADS_MAX; i++)\r
- {\r
- CMtDecThread *t = &p->threads[i];\r
- if (t->inBuf)\r
- MtDecThread_FreeInBufs(t);\r
- }\r
- if (p->crossBlock)\r
- {\r
- ISzAlloc_Free(p->alloc, p->crossBlock);\r
- p->crossBlock = NULL;\r
- }\r
-\r
- p->allocatedBufsSize = p->inBufSize;\r
- }\r
-\r
- MtProgress_Init(&p->mtProgress, p->progress);\r
-\r
- // RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->finishedEvent));\r
- p->exitThread = False;\r
- p->exitThreadWRes = 0;\r
-\r
- {\r
- WRes wres;\r
- SRes sres;\r
- CMtDecThread *nextThread = &p->threads[p->numStartedThreads++];\r
- // wres = MtDecThread_CreateAndStart(nextThread);\r
- wres = MtDecThread_CreateEvents(nextThread);\r
- if (wres == 0) { wres = Event_Set(&nextThread->canWrite);\r
- if (wres == 0) { wres = Event_Set(&nextThread->canRead);\r
- if (wres == 0) { THREAD_FUNC_RET_TYPE res = ThreadFunc(nextThread);\r
- wres = (WRes)(UINT_PTR)res;\r
- if (wres != 0)\r
- {\r
- p->needContinue = False;\r
- MtDec_CloseThreads(p);\r
- }}}}\r
-\r
- // wres = 17; // for test\r
- // wres = Event_Wait(&p->finishedEvent);\r
-\r
- sres = MY_SRes_HRESULT_FROM_WRes(wres);\r
-\r
- if (sres != 0)\r
- p->threadingErrorSRes = sres;\r
-\r
- if (\r
- // wres == 0\r
- // wres != 0\r
- // || p->mtc.codeRes == SZ_ERROR_MEM\r
- p->isAllocError\r
- || p->threadingErrorSRes != SZ_OK\r
- || p->overflow)\r
- {\r
- // p->needContinue = True;\r
- }\r
- else\r
- p->needContinue = False;\r
- \r
- if (p->needContinue)\r
- return SZ_OK;\r
-\r
- // if (sres != SZ_OK)\r
- return sres;\r
- // return SZ_ERROR_FAIL;\r
- }\r
-}\r
-\r
-#endif\r
+++ /dev/null
-/* Sort.c -- Sort functions\r
-2014-04-05 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-#include "Sort.h"\r
-\r
-#define HeapSortDown(p, k, size, temp) \\r
- { for (;;) { \\r
- size_t s = (k << 1); \\r
- if (s > size) break; \\r
- if (s < size && p[s + 1] > p[s]) s++; \\r
- if (temp >= p[s]) break; \\r
- p[k] = p[s]; k = s; \\r
- } p[k] = temp; }\r
-\r
-void HeapSort(UInt32 *p, size_t size)\r
-{\r
- if (size <= 1)\r
- return;\r
- p--;\r
- {\r
- size_t i = size / 2;\r
- do\r
- {\r
- UInt32 temp = p[i];\r
- size_t k = i;\r
- HeapSortDown(p, k, size, temp)\r
- }\r
- while (--i != 0);\r
- }\r
- /*\r
- do\r
- {\r
- size_t k = 1;\r
- UInt32 temp = p[size];\r
- p[size--] = p[1];\r
- HeapSortDown(p, k, size, temp)\r
- }\r
- while (size > 1);\r
- */\r
- while (size > 3)\r
- {\r
- UInt32 temp = p[size];\r
- size_t k = (p[3] > p[2]) ? 3 : 2;\r
- p[size--] = p[1];\r
- p[1] = p[k];\r
- HeapSortDown(p, k, size, temp)\r
- }\r
- {\r
- UInt32 temp = p[size];\r
- p[size] = p[1];\r
- if (size > 2 && p[2] < temp)\r
- {\r
- p[1] = p[2];\r
- p[2] = temp;\r
- }\r
- else\r
- p[1] = temp;\r
- }\r
-}\r
-\r
-void HeapSort64(UInt64 *p, size_t size)\r
-{\r
- if (size <= 1)\r
- return;\r
- p--;\r
- {\r
- size_t i = size / 2;\r
- do\r
- {\r
- UInt64 temp = p[i];\r
- size_t k = i;\r
- HeapSortDown(p, k, size, temp)\r
- }\r
- while (--i != 0);\r
- }\r
- /*\r
- do\r
- {\r
- size_t k = 1;\r
- UInt64 temp = p[size];\r
- p[size--] = p[1];\r
- HeapSortDown(p, k, size, temp)\r
- }\r
- while (size > 1);\r
- */\r
- while (size > 3)\r
- {\r
- UInt64 temp = p[size];\r
- size_t k = (p[3] > p[2]) ? 3 : 2;\r
- p[size--] = p[1];\r
- p[1] = p[k];\r
- HeapSortDown(p, k, size, temp)\r
- }\r
- {\r
- UInt64 temp = p[size];\r
- p[size] = p[1];\r
- if (size > 2 && p[2] < temp)\r
- {\r
- p[1] = p[2];\r
- p[2] = temp;\r
- }\r
- else\r
- p[1] = temp;\r
- }\r
-}\r
-\r
-/*\r
-#define HeapSortRefDown(p, vals, n, size, temp) \\r
- { size_t k = n; UInt32 val = vals[temp]; for (;;) { \\r
- size_t s = (k << 1); \\r
- if (s > size) break; \\r
- if (s < size && vals[p[s + 1]] > vals[p[s]]) s++; \\r
- if (val >= vals[p[s]]) break; \\r
- p[k] = p[s]; k = s; \\r
- } p[k] = temp; }\r
-\r
-void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size)\r
-{\r
- if (size <= 1)\r
- return;\r
- p--;\r
- {\r
- size_t i = size / 2;\r
- do\r
- {\r
- UInt32 temp = p[i];\r
- HeapSortRefDown(p, vals, i, size, temp);\r
- }\r
- while (--i != 0);\r
- }\r
- do\r
- {\r
- UInt32 temp = p[size];\r
- p[size--] = p[1];\r
- HeapSortRefDown(p, vals, 1, size, temp);\r
- }\r
- while (size > 1);\r
-}\r
-*/\r
+++ /dev/null
-PLATFORM=\r
-O=b/c\r
-IS_X64=\r
-IS_X86=\r
-IS_ARM64=\r
-CROSS_COMPILE=\r
-MY_ARCH=\r
-USE_ASM=\r
-CC=$(CROSS_COMPILE)clang\r
-CXX=$(CROSS_COMPILE)clang++\r
-USE_CLANG=1\r
+++ /dev/null
-PLATFORM=arm64\r
-O=b/c_$(PLATFORM)\r
-IS_X64=\r
-IS_X86=\r
-IS_ARM64=1\r
-CROSS_COMPILE=\r
-MY_ARCH=\r
-USE_ASM=1\r
-CC=$(CROSS_COMPILE)clang\r
-CXX=$(CROSS_COMPILE)clang++\r
-USE_CLANG=1\r
+++ /dev/null
-PLATFORM=x64\r
-O=b/c_$(PLATFORM)\r
-IS_X64=1\r
-IS_X86=\r
-IS_ARM64=\r
-CROSS_COMPILE=\r
-MY_ARCH=\r
-USE_ASM=1\r
-CC=$(CROSS_COMPILE)clang\r
-CXX=$(CROSS_COMPILE)clang++\r
-USE_CLANG=1\r
+++ /dev/null
-PLATFORM=x86\r
-O=b/c_$(PLATFORM)\r
-IS_X64=\r
-IS_X86=1\r
-IS_ARM64=\r
-CROSS_COMPILE=\r
-MY_ARCH=-m32\r
-USE_ASM=1\r
-CC=$(CROSS_COMPILE)clang\r
-CXX=$(CROSS_COMPILE)clang++\r
-USE_CLANG=1\r
+++ /dev/null
-PLATFORM=\r
-O=b/g\r
-IS_X64=\r
-IS_X86=\r
-IS_ARM64=\r
-CROSS_COMPILE=\r
-MY_ARCH=\r
-USE_ASM=\r
-CC=$(CROSS_COMPILE)gcc\r
-CXX=$(CROSS_COMPILE)g++\r
-\r
-# -march=armv8-a+crc+crypto\r
+++ /dev/null
-PLATFORM=arm64\r
-O=b/g_$(PLATFORM)\r
-IS_X64=\r
-IS_X86=\r
-IS_ARM64=1\r
-CROSS_COMPILE=\r
-MY_ARCH=-mtune=cortex-a53\r
-USE_ASM=1\r
-CC=$(CROSS_COMPILE)gcc\r
-CXX=$(CROSS_COMPILE)g++\r
-\r
-# -march=armv8-a+crc+crypto\r
+++ /dev/null
-PLATFORM=x64\r
-O=b/g_$(PLATFORM)\r
-IS_X64=1\r
-IS_X86=\r
-IS_ARM64=\r
-CROSS_COMPILE=\r
-MY_ARCH=\r
-USE_ASM=1\r
-CC=$(CROSS_COMPILE)gcc\r
-CXX=$(CROSS_COMPILE)g++\r
+++ /dev/null
-PLATFORM=x86\r
-O=b/g_$(PLATFORM)\r
-IS_X64=\r
-IS_X86=1\r
-IS_ARM64=\r
-CROSS_COMPILE=\r
-MY_ARCH=-m32\r
-USE_ASM=1\r
-CC=$(CROSS_COMPILE)gcc\r
-CXX=$(CROSS_COMPILE)g++\r
+++ /dev/null
-PLATFORM=arm64\r
-O=b/m_$(PLATFORM)\r
-IS_X64=\r
-IS_X86=\r
-IS_ARM64=1\r
-CROSS_COMPILE=\r
-MY_ARCH=-arch arm64\r
-USE_ASM=1\r
-CC=$(CROSS_COMPILE)clang\r
-CXX=$(CROSS_COMPILE)clang++\r
-USE_CLANG=1\r
+++ /dev/null
-PLATFORM=x64\r
-O=b/m_$(PLATFORM)\r
-IS_X64=1\r
-IS_X86=\r
-IS_ARM64=\r
-CROSS_COMPILE=\r
-MY_ARCH=-arch x86_64\r
-USE_ASM=\r
-CC=$(CROSS_COMPILE)clang\r
-CXX=$(CROSS_COMPILE)clang++\r
-USE_CLANG=1\r
+++ /dev/null
-CFLAGS_WARN_CLANG_3_8_UNIQ = \\r
- -Wno-reserved-id-macro \\r
- -Wno-old-style-cast \\r
- -Wno-c++11-long-long \\r
- -Wno-unused-macros \\r
-\r
-CFLAGS_WARN_CLANG_3_8 = \\r
- $(CFLAGS_WARN_CLANG_3_8_UNIQ) \\r
- -Weverything \\r
- -Wno-extra-semi \\r
- -Wno-sign-conversion \\r
- -Wno-language-extension-token \\r
- -Wno-global-constructors \\r
- -Wno-non-virtual-dtor \\r
- -Wno-switch-enum \\r
- -Wno-covered-switch-default \\r
- -Wno-cast-qual \\r
- -Wno-padded \\r
- -Wno-exit-time-destructors \\r
- -Wno-weak-vtables \\r
-\r
-CFLAGS_WARN_CLANG_12= $(CFLAGS_WARN_CLANG_3_8) \\r
- -Wno-extra-semi-stmt \\r
- -Wno-zero-as-null-pointer-constant \\r
- -Wno-deprecated-dynamic-exception-spec \\r
- -Wno-c++98-compat-pedantic \\r
- -Wno-atomic-implicit-seq-cst \\r
- -Wconversion \\r
- -Wno-sign-conversion \\r
-\r
-CFLAGS_WARN_1 = \\r
- -Wno-deprecated-copy-dtor \\r
-\r
-\r
-\r
-\r
-CFLAGS_WARN = $(CFLAGS_WARN_CLANG_12) $(CFLAGS_WARN_1)\r
+++ /dev/null
-CFLAGS_WARN_CLANG_3_8_UNIQ = \\r
- -Wno-reserved-id-macro \\r
- -Wno-old-style-cast \\r
- -Wno-c++11-long-long \\r
- -Wno-unused-macros \\r
-\r
-CFLAGS_WARN_CLANG_3_8 = \\r
- $(CFLAGS_WARN_CLANG_3_8_UNIQ) \\r
- -Weverything \\r
- -Wno-extra-semi \\r
- -Wno-sign-conversion \\r
- -Wno-language-extension-token \\r
- -Wno-global-constructors \\r
- -Wno-non-virtual-dtor \\r
- -Wno-switch-enum \\r
- -Wno-covered-switch-default \\r
- -Wno-cast-qual \\r
- -Wno-padded \\r
- -Wno-exit-time-destructors \\r
- -Wno-weak-vtables \\r
-\r
-CFLAGS_WARN_CLANG_12= $(CFLAGS_WARN_CLANG_3_8) \\r
- -Wno-extra-semi-stmt \\r
- -Wno-zero-as-null-pointer-constant \\r
- -Wno-deprecated-dynamic-exception-spec \\r
- -Wno-c++98-compat-pedantic \\r
- -Wno-atomic-implicit-seq-cst \\r
- -Wconversion \\r
- -Wno-sign-conversion \\r
-\r
-CFLAGS_WARN_MAC = \\r
- -Wno-poison-system-directories \\r
- -Wno-c++11-long-long \\r
- -Wno-atomic-implicit-seq-cst \\r
-\r
-\r
-CFLAGS_WARN = $(CFLAGS_WARN_CLANG_12) $(CFLAGS_WARN_MAC)\r
+++ /dev/null
-CFLAGS_WARN_GCC_4_5 = \\r
-\r
-CFLAGS_WARN_GCC_6 = \\r
- -Waddress \\r
- -Waggressive-loop-optimizations \\r
- -Wattributes \\r
- -Wbool-compare \\r
- -Wcast-align \\r
- -Wcomment \\r
- -Wdiv-by-zero \\r
- -Wduplicated-cond \\r
- -Wformat-contains-nul \\r
- -Winit-self \\r
- -Wint-to-pointer-cast \\r
- -Wunused \\r
- -Wunused-macros \\r
-\r
-# -Wno-strict-aliasing\r
-\r
-CFLAGS_WARN_GCC_9 = \\r
- -Waddress \\r
- -Waddress-of-packed-member \\r
- -Waggressive-loop-optimizations \\r
- -Wattributes \\r
- -Wbool-compare \\r
- -Wbool-operation \\r
- -Wcast-align \\r
- -Wcast-align=strict \\r
- -Wcomment \\r
- -Wdangling-else \\r
- -Wdiv-by-zero \\r
- -Wduplicated-branches \\r
- -Wduplicated-cond \\r
- -Wformat-contains-nul \\r
- -Wimplicit-fallthrough=5 \\r
- -Winit-self \\r
- -Wint-in-bool-context \\r
- -Wint-to-pointer-cast \\r
- -Wunused \\r
- -Wunused-macros \\r
- -Wconversion \\r
-\r
-# -Wno-sign-conversion \\r
-\r
-CFLAGS_WARN_GCC_PPMD_UNALIGNED = \\r
- -Wno-strict-aliasing \\r
-\r
-\r
-CFLAGS_WARN = $(CFLAGS_WARN_GCC_9) \\r
-\r
-# $(CFLAGS_WARN_GCC_PPMD_UNALIGNED)\r
-// 7zAsm.S -- ASM macros for arm64\r
-// 2021-04-25 : Igor Pavlov : Public domain\r
-\r
-#define r0 x0\r
-#define r1 x1\r
-#define r2 x2\r
-#define r3 x3\r
-#define r4 x4\r
-#define r5 x5\r
-#define r6 x6\r
-#define r7 x7\r
-#define r8 x8\r
-#define r9 x9\r
-#define r10 x10\r
-#define r11 x11\r
-#define r12 x12\r
-#define r13 x13\r
-#define r14 x14\r
-#define r15 x15\r
-#define r16 x16\r
-#define r17 x17\r
-#define r18 x18\r
-#define r19 x19\r
-#define r20 x20\r
-#define r21 x21\r
-#define r22 x22\r
-#define r23 x23\r
-#define r24 x24\r
-#define r25 x25\r
-#define r26 x26\r
-#define r27 x27\r
-#define r28 x28\r
-#define r29 x29\r
-#define r30 x30\r
-\r
-#define REG_ABI_PARAM_0 r0\r
-#define REG_ABI_PARAM_1 r1\r
-#define REG_ABI_PARAM_2 r2\r
-\r
-\r
-.macro p2_add reg:req, param:req\r
- add \reg, \reg, \param\r
-.endm\r
-\r
-.macro p2_sub reg:req, param:req\r
- sub \reg, \reg, \param\r
-.endm\r
-\r
-.macro p2_sub_s reg:req, param:req\r
- subs \reg, \reg, \param\r
-.endm\r
-\r
-.macro p2_and reg:req, param:req\r
- and \reg, \reg, \param\r
-.endm\r
-\r
-.macro xor reg:req, param:req\r
- eor \reg, \reg, \param\r
-.endm\r
-\r
-.macro or reg:req, param:req\r
- orr \reg, \reg, \param\r
-.endm\r
-\r
-.macro shl reg:req, param:req\r
- lsl \reg, \reg, \param\r
-.endm\r
-\r
-.macro shr reg:req, param:req\r
- lsr \reg, \reg, \param\r
-.endm\r
-\r
-.macro sar reg:req, param:req\r
- asr \reg, \reg, \param\r
-.endm\r
-\r
-.macro p1_neg reg:req\r
- neg \reg, \reg\r
-.endm\r
-\r
-.macro dec reg:req\r
- sub \reg, \reg, 1\r
-.endm\r
-\r
-.macro dec_s reg:req\r
- subs \reg, \reg, 1\r
-.endm\r
-\r
-.macro inc reg:req\r
- add \reg, \reg, 1\r
-.endm\r
-\r
-.macro inc_s reg:req\r
- adds \reg, \reg, 1\r
-.endm\r
-\r
-\r
-.macro imul reg:req, param:req\r
- mul \reg, \reg, \param\r
-.endm\r
-\r
-/*\r
-arm64 and arm use reverted c flag after subs/cmp instructions:\r
- arm64-arm : x86\r
- b.lo / b.cc : jb / jc\r
- b.hs / b.cs : jae / jnc\r
-*/ \r
-\r
-.macro jmp lab:req\r
- b \lab\r
-.endm\r
-\r
-.macro je lab:req\r
- b.eq \lab\r
-.endm\r
-\r
-.macro jz lab:req\r
- b.eq \lab\r
-.endm\r
-\r
-.macro jnz lab:req\r
- b.ne \lab\r
-.endm\r
-\r
-.macro jne lab:req\r
- b.ne \lab\r
-.endm\r
-\r
-.macro jb lab:req\r
- b.lo \lab\r
-.endm\r
-\r
-.macro jbe lab:req\r
- b.ls \lab\r
-.endm\r
-\r
-.macro ja lab:req\r
- b.hi \lab\r
-.endm\r
-\r
-.macro jae lab:req\r
- b.hs \lab\r
-.endm\r
-\r
-\r
-.macro cmove dest:req, srcTrue:req\r
- csel \dest, \srcTrue, \dest, eq\r
-.endm\r
-\r
-.macro cmovne dest:req, srcTrue:req\r
- csel \dest, \srcTrue, \dest, ne\r
-.endm\r
-\r
-.macro cmovs dest:req, srcTrue:req\r
- csel \dest, \srcTrue, \dest, mi\r
-.endm\r
-\r
-.macro cmovns dest:req, srcTrue:req\r
- csel \dest, \srcTrue, \dest, pl\r
-.endm\r
-\r
-.macro cmovb dest:req, srcTrue:req\r
- csel \dest, \srcTrue, \dest, lo\r
-.endm\r
-\r
-.macro cmovae dest:req, srcTrue:req\r
- csel \dest, \srcTrue, \dest, hs\r
-.endm\r
-\r
-\r
-.macro MY_ALIGN_16 macro\r
- .p2align 4,, (1 << 4) - 1\r
-.endm\r
-\r
-.macro MY_ALIGN_32 macro\r
- .p2align 5,, (1 << 5) - 1\r
-.endm\r
-\r
-.macro MY_ALIGN_64 macro\r
- .p2align 6,, (1 << 6) - 1\r
-.endm\r
+// 7zAsm.S -- ASM macros for arm64
+// 2021-04-25 : Igor Pavlov : Public domain
+
+#define r0 x0
+#define r1 x1
+#define r2 x2
+#define r3 x3
+#define r4 x4
+#define r5 x5
+#define r6 x6
+#define r7 x7
+#define r8 x8
+#define r9 x9
+#define r10 x10
+#define r11 x11
+#define r12 x12
+#define r13 x13
+#define r14 x14
+#define r15 x15
+#define r16 x16
+#define r17 x17
+#define r18 x18
+#define r19 x19
+#define r20 x20
+#define r21 x21
+#define r22 x22
+#define r23 x23
+#define r24 x24
+#define r25 x25
+#define r26 x26
+#define r27 x27
+#define r28 x28
+#define r29 x29
+#define r30 x30
+
+#define REG_ABI_PARAM_0 r0
+#define REG_ABI_PARAM_1 r1
+#define REG_ABI_PARAM_2 r2
+
+
+.macro p2_add reg:req, param:req
+ add \reg, \reg, \param
+.endm
+
+.macro p2_sub reg:req, param:req
+ sub \reg, \reg, \param
+.endm
+
+.macro p2_sub_s reg:req, param:req
+ subs \reg, \reg, \param
+.endm
+
+.macro p2_and reg:req, param:req
+ and \reg, \reg, \param
+.endm
+
+.macro xor reg:req, param:req
+ eor \reg, \reg, \param
+.endm
+
+.macro or reg:req, param:req
+ orr \reg, \reg, \param
+.endm
+
+.macro shl reg:req, param:req
+ lsl \reg, \reg, \param
+.endm
+
+.macro shr reg:req, param:req
+ lsr \reg, \reg, \param
+.endm
+
+.macro sar reg:req, param:req
+ asr \reg, \reg, \param
+.endm
+
+.macro p1_neg reg:req
+ neg \reg, \reg
+.endm
+
+.macro dec reg:req
+ sub \reg, \reg, 1
+.endm
+
+.macro dec_s reg:req
+ subs \reg, \reg, 1
+.endm
+
+.macro inc reg:req
+ add \reg, \reg, 1
+.endm
+
+.macro inc_s reg:req
+ adds \reg, \reg, 1
+.endm
+
+
+.macro imul reg:req, param:req
+ mul \reg, \reg, \param
+.endm
+
+/*
+arm64 and arm use reverted c flag after subs/cmp instructions:
+ arm64-arm : x86
+ b.lo / b.cc : jb / jc
+ b.hs / b.cs : jae / jnc
+*/
+
+.macro jmp lab:req
+ b \lab
+.endm
+
+.macro je lab:req
+ b.eq \lab
+.endm
+
+.macro jz lab:req
+ b.eq \lab
+.endm
+
+.macro jnz lab:req
+ b.ne \lab
+.endm
+
+.macro jne lab:req
+ b.ne \lab
+.endm
+
+.macro jb lab:req
+ b.lo \lab
+.endm
+
+.macro jbe lab:req
+ b.ls \lab
+.endm
+
+.macro ja lab:req
+ b.hi \lab
+.endm
+
+.macro jae lab:req
+ b.hs \lab
+.endm
+
+
+.macro cmove dest:req, srcTrue:req
+ csel \dest, \srcTrue, \dest, eq
+.endm
+
+.macro cmovne dest:req, srcTrue:req
+ csel \dest, \srcTrue, \dest, ne
+.endm
+
+.macro cmovs dest:req, srcTrue:req
+ csel \dest, \srcTrue, \dest, mi
+.endm
+
+.macro cmovns dest:req, srcTrue:req
+ csel \dest, \srcTrue, \dest, pl
+.endm
+
+.macro cmovb dest:req, srcTrue:req
+ csel \dest, \srcTrue, \dest, lo
+.endm
+
+.macro cmovae dest:req, srcTrue:req
+ csel \dest, \srcTrue, \dest, hs
+.endm
+
+
+.macro MY_ALIGN_16 macro
+ .p2align 4,, (1 << 4) - 1
+.endm
+
+.macro MY_ALIGN_32 macro
+ .p2align 5,, (1 << 5) - 1
+.endm
+
+.macro MY_ALIGN_64 macro
+ .p2align 6,, (1 << 6) - 1
+.endm
-; 7zAsm.asm -- ASM macros\r
-; 2022-05-16 : Igor Pavlov : Public domain\r
-\r
-\r
-; UASM can require these changes\r
-; OPTION FRAMEPRESERVEFLAGS:ON\r
-; OPTION PROLOGUE:NONE\r
-; OPTION EPILOGUE:NONE\r
-\r
-ifdef @wordsize\r
-; @wordsize is defined only in JWASM and ASMC and is not defined in MASM\r
-; @wordsize eq 8 for 64-bit x64\r
-; @wordsize eq 2 for 32-bit x86\r
-if @wordsize eq 8\r
- x64 equ 1\r
-endif\r
-else\r
-ifdef RAX\r
- x64 equ 1\r
-endif\r
-endif\r
-\r
-\r
-ifdef x64\r
- IS_X64 equ 1\r
-else\r
- IS_X64 equ 0\r
-endif\r
-\r
-ifdef ABI_LINUX\r
- IS_LINUX equ 1\r
-else\r
- IS_LINUX equ 0\r
-endif\r
-\r
-ifndef x64\r
-; Use ABI_CDECL for x86 (32-bit) only\r
-; if ABI_CDECL is not defined, we use fastcall abi\r
-ifdef ABI_CDECL\r
- IS_CDECL equ 1\r
-else\r
- IS_CDECL equ 0\r
-endif\r
-endif\r
-\r
-OPTION PROLOGUE:NONE \r
-OPTION EPILOGUE:NONE\r
-\r
-MY_ASM_START macro\r
- ifdef x64\r
- .code\r
- else\r
- .386\r
- .model flat\r
- _TEXT$00 SEGMENT PARA PUBLIC 'CODE'\r
- endif\r
-endm\r
-\r
-MY_PROC macro name:req, numParams:req\r
- align 16\r
- proc_numParams = numParams\r
- if (IS_X64 gt 0)\r
- proc_name equ name\r
- elseif (IS_LINUX gt 0)\r
- proc_name equ name\r
- elseif (IS_CDECL gt 0)\r
- proc_name equ @CatStr(_,name)\r
- else\r
- proc_name equ @CatStr(@,name,@, %numParams * 4)\r
- endif\r
- proc_name PROC\r
-endm\r
-\r
-MY_ENDP macro\r
- if (IS_X64 gt 0)\r
- ret\r
- elseif (IS_CDECL gt 0)\r
- ret\r
- elseif (proc_numParams LT 3)\r
- ret\r
- else\r
- ret (proc_numParams - 2) * 4\r
- endif\r
- proc_name ENDP\r
-endm\r
-\r
-\r
-ifdef x64\r
- REG_SIZE equ 8\r
- REG_LOGAR_SIZE equ 3\r
-else\r
- REG_SIZE equ 4\r
- REG_LOGAR_SIZE equ 2\r
-endif\r
-\r
- x0 equ EAX\r
- x1 equ ECX\r
- x2 equ EDX\r
- x3 equ EBX\r
- x4 equ ESP\r
- x5 equ EBP\r
- x6 equ ESI\r
- x7 equ EDI\r
-\r
- x0_W equ AX\r
- x1_W equ CX\r
- x2_W equ DX\r
- x3_W equ BX\r
-\r
- x5_W equ BP\r
- x6_W equ SI\r
- x7_W equ DI\r
-\r
- x0_L equ AL\r
- x1_L equ CL\r
- x2_L equ DL\r
- x3_L equ BL\r
-\r
- x0_H equ AH\r
- x1_H equ CH\r
- x2_H equ DH\r
- x3_H equ BH\r
-\r
-ifdef x64\r
- x5_L equ BPL\r
- x6_L equ SIL\r
- x7_L equ DIL\r
-\r
- r0 equ RAX\r
- r1 equ RCX\r
- r2 equ RDX\r
- r3 equ RBX\r
- r4 equ RSP\r
- r5 equ RBP\r
- r6 equ RSI\r
- r7 equ RDI\r
- x8 equ r8d\r
- x9 equ r9d\r
- x10 equ r10d\r
- x11 equ r11d\r
- x12 equ r12d\r
- x13 equ r13d\r
- x14 equ r14d\r
- x15 equ r15d\r
-else\r
- r0 equ x0\r
- r1 equ x1\r
- r2 equ x2\r
- r3 equ x3\r
- r4 equ x4\r
- r5 equ x5\r
- r6 equ x6\r
- r7 equ x7\r
-endif\r
-\r
-\r
-ifdef x64\r
-ifdef ABI_LINUX\r
-\r
-MY_PUSH_2_REGS macro\r
- push r3\r
- push r5\r
-endm\r
-\r
-MY_POP_2_REGS macro\r
- pop r5\r
- pop r3\r
-endm\r
-\r
-endif\r
-endif\r
-\r
-\r
-MY_PUSH_4_REGS macro\r
- push r3\r
- push r5\r
- push r6\r
- push r7\r
-endm\r
-\r
-MY_POP_4_REGS macro\r
- pop r7\r
- pop r6\r
- pop r5\r
- pop r3\r
-endm\r
-\r
-\r
-; for fastcall and for WIN-x64\r
-REG_PARAM_0_x equ x1\r
-REG_PARAM_0 equ r1\r
-REG_PARAM_1_x equ x2\r
-REG_PARAM_1 equ r2\r
-\r
-ifndef x64\r
-; for x86-fastcall\r
-\r
-REG_ABI_PARAM_0_x equ REG_PARAM_0_x\r
-REG_ABI_PARAM_0 equ REG_PARAM_0\r
-REG_ABI_PARAM_1_x equ REG_PARAM_1_x\r
-REG_ABI_PARAM_1 equ REG_PARAM_1\r
-\r
-else\r
-; x64\r
-\r
-if (IS_LINUX eq 0)\r
-\r
-; for WIN-x64:\r
-REG_PARAM_2_x equ x8\r
-REG_PARAM_2 equ r8\r
-REG_PARAM_3 equ r9\r
-\r
-REG_ABI_PARAM_0_x equ REG_PARAM_0_x\r
-REG_ABI_PARAM_0 equ REG_PARAM_0\r
-REG_ABI_PARAM_1_x equ REG_PARAM_1_x\r
-REG_ABI_PARAM_1 equ REG_PARAM_1\r
-REG_ABI_PARAM_2_x equ REG_PARAM_2_x\r
-REG_ABI_PARAM_2 equ REG_PARAM_2\r
-REG_ABI_PARAM_3 equ REG_PARAM_3\r
-\r
-else\r
-; for LINUX-x64:\r
-REG_LINUX_PARAM_0_x equ x7\r
-REG_LINUX_PARAM_0 equ r7\r
-REG_LINUX_PARAM_1_x equ x6\r
-REG_LINUX_PARAM_1 equ r6\r
-REG_LINUX_PARAM_2 equ r2\r
-REG_LINUX_PARAM_3 equ r1\r
-REG_LINUX_PARAM_4_x equ x8\r
-REG_LINUX_PARAM_4 equ r8\r
-REG_LINUX_PARAM_5 equ r9\r
-\r
-REG_ABI_PARAM_0_x equ REG_LINUX_PARAM_0_x\r
-REG_ABI_PARAM_0 equ REG_LINUX_PARAM_0\r
-REG_ABI_PARAM_1_x equ REG_LINUX_PARAM_1_x\r
-REG_ABI_PARAM_1 equ REG_LINUX_PARAM_1\r
-REG_ABI_PARAM_2 equ REG_LINUX_PARAM_2\r
-REG_ABI_PARAM_3 equ REG_LINUX_PARAM_3\r
-REG_ABI_PARAM_4_x equ REG_LINUX_PARAM_4_x\r
-REG_ABI_PARAM_4 equ REG_LINUX_PARAM_4\r
-REG_ABI_PARAM_5 equ REG_LINUX_PARAM_5\r
-\r
-MY_ABI_LINUX_TO_WIN_2 macro\r
- mov r2, r6\r
- mov r1, r7\r
-endm\r
-\r
-MY_ABI_LINUX_TO_WIN_3 macro\r
- mov r8, r2\r
- mov r2, r6\r
- mov r1, r7\r
-endm\r
-\r
-MY_ABI_LINUX_TO_WIN_4 macro\r
- mov r9, r1\r
- mov r8, r2\r
- mov r2, r6\r
- mov r1, r7\r
-endm\r
-\r
-endif ; IS_LINUX\r
-\r
-\r
-MY_PUSH_PRESERVED_ABI_REGS macro\r
- if (IS_LINUX gt 0)\r
- MY_PUSH_2_REGS\r
- else\r
- MY_PUSH_4_REGS\r
- endif\r
- push r12\r
- push r13\r
- push r14\r
- push r15\r
-endm\r
-\r
-\r
-MY_POP_PRESERVED_ABI_REGS macro\r
- pop r15\r
- pop r14\r
- pop r13\r
- pop r12\r
- if (IS_LINUX gt 0)\r
- MY_POP_2_REGS\r
- else\r
- MY_POP_4_REGS\r
- endif\r
-endm\r
-\r
-endif ; x64\r
+; 7zAsm.asm -- ASM macros
+; 2023-12-08 : Igor Pavlov : Public domain
+
+
+; UASM can require these changes
+; OPTION FRAMEPRESERVEFLAGS:ON
+; OPTION PROLOGUE:NONE
+; OPTION EPILOGUE:NONE
+
+ifdef @wordsize
+; @wordsize is defined only in JWASM and ASMC and is not defined in MASM
+; @wordsize eq 8 for 64-bit x64
+; @wordsize eq 2 for 32-bit x86
+if @wordsize eq 8
+ x64 equ 1
+endif
+else
+ifdef RAX
+ x64 equ 1
+endif
+endif
+
+
+ifdef x64
+ IS_X64 equ 1
+else
+ IS_X64 equ 0
+endif
+
+ifdef ABI_LINUX
+ IS_LINUX equ 1
+else
+ IS_LINUX equ 0
+endif
+
+ifndef x64
+; Use ABI_CDECL for x86 (32-bit) only
+; if ABI_CDECL is not defined, we use fastcall abi
+ifdef ABI_CDECL
+ IS_CDECL equ 1
+else
+ IS_CDECL equ 0
+endif
+endif
+
+OPTION PROLOGUE:NONE
+OPTION EPILOGUE:NONE
+
+MY_ASM_START macro
+ ifdef x64
+ .code
+ else
+ .386
+ .model flat
+ _TEXT$00 SEGMENT PARA PUBLIC 'CODE'
+ endif
+endm
+
+MY_PROC macro name:req, numParams:req
+ align 16
+ proc_numParams = numParams
+ if (IS_X64 gt 0)
+ proc_name equ name
+ elseif (IS_LINUX gt 0)
+ proc_name equ name
+ elseif (IS_CDECL gt 0)
+ proc_name equ @CatStr(_,name)
+ else
+ proc_name equ @CatStr(@,name,@, %numParams * 4)
+ endif
+ proc_name PROC
+endm
+
+MY_ENDP macro
+ if (IS_X64 gt 0)
+ ret
+ elseif (IS_CDECL gt 0)
+ ret
+ elseif (proc_numParams LT 3)
+ ret
+ else
+ ret (proc_numParams - 2) * 4
+ endif
+ proc_name ENDP
+endm
+
+
+ifdef x64
+ REG_SIZE equ 8
+ REG_LOGAR_SIZE equ 3
+else
+ REG_SIZE equ 4
+ REG_LOGAR_SIZE equ 2
+endif
+
+ x0 equ EAX
+ x1 equ ECX
+ x2 equ EDX
+ x3 equ EBX
+ x4 equ ESP
+ x5 equ EBP
+ x6 equ ESI
+ x7 equ EDI
+
+ x0_W equ AX
+ x1_W equ CX
+ x2_W equ DX
+ x3_W equ BX
+
+ x5_W equ BP
+ x6_W equ SI
+ x7_W equ DI
+
+ x0_L equ AL
+ x1_L equ CL
+ x2_L equ DL
+ x3_L equ BL
+
+ x0_H equ AH
+ x1_H equ CH
+ x2_H equ DH
+ x3_H equ BH
+
+; r0_L equ AL
+; r1_L equ CL
+; r2_L equ DL
+; r3_L equ BL
+
+; r0_H equ AH
+; r1_H equ CH
+; r2_H equ DH
+; r3_H equ BH
+
+
+ifdef x64
+ x5_L equ BPL
+ x6_L equ SIL
+ x7_L equ DIL
+ x8_L equ r8b
+ x9_L equ r9b
+ x10_L equ r10b
+ x11_L equ r11b
+ x12_L equ r12b
+ x13_L equ r13b
+ x14_L equ r14b
+ x15_L equ r15b
+
+ r0 equ RAX
+ r1 equ RCX
+ r2 equ RDX
+ r3 equ RBX
+ r4 equ RSP
+ r5 equ RBP
+ r6 equ RSI
+ r7 equ RDI
+ x8 equ r8d
+ x9 equ r9d
+ x10 equ r10d
+ x11 equ r11d
+ x12 equ r12d
+ x13 equ r13d
+ x14 equ r14d
+ x15 equ r15d
+else
+ r0 equ x0
+ r1 equ x1
+ r2 equ x2
+ r3 equ x3
+ r4 equ x4
+ r5 equ x5
+ r6 equ x6
+ r7 equ x7
+endif
+
+ x0_R equ r0
+ x1_R equ r1
+ x2_R equ r2
+ x3_R equ r3
+ x4_R equ r4
+ x5_R equ r5
+ x6_R equ r6
+ x7_R equ r7
+ x8_R equ r8
+ x9_R equ r9
+ x10_R equ r10
+ x11_R equ r11
+ x12_R equ r12
+ x13_R equ r13
+ x14_R equ r14
+ x15_R equ r15
+
+ifdef x64
+ifdef ABI_LINUX
+
+MY_PUSH_2_REGS macro
+ push r3
+ push r5
+endm
+
+MY_POP_2_REGS macro
+ pop r5
+ pop r3
+endm
+
+endif
+endif
+
+
+MY_PUSH_4_REGS macro
+ push r3
+ push r5
+ push r6
+ push r7
+endm
+
+MY_POP_4_REGS macro
+ pop r7
+ pop r6
+ pop r5
+ pop r3
+endm
+
+
+; for fastcall and for WIN-x64
+REG_PARAM_0_x equ x1
+REG_PARAM_0 equ r1
+REG_PARAM_1_x equ x2
+REG_PARAM_1 equ r2
+
+ifndef x64
+; for x86-fastcall
+
+REG_ABI_PARAM_0_x equ REG_PARAM_0_x
+REG_ABI_PARAM_0 equ REG_PARAM_0
+REG_ABI_PARAM_1_x equ REG_PARAM_1_x
+REG_ABI_PARAM_1 equ REG_PARAM_1
+
+MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
+ MY_PUSH_4_REGS
+endm
+
+MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
+ MY_POP_4_REGS
+endm
+
+else
+; x64
+
+if (IS_LINUX eq 0)
+
+; for WIN-x64:
+REG_PARAM_2_x equ x8
+REG_PARAM_2 equ r8
+REG_PARAM_3 equ r9
+
+REG_ABI_PARAM_0_x equ REG_PARAM_0_x
+REG_ABI_PARAM_0 equ REG_PARAM_0
+REG_ABI_PARAM_1_x equ REG_PARAM_1_x
+REG_ABI_PARAM_1 equ REG_PARAM_1
+REG_ABI_PARAM_2_x equ REG_PARAM_2_x
+REG_ABI_PARAM_2 equ REG_PARAM_2
+REG_ABI_PARAM_3 equ REG_PARAM_3
+
+else
+; for LINUX-x64:
+REG_LINUX_PARAM_0_x equ x7
+REG_LINUX_PARAM_0 equ r7
+REG_LINUX_PARAM_1_x equ x6
+REG_LINUX_PARAM_1 equ r6
+REG_LINUX_PARAM_2 equ r2
+REG_LINUX_PARAM_3 equ r1
+REG_LINUX_PARAM_4_x equ x8
+REG_LINUX_PARAM_4 equ r8
+REG_LINUX_PARAM_5 equ r9
+
+REG_ABI_PARAM_0_x equ REG_LINUX_PARAM_0_x
+REG_ABI_PARAM_0 equ REG_LINUX_PARAM_0
+REG_ABI_PARAM_1_x equ REG_LINUX_PARAM_1_x
+REG_ABI_PARAM_1 equ REG_LINUX_PARAM_1
+REG_ABI_PARAM_2 equ REG_LINUX_PARAM_2
+REG_ABI_PARAM_3 equ REG_LINUX_PARAM_3
+REG_ABI_PARAM_4_x equ REG_LINUX_PARAM_4_x
+REG_ABI_PARAM_4 equ REG_LINUX_PARAM_4
+REG_ABI_PARAM_5 equ REG_LINUX_PARAM_5
+
+MY_ABI_LINUX_TO_WIN_2 macro
+ mov r2, r6
+ mov r1, r7
+endm
+
+MY_ABI_LINUX_TO_WIN_3 macro
+ mov r8, r2
+ mov r2, r6
+ mov r1, r7
+endm
+
+MY_ABI_LINUX_TO_WIN_4 macro
+ mov r9, r1
+ mov r8, r2
+ mov r2, r6
+ mov r1, r7
+endm
+
+endif ; IS_LINUX
+
+
+MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
+ if (IS_LINUX gt 0)
+ MY_PUSH_2_REGS
+ else
+ MY_PUSH_4_REGS
+ endif
+endm
+
+MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
+ if (IS_LINUX gt 0)
+ MY_POP_2_REGS
+ else
+ MY_POP_4_REGS
+ endif
+endm
+
+
+MY_PUSH_PRESERVED_ABI_REGS macro
+ MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
+ push r12
+ push r13
+ push r14
+ push r15
+endm
+
+
+MY_POP_PRESERVED_ABI_REGS macro
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
+endm
+
+endif ; x64
-; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function\r
-; 2021-02-23: Igor Pavlov : Public domain\r
-;\r
-; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()\r
-; function for check at link time.\r
-; That code is tightly coupled with LzmaDec_TryDummy()\r
-; and with another functions in LzmaDec.c file.\r
-; CLzmaDec structure, (probs) array layout, input and output of\r
-; LzmaDec_DecodeReal_*() must be equal in both versions (C / ASM).\r
-\r
-ifndef x64\r
-; x64=1\r
-; .err <x64_IS_REQUIRED>\r
-endif\r
-\r
-include 7zAsm.asm\r
-\r
-MY_ASM_START\r
-\r
-_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'\r
-\r
-MY_ALIGN macro num:req\r
- align num\r
-endm\r
-\r
-MY_ALIGN_16 macro\r
- MY_ALIGN 16\r
-endm\r
-\r
-MY_ALIGN_32 macro\r
- MY_ALIGN 32\r
-endm\r
-\r
-MY_ALIGN_64 macro\r
- MY_ALIGN 64\r
-endm\r
-\r
-\r
-; _LZMA_SIZE_OPT equ 1\r
-\r
-; _LZMA_PROB32 equ 1\r
-\r
-ifdef _LZMA_PROB32\r
- PSHIFT equ 2\r
- PLOAD macro dest, mem\r
- mov dest, dword ptr [mem]\r
- endm\r
- PSTORE macro src, mem\r
- mov dword ptr [mem], src\r
- endm\r
-else\r
- PSHIFT equ 1\r
- PLOAD macro dest, mem\r
- movzx dest, word ptr [mem]\r
- endm\r
- PSTORE macro src, mem\r
- mov word ptr [mem], @CatStr(src, _W)\r
- endm\r
-endif\r
-\r
-PMULT equ (1 SHL PSHIFT)\r
-PMULT_HALF equ (1 SHL (PSHIFT - 1))\r
-PMULT_2 equ (1 SHL (PSHIFT + 1))\r
-\r
-kMatchSpecLen_Error_Data equ (1 SHL 9)\r
-\r
-; x0 range\r
-; x1 pbPos / (prob) TREE\r
-; x2 probBranch / prm (MATCHED) / pbPos / cnt\r
-; x3 sym\r
-;====== r4 === RSP\r
-; x5 cod\r
-; x6 t1 NORM_CALC / probs_state / dist\r
-; x7 t0 NORM_CALC / prob2 IF_BIT_1\r
-; x8 state\r
-; x9 match (MATCHED) / sym2 / dist2 / lpMask_reg\r
-; x10 kBitModelTotal_reg\r
-; r11 probs\r
-; x12 offs (MATCHED) / dic / len_temp\r
-; x13 processedPos\r
-; x14 bit (MATCHED) / dicPos\r
-; r15 buf\r
-\r
-\r
-cod equ x5\r
-cod_L equ x5_L\r
-range equ x0\r
-state equ x8\r
-state_R equ r8\r
-buf equ r15\r
-processedPos equ x13\r
-kBitModelTotal_reg equ x10\r
-\r
-probBranch equ x2\r
-probBranch_R equ r2\r
-probBranch_W equ x2_W\r
-\r
-pbPos equ x1\r
-pbPos_R equ r1\r
-\r
-cnt equ x2\r
-cnt_R equ r2\r
-\r
-lpMask_reg equ x9\r
-dicPos equ r14\r
-\r
-sym equ x3\r
-sym_R equ r3\r
-sym_L equ x3_L\r
-\r
-probs equ r11\r
-dic equ r12\r
-\r
-t0 equ x7\r
-t0_W equ x7_W\r
-t0_R equ r7\r
-\r
-prob2 equ t0\r
-prob2_W equ t0_W\r
-\r
-t1 equ x6\r
-t1_R equ r6\r
-\r
-probs_state equ t1\r
-probs_state_R equ t1_R\r
-\r
-prm equ r2\r
-match equ x9\r
-match_R equ r9\r
-offs equ x12\r
-offs_R equ r12\r
-bit equ x14\r
-bit_R equ r14\r
-\r
-sym2 equ x9\r
-sym2_R equ r9\r
-\r
-len_temp equ x12\r
-\r
-dist equ sym\r
-dist2 equ x9\r
-\r
-\r
-\r
-kNumBitModelTotalBits equ 11\r
-kBitModelTotal equ (1 SHL kNumBitModelTotalBits)\r
-kNumMoveBits equ 5\r
-kBitModelOffset equ ((1 SHL kNumMoveBits) - 1)\r
-kTopValue equ (1 SHL 24)\r
-\r
-NORM_2 macro\r
- ; movzx t0, BYTE PTR [buf]\r
- shl cod, 8\r
- mov cod_L, BYTE PTR [buf]\r
- shl range, 8\r
- ; or cod, t0\r
- inc buf\r
-endm\r
-\r
-\r
-NORM macro\r
- cmp range, kTopValue\r
- jae SHORT @F\r
- NORM_2\r
-@@:\r
-endm\r
-\r
-\r
-; ---------- Branch MACROS ----------\r
-\r
-UPDATE_0 macro probsArray:req, probOffset:req, probDisp:req\r
- mov prob2, kBitModelTotal_reg\r
- sub prob2, probBranch\r
- shr prob2, kNumMoveBits\r
- add probBranch, prob2\r
- PSTORE probBranch, probOffset * 1 + probsArray + probDisp * PMULT\r
-endm\r
-\r
-\r
-UPDATE_1 macro probsArray:req, probOffset:req, probDisp:req\r
- sub prob2, range\r
- sub cod, range\r
- mov range, prob2\r
- mov prob2, probBranch\r
- shr probBranch, kNumMoveBits\r
- sub prob2, probBranch\r
- PSTORE prob2, probOffset * 1 + probsArray + probDisp * PMULT\r
-endm\r
-\r
-\r
-CMP_COD macro probsArray:req, probOffset:req, probDisp:req\r
- PLOAD probBranch, probOffset * 1 + probsArray + probDisp * PMULT\r
- NORM\r
- mov prob2, range\r
- shr range, kNumBitModelTotalBits\r
- imul range, probBranch\r
- cmp cod, range\r
-endm\r
-\r
-\r
-IF_BIT_1_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req\r
- CMP_COD probsArray, probOffset, probDisp\r
- jae toLabel\r
-endm\r
-\r
-\r
-IF_BIT_1 macro probsArray:req, probOffset:req, probDisp:req, toLabel:req\r
- IF_BIT_1_NOUP probsArray, probOffset, probDisp, toLabel\r
- UPDATE_0 probsArray, probOffset, probDisp\r
-endm\r
-\r
-\r
-IF_BIT_0_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req\r
- CMP_COD probsArray, probOffset, probDisp\r
- jb toLabel\r
-endm\r
-\r
-\r
-; ---------- CMOV MACROS ----------\r
-\r
-NORM_CALC macro prob:req\r
- NORM\r
- mov t0, range\r
- shr range, kNumBitModelTotalBits\r
- imul range, prob\r
- sub t0, range\r
- mov t1, cod\r
- sub cod, range\r
-endm\r
-\r
-\r
-PUP macro prob:req, probPtr:req\r
- sub t0, prob\r
- ; only sar works for both 16/32 bit prob modes\r
- sar t0, kNumMoveBits\r
- add t0, prob\r
- PSTORE t0, probPtr\r
-endm\r
-\r
-\r
-PUP_SUB macro prob:req, probPtr:req, symSub:req\r
- sbb sym, symSub\r
- PUP prob, probPtr\r
-endm\r
-\r
-\r
-PUP_COD macro prob:req, probPtr:req, symSub:req\r
- mov t0, kBitModelOffset\r
- cmovb cod, t1\r
- mov t1, sym\r
- cmovb t0, kBitModelTotal_reg\r
- PUP_SUB prob, probPtr, symSub\r
-endm\r
-\r
-\r
-BIT_0 macro prob:req, probNext:req\r
- PLOAD prob, probs + 1 * PMULT\r
- PLOAD probNext, probs + 1 * PMULT_2\r
-\r
- NORM_CALC prob\r
- \r
- cmovae range, t0\r
- PLOAD t0, probs + 1 * PMULT_2 + PMULT\r
- cmovae probNext, t0\r
- mov t0, kBitModelOffset\r
- cmovb cod, t1\r
- cmovb t0, kBitModelTotal_reg\r
- mov sym, 2\r
- PUP_SUB prob, probs + 1 * PMULT, 0 - 1\r
-endm\r
-\r
-\r
-BIT_1 macro prob:req, probNext:req\r
- PLOAD probNext, probs + sym_R * PMULT_2\r
- add sym, sym\r
- \r
- NORM_CALC prob\r
- \r
- cmovae range, t0\r
- PLOAD t0, probs + sym_R * PMULT + PMULT\r
- cmovae probNext, t0\r
- PUP_COD prob, probs + t1_R * PMULT_HALF, 0 - 1\r
-endm\r
-\r
-\r
-BIT_2 macro prob:req, symSub:req\r
- add sym, sym\r
-\r
- NORM_CALC prob\r
- \r
- cmovae range, t0\r
- PUP_COD prob, probs + t1_R * PMULT_HALF, symSub\r
-endm\r
-\r
-\r
-; ---------- MATCHED LITERAL ----------\r
-\r
-LITM_0 macro\r
- mov offs, 256 * PMULT\r
- shl match, (PSHIFT + 1)\r
- mov bit, offs\r
- and bit, match\r
- PLOAD x1, probs + 256 * PMULT + bit_R * 1 + 1 * PMULT\r
- lea prm, [probs + 256 * PMULT + bit_R * 1 + 1 * PMULT]\r
- ; lea prm, [probs + 256 * PMULT + 1 * PMULT]\r
- ; add prm, bit_R\r
- xor offs, bit\r
- add match, match\r
-\r
- NORM_CALC x1\r
-\r
- cmovae offs, bit\r
- mov bit, match\r
- cmovae range, t0\r
- mov t0, kBitModelOffset\r
- cmovb cod, t1\r
- cmovb t0, kBitModelTotal_reg\r
- mov sym, 0\r
- PUP_SUB x1, prm, -2-1\r
-endm\r
-\r
-\r
-LITM macro\r
- and bit, offs\r
- lea prm, [probs + offs_R * 1]\r
- add prm, bit_R\r
- PLOAD x1, prm + sym_R * PMULT\r
- xor offs, bit\r
- add sym, sym\r
- add match, match\r
-\r
- NORM_CALC x1\r
-\r
- cmovae offs, bit\r
- mov bit, match\r
- cmovae range, t0\r
- PUP_COD x1, prm + t1_R * PMULT_HALF, - 1\r
-endm\r
-\r
-\r
-LITM_2 macro\r
- and bit, offs\r
- lea prm, [probs + offs_R * 1]\r
- add prm, bit_R\r
- PLOAD x1, prm + sym_R * PMULT\r
- add sym, sym\r
-\r
- NORM_CALC x1\r
-\r
- cmovae range, t0\r
- PUP_COD x1, prm + t1_R * PMULT_HALF, 256 - 1\r
-endm\r
-\r
-\r
-; ---------- REVERSE BITS ----------\r
-\r
-REV_0 macro prob:req, probNext:req\r
- ; PLOAD prob, probs + 1 * PMULT\r
- ; lea sym2_R, [probs + 2 * PMULT]\r
- ; PLOAD probNext, probs + 2 * PMULT\r
- PLOAD probNext, sym2_R\r
-\r
- NORM_CALC prob\r
-\r
- cmovae range, t0\r
- PLOAD t0, probs + 3 * PMULT\r
- cmovae probNext, t0\r
- cmovb cod, t1\r
- mov t0, kBitModelOffset\r
- cmovb t0, kBitModelTotal_reg\r
- lea t1_R, [probs + 3 * PMULT]\r
- cmovae sym2_R, t1_R\r
- PUP prob, probs + 1 * PMULT\r
-endm\r
-\r
-\r
-REV_1 macro prob:req, probNext:req, step:req\r
- add sym2_R, step * PMULT\r
- PLOAD probNext, sym2_R\r
-\r
- NORM_CALC prob\r
-\r
- cmovae range, t0\r
- PLOAD t0, sym2_R + step * PMULT\r
- cmovae probNext, t0\r
- cmovb cod, t1\r
- mov t0, kBitModelOffset\r
- cmovb t0, kBitModelTotal_reg\r
- lea t1_R, [sym2_R + step * PMULT]\r
- cmovae sym2_R, t1_R\r
- PUP prob, t1_R - step * PMULT_2\r
-endm\r
-\r
-\r
-REV_2 macro prob:req, step:req\r
- sub sym2_R, probs\r
- shr sym2, PSHIFT\r
- or sym, sym2\r
-\r
- NORM_CALC prob\r
-\r
- cmovae range, t0\r
- lea t0, [sym - step]\r
- cmovb sym, t0\r
- cmovb cod, t1\r
- mov t0, kBitModelOffset\r
- cmovb t0, kBitModelTotal_reg\r
- PUP prob, probs + sym2_R * PMULT\r
-endm\r
-\r
-\r
-REV_1_VAR macro prob:req\r
- PLOAD prob, sym_R\r
- mov probs, sym_R\r
- add sym_R, sym2_R\r
-\r
- NORM_CALC prob\r
-\r
- cmovae range, t0\r
- lea t0_R, [sym_R + 1 * sym2_R]\r
- cmovae sym_R, t0_R\r
- mov t0, kBitModelOffset\r
- cmovb cod, t1\r
- ; mov t1, kBitModelTotal\r
- ; cmovb t0, t1\r
- cmovb t0, kBitModelTotal_reg\r
- add sym2, sym2\r
- PUP prob, probs\r
-endm\r
-\r
-\r
-\r
-\r
-LIT_PROBS macro lpMaskParam:req\r
- ; prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);\r
- mov t0, processedPos\r
- shl t0, 8\r
- add sym, t0\r
- and sym, lpMaskParam\r
- add probs_state_R, pbPos_R\r
- mov x1, LOC lc2\r
- lea sym, dword ptr[sym_R + 2 * sym_R]\r
- add probs, Literal * PMULT\r
- shl sym, x1_L\r
- add probs, sym_R\r
- UPDATE_0 probs_state_R, 0, IsMatch\r
- inc processedPos\r
-endm\r
-\r
-\r
-\r
-kNumPosBitsMax equ 4\r
-kNumPosStatesMax equ (1 SHL kNumPosBitsMax)\r
-\r
-kLenNumLowBits equ 3\r
-kLenNumLowSymbols equ (1 SHL kLenNumLowBits)\r
-kLenNumHighBits equ 8\r
-kLenNumHighSymbols equ (1 SHL kLenNumHighBits)\r
-kNumLenProbs equ (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols)\r
-\r
-LenLow equ 0\r
-LenChoice equ LenLow\r
-LenChoice2 equ (LenLow + kLenNumLowSymbols)\r
-LenHigh equ (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax)\r
-\r
-kNumStates equ 12\r
-kNumStates2 equ 16\r
-kNumLitStates equ 7\r
-\r
-kStartPosModelIndex equ 4\r
-kEndPosModelIndex equ 14\r
-kNumFullDistances equ (1 SHL (kEndPosModelIndex SHR 1))\r
-\r
-kNumPosSlotBits equ 6\r
-kNumLenToPosStates equ 4\r
-\r
-kNumAlignBits equ 4\r
-kAlignTableSize equ (1 SHL kNumAlignBits)\r
-\r
-kMatchMinLen equ 2\r
-kMatchSpecLenStart equ (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)\r
-\r
-kStartOffset equ 1664\r
-SpecPos equ (-kStartOffset)\r
-IsRep0Long equ (SpecPos + kNumFullDistances)\r
-RepLenCoder equ (IsRep0Long + (kNumStates2 SHL kNumPosBitsMax))\r
-LenCoder equ (RepLenCoder + kNumLenProbs)\r
-IsMatch equ (LenCoder + kNumLenProbs)\r
-kAlign equ (IsMatch + (kNumStates2 SHL kNumPosBitsMax))\r
-IsRep equ (kAlign + kAlignTableSize)\r
-IsRepG0 equ (IsRep + kNumStates)\r
-IsRepG1 equ (IsRepG0 + kNumStates)\r
-IsRepG2 equ (IsRepG1 + kNumStates)\r
-PosSlot equ (IsRepG2 + kNumStates)\r
-Literal equ (PosSlot + (kNumLenToPosStates SHL kNumPosSlotBits))\r
-NUM_BASE_PROBS equ (Literal + kStartOffset)\r
-\r
-if kAlign ne 0\r
- .err <Stop_Compiling_Bad_LZMA_kAlign>\r
-endif\r
-\r
-if NUM_BASE_PROBS ne 1984\r
- .err <Stop_Compiling_Bad_LZMA_PROBS>\r
-endif\r
-\r
-\r
-PTR_FIELD equ dq ?\r
-\r
-CLzmaDec_Asm struct\r
- lc db ?\r
- lp db ?\r
- pb db ?\r
- _pad_ db ?\r
- dicSize dd ?\r
-\r
- probs_Spec PTR_FIELD\r
- probs_1664 PTR_FIELD\r
- dic_Spec PTR_FIELD\r
- dicBufSize PTR_FIELD\r
- dicPos_Spec PTR_FIELD\r
- buf_Spec PTR_FIELD\r
-\r
- range_Spec dd ?\r
- code_Spec dd ?\r
- processedPos_Spec dd ?\r
- checkDicSize dd ?\r
- rep0 dd ?\r
- rep1 dd ?\r
- rep2 dd ?\r
- rep3 dd ?\r
- state_Spec dd ?\r
- remainLen dd ?\r
-CLzmaDec_Asm ends\r
-\r
-\r
-CLzmaDec_Asm_Loc struct\r
- OLD_RSP PTR_FIELD\r
- lzmaPtr PTR_FIELD\r
- _pad0_ PTR_FIELD\r
- _pad1_ PTR_FIELD\r
- _pad2_ PTR_FIELD\r
- dicBufSize PTR_FIELD\r
- probs_Spec PTR_FIELD\r
- dic_Spec PTR_FIELD\r
- \r
- limit PTR_FIELD\r
- bufLimit PTR_FIELD\r
- lc2 dd ?\r
- lpMask dd ?\r
- pbMask dd ?\r
- checkDicSize dd ?\r
-\r
- _pad_ dd ?\r
- remainLen dd ?\r
- dicPos_Spec PTR_FIELD\r
- rep0 dd ?\r
- rep1 dd ?\r
- rep2 dd ?\r
- rep3 dd ?\r
-CLzmaDec_Asm_Loc ends\r
-\r
-\r
-GLOB_2 equ [sym_R].CLzmaDec_Asm.\r
-GLOB equ [r1].CLzmaDec_Asm.\r
-LOC_0 equ [r0].CLzmaDec_Asm_Loc.\r
-LOC equ [RSP].CLzmaDec_Asm_Loc.\r
-\r
-\r
-COPY_VAR macro name\r
- mov t0, GLOB_2 name\r
- mov LOC_0 name, t0\r
-endm\r
-\r
-\r
-RESTORE_VAR macro name\r
- mov t0, LOC name\r
- mov GLOB name, t0\r
-endm\r
-\r
-\r
-\r
-IsMatchBranch_Pre macro reg\r
- ; prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;\r
- mov pbPos, LOC pbMask\r
- and pbPos, processedPos\r
- shl pbPos, (kLenNumLowBits + 1 + PSHIFT)\r
- lea probs_state_R, [probs + 1 * state_R]\r
-endm\r
-\r
-\r
-IsMatchBranch macro reg\r
- IsMatchBranch_Pre\r
- IF_BIT_1 probs_state_R, pbPos_R, IsMatch, IsMatch_label\r
-endm\r
- \r
-\r
-CheckLimits macro reg\r
- cmp buf, LOC bufLimit\r
- jae fin_OK\r
- cmp dicPos, LOC limit\r
- jae fin_OK\r
-endm\r
-\r
-\r
-\r
-; RSP is (16x + 8) bytes aligned in WIN64-x64\r
-; LocalSize equ ((((SIZEOF CLzmaDec_Asm_Loc) + 7) / 16 * 16) + 8)\r
-\r
-PARAM_lzma equ REG_ABI_PARAM_0\r
-PARAM_limit equ REG_ABI_PARAM_1\r
-PARAM_bufLimit equ REG_ABI_PARAM_2\r
-\r
-; MY_ALIGN_64\r
-MY_PROC LzmaDec_DecodeReal_3, 3\r
-MY_PUSH_PRESERVED_ABI_REGS\r
-\r
- lea r0, [RSP - (SIZEOF CLzmaDec_Asm_Loc)]\r
- and r0, -128\r
- mov r5, RSP\r
- mov RSP, r0\r
- mov LOC_0 Old_RSP, r5\r
- mov LOC_0 lzmaPtr, PARAM_lzma\r
- \r
- mov LOC_0 remainLen, 0 ; remainLen must be ZERO\r
-\r
- mov LOC_0 bufLimit, PARAM_bufLimit\r
- mov sym_R, PARAM_lzma ; CLzmaDec_Asm_Loc pointer for GLOB_2\r
- mov dic, GLOB_2 dic_Spec\r
- add PARAM_limit, dic\r
- mov LOC_0 limit, PARAM_limit\r
-\r
- COPY_VAR(rep0)\r
- COPY_VAR(rep1)\r
- COPY_VAR(rep2)\r
- COPY_VAR(rep3)\r
- \r
- mov dicPos, GLOB_2 dicPos_Spec\r
- add dicPos, dic\r
- mov LOC_0 dicPos_Spec, dicPos\r
- mov LOC_0 dic_Spec, dic\r
- \r
- mov x1_L, GLOB_2 pb\r
- mov t0, 1\r
- shl t0, x1_L\r
- dec t0\r
- mov LOC_0 pbMask, t0\r
-\r
- ; unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;\r
- ; unsigned lc = p->prop.lc;\r
- ; unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);\r
-\r
- mov x1_L, GLOB_2 lc\r
- mov x2, 100h\r
- mov t0, x2\r
- shr x2, x1_L\r
- ; inc x1\r
- add x1_L, PSHIFT\r
- mov LOC_0 lc2, x1\r
- mov x1_L, GLOB_2 lp\r
- shl t0, x1_L\r
- sub t0, x2\r
- mov LOC_0 lpMask, t0\r
- mov lpMask_reg, t0\r
- \r
- ; mov probs, GLOB_2 probs_Spec\r
- ; add probs, kStartOffset SHL PSHIFT\r
- mov probs, GLOB_2 probs_1664\r
- mov LOC_0 probs_Spec, probs\r
-\r
- mov t0_R, GLOB_2 dicBufSize\r
- mov LOC_0 dicBufSize, t0_R\r
- \r
- mov x1, GLOB_2 checkDicSize\r
- mov LOC_0 checkDicSize, x1\r
-\r
- mov processedPos, GLOB_2 processedPos_Spec\r
-\r
- mov state, GLOB_2 state_Spec\r
- shl state, PSHIFT\r
-\r
- mov buf, GLOB_2 buf_Spec\r
- mov range, GLOB_2 range_Spec\r
- mov cod, GLOB_2 code_Spec\r
- mov kBitModelTotal_reg, kBitModelTotal\r
- xor sym, sym\r
-\r
- ; if (processedPos != 0 || checkDicSize != 0)\r
- or x1, processedPos\r
- jz @f\r
- \r
- add t0_R, dic\r
- cmp dicPos, dic\r
- cmovnz t0_R, dicPos\r
- movzx sym, byte ptr[t0_R - 1]\r
-\r
-@@:\r
- IsMatchBranch_Pre\r
- cmp state, 4 * PMULT\r
- jb lit_end\r
- cmp state, kNumLitStates * PMULT\r
- jb lit_matched_end\r
- jmp lz_end\r
- \r
-\r
- \r
-\r
-; ---------- LITERAL ----------\r
-MY_ALIGN_64\r
-lit_start:\r
- xor state, state\r
-lit_start_2:\r
- LIT_PROBS lpMask_reg\r
-\r
- ifdef _LZMA_SIZE_OPT\r
-\r
- PLOAD x1, probs + 1 * PMULT\r
- mov sym, 1\r
-MY_ALIGN_16\r
-lit_loop:\r
- BIT_1 x1, x2\r
- mov x1, x2\r
- cmp sym, 127\r
- jbe lit_loop\r
- \r
- else\r
- \r
- BIT_0 x1, x2\r
- BIT_1 x2, x1\r
- BIT_1 x1, x2\r
- BIT_1 x2, x1\r
- BIT_1 x1, x2\r
- BIT_1 x2, x1\r
- BIT_1 x1, x2\r
- \r
- endif\r
-\r
- BIT_2 x2, 256 - 1\r
- \r
- ; mov dic, LOC dic_Spec\r
- mov probs, LOC probs_Spec\r
- IsMatchBranch_Pre\r
- mov byte ptr[dicPos], sym_L\r
- inc dicPos\r
- \r
- CheckLimits\r
-lit_end:\r
- IF_BIT_0_NOUP probs_state_R, pbPos_R, IsMatch, lit_start\r
-\r
- ; jmp IsMatch_label\r
- \r
-; ---------- MATCHES ----------\r
-; MY_ALIGN_32\r
-IsMatch_label:\r
- UPDATE_1 probs_state_R, pbPos_R, IsMatch\r
- IF_BIT_1 probs_state_R, 0, IsRep, IsRep_label\r
-\r
- add probs, LenCoder * PMULT\r
- add state, kNumStates * PMULT\r
-\r
-; ---------- LEN DECODE ----------\r
-len_decode:\r
- mov len_temp, 8 - 1 - kMatchMinLen\r
- IF_BIT_0_NOUP probs, 0, 0, len_mid_0\r
- UPDATE_1 probs, 0, 0\r
- add probs, (1 SHL (kLenNumLowBits + PSHIFT))\r
- mov len_temp, -1 - kMatchMinLen\r
- IF_BIT_0_NOUP probs, 0, 0, len_mid_0\r
- UPDATE_1 probs, 0, 0\r
- add probs, LenHigh * PMULT - (1 SHL (kLenNumLowBits + PSHIFT))\r
- mov sym, 1\r
- PLOAD x1, probs + 1 * PMULT\r
-\r
-MY_ALIGN_32\r
-len8_loop:\r
- BIT_1 x1, x2\r
- mov x1, x2\r
- cmp sym, 64\r
- jb len8_loop\r
- \r
- mov len_temp, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - 1 - kMatchMinLen\r
- jmp short len_mid_2 ; we use short here for MASM that doesn't optimize that code as another assembler programs\r
- \r
-MY_ALIGN_32\r
-len_mid_0:\r
- UPDATE_0 probs, 0, 0\r
- add probs, pbPos_R\r
- BIT_0 x2, x1\r
-len_mid_2:\r
- BIT_1 x1, x2\r
- BIT_2 x2, len_temp\r
- mov probs, LOC probs_Spec\r
- cmp state, kNumStates * PMULT\r
- jb copy_match\r
- \r
-\r
-; ---------- DECODE DISTANCE ----------\r
- ; probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);\r
-\r
- mov t0, 3 + kMatchMinLen\r
- cmp sym, 3 + kMatchMinLen\r
- cmovb t0, sym\r
- add probs, PosSlot * PMULT - (kMatchMinLen SHL (kNumPosSlotBits + PSHIFT))\r
- shl t0, (kNumPosSlotBits + PSHIFT)\r
- add probs, t0_R\r
- \r
- ; sym = Len\r
- ; mov LOC remainLen, sym\r
- mov len_temp, sym\r
-\r
- ifdef _LZMA_SIZE_OPT\r
-\r
- PLOAD x1, probs + 1 * PMULT\r
- mov sym, 1\r
-MY_ALIGN_16\r
-slot_loop:\r
- BIT_1 x1, x2\r
- mov x1, x2\r
- cmp sym, 32\r
- jb slot_loop\r
- \r
- else\r
- \r
- BIT_0 x1, x2\r
- BIT_1 x2, x1\r
- BIT_1 x1, x2\r
- BIT_1 x2, x1\r
- BIT_1 x1, x2\r
- \r
- endif\r
- \r
- mov x1, sym\r
- BIT_2 x2, 64-1\r
-\r
- and sym, 3\r
- mov probs, LOC probs_Spec\r
- cmp x1, 32 + kEndPosModelIndex / 2\r
- jb short_dist\r
-\r
- ; unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));\r
- sub x1, (32 + 1 + kNumAlignBits)\r
- ; distance = (2 | (distance & 1));\r
- or sym, 2\r
- PLOAD x2, probs + 1 * PMULT\r
- shl sym, kNumAlignBits + 1\r
- lea sym2_R, [probs + 2 * PMULT]\r
- \r
- jmp direct_norm\r
- ; lea t1, [sym_R + (1 SHL kNumAlignBits)]\r
- ; cmp range, kTopValue\r
- ; jb direct_norm\r
- \r
-; ---------- DIRECT DISTANCE ----------\r
-MY_ALIGN_32\r
-direct_loop:\r
- shr range, 1\r
- mov t0, cod\r
- sub cod, range\r
- cmovs cod, t0\r
- cmovns sym, t1\r
- \r
- comment ~\r
- sub cod, range\r
- mov x2, cod\r
- sar x2, 31\r
- lea sym, dword ptr [r2 + sym_R * 2 + 1]\r
- and x2, range\r
- add cod, x2\r
- ~\r
- dec x1\r
- je direct_end\r
-\r
- add sym, sym\r
-direct_norm:\r
- lea t1, [sym_R + (1 SHL kNumAlignBits)]\r
- cmp range, kTopValue\r
- jae near ptr direct_loop\r
- ; we align for 32 here with "near ptr" command above\r
- NORM_2\r
- jmp direct_loop\r
-\r
-MY_ALIGN_32\r
-direct_end:\r
- ; prob = + kAlign;\r
- ; distance <<= kNumAlignBits;\r
- REV_0 x2, x1\r
- REV_1 x1, x2, 2\r
- REV_1 x2, x1, 4\r
- REV_2 x1, 8\r
-\r
-decode_dist_end:\r
-\r
- ; if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))\r
-\r
- mov t1, LOC rep0\r
- mov x1, LOC rep1\r
- mov x2, LOC rep2\r
- \r
- mov t0, LOC checkDicSize\r
- test t0, t0\r
- cmove t0, processedPos\r
- cmp sym, t0\r
- jae end_of_payload\r
- ; jmp end_of_payload ; for debug\r
- \r
- ; rep3 = rep2;\r
- ; rep2 = rep1;\r
- ; rep1 = rep0;\r
- ; rep0 = distance + 1;\r
-\r
- inc sym\r
- mov LOC rep0, sym\r
- ; mov sym, LOC remainLen\r
- mov sym, len_temp\r
- mov LOC rep1, t1\r
- mov LOC rep2, x1\r
- mov LOC rep3, x2\r
- \r
- ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;\r
- cmp state, (kNumStates + kNumLitStates) * PMULT\r
- mov state, kNumLitStates * PMULT\r
- mov t0, (kNumLitStates + 3) * PMULT\r
- cmovae state, t0\r
-\r
- \r
-; ---------- COPY MATCH ----------\r
-copy_match:\r
-\r
- ; len += kMatchMinLen;\r
- ; add sym, kMatchMinLen\r
-\r
- ; if ((rem = limit - dicPos) == 0)\r
- ; {\r
- ; p->dicPos = dicPos;\r
- ; return SZ_ERROR_DATA;\r
- ; }\r
- mov cnt_R, LOC limit\r
- sub cnt_R, dicPos\r
- jz fin_dicPos_LIMIT\r
-\r
- ; curLen = ((rem < len) ? (unsigned)rem : len);\r
- cmp cnt_R, sym_R\r
- ; cmovae cnt_R, sym_R ; 64-bit\r
- cmovae cnt, sym ; 32-bit\r
-\r
- mov dic, LOC dic_Spec\r
- mov x1, LOC rep0\r
-\r
- mov t0_R, dicPos\r
- add dicPos, cnt_R\r
- ; processedPos += curLen;\r
- add processedPos, cnt\r
- ; len -= curLen;\r
- sub sym, cnt\r
- mov LOC remainLen, sym\r
-\r
- sub t0_R, dic\r
- \r
- ; pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);\r
- sub t0_R, r1\r
- jae @f\r
-\r
- mov r1, LOC dicBufSize\r
- add t0_R, r1\r
- sub r1, t0_R\r
- cmp cnt_R, r1\r
- ja copy_match_cross\r
-@@:\r
- ; if (curLen <= dicBufSize - pos)\r
-\r
-; ---------- COPY MATCH FAST ----------\r
- ; Byte *dest = dic + dicPos;\r
- ; mov r1, dic\r
- ; ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;\r
- ; sub t0_R, dicPos\r
- ; dicPos += curLen;\r
-\r
- ; const Byte *lim = dest + curLen;\r
- add t0_R, dic\r
- movzx sym, byte ptr[t0_R]\r
- add t0_R, cnt_R\r
- neg cnt_R\r
- ; lea r1, [dicPos - 1]\r
-copy_common:\r
- dec dicPos\r
- ; cmp LOC rep0, 1\r
- ; je rep0Label\r
-\r
- ; t0_R - src_lim\r
- ; r1 - dest_lim - 1\r
- ; cnt_R - (-cnt)\r
-\r
- IsMatchBranch_Pre\r
- inc cnt_R\r
- jz copy_end\r
-MY_ALIGN_16\r
-@@:\r
- mov byte ptr[cnt_R * 1 + dicPos], sym_L\r
- movzx sym, byte ptr[cnt_R * 1 + t0_R]\r
- inc cnt_R\r
- jnz @b\r
-\r
-copy_end:\r
-lz_end_match:\r
- mov byte ptr[dicPos], sym_L\r
- inc dicPos\r
- \r
- ; IsMatchBranch_Pre\r
- CheckLimits\r
-lz_end:\r
- IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label\r
-\r
-\r
-\r
-; ---------- LITERAL MATCHED ----------\r
- \r
- LIT_PROBS LOC lpMask\r
- \r
- ; matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];\r
- mov x1, LOC rep0\r
- ; mov dic, LOC dic_Spec\r
- mov LOC dicPos_Spec, dicPos\r
- \r
- ; state -= (state < 10) ? 3 : 6;\r
- lea t0, [state_R - 6 * PMULT]\r
- sub state, 3 * PMULT\r
- cmp state, 7 * PMULT\r
- cmovae state, t0\r
- \r
- sub dicPos, dic\r
- sub dicPos, r1\r
- jae @f\r
- add dicPos, LOC dicBufSize\r
-@@:\r
- comment ~\r
- xor t0, t0\r
- sub dicPos, r1\r
- cmovb t0_R, LOC dicBufSize\r
- ~\r
- \r
- movzx match, byte ptr[dic + dicPos * 1]\r
-\r
- ifdef _LZMA_SIZE_OPT\r
-\r
- mov offs, 256 * PMULT\r
- shl match, (PSHIFT + 1)\r
- mov bit, match\r
- mov sym, 1\r
-MY_ALIGN_16\r
-litm_loop:\r
- LITM\r
- cmp sym, 256\r
- jb litm_loop\r
- sub sym, 256\r
- \r
- else\r
- \r
- LITM_0\r
- LITM\r
- LITM\r
- LITM\r
- LITM\r
- LITM\r
- LITM\r
- LITM_2\r
- \r
- endif\r
- \r
- mov probs, LOC probs_Spec\r
- IsMatchBranch_Pre\r
- ; mov dic, LOC dic_Spec\r
- mov dicPos, LOC dicPos_Spec\r
- mov byte ptr[dicPos], sym_L\r
- inc dicPos\r
- \r
- CheckLimits\r
-lit_matched_end:\r
- IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label\r
- ; IsMatchBranch\r
- mov lpMask_reg, LOC lpMask\r
- sub state, 3 * PMULT\r
- jmp lit_start_2\r
- \r
-\r
-\r
-; ---------- REP 0 LITERAL ----------\r
-MY_ALIGN_32\r
-IsRep0Short_label:\r
- UPDATE_0 probs_state_R, pbPos_R, IsRep0Long\r
-\r
- ; dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];\r
- mov dic, LOC dic_Spec\r
- mov t0_R, dicPos\r
- mov probBranch, LOC rep0\r
- sub t0_R, dic\r
- \r
- sub probs, RepLenCoder * PMULT\r
- \r
- ; state = state < kNumLitStates ? 9 : 11;\r
- or state, 1 * PMULT\r
- \r
- ; the caller doesn't allow (dicPos >= limit) case for REP_SHORT\r
- ; so we don't need the following (dicPos == limit) check here:\r
- ; cmp dicPos, LOC limit\r
- ; jae fin_dicPos_LIMIT_REP_SHORT\r
-\r
- inc processedPos\r
-\r
- IsMatchBranch_Pre\r
- \r
-; xor sym, sym\r
-; sub t0_R, probBranch_R\r
-; cmovb sym_R, LOC dicBufSize\r
-; add t0_R, sym_R\r
- sub t0_R, probBranch_R\r
- jae @f\r
- add t0_R, LOC dicBufSize\r
-@@:\r
- movzx sym, byte ptr[dic + t0_R * 1]\r
- jmp lz_end_match\r
- \r
- \r
-MY_ALIGN_32\r
-IsRep_label:\r
- UPDATE_1 probs_state_R, 0, IsRep\r
-\r
- ; The (checkDicSize == 0 && processedPos == 0) case was checked before in LzmaDec.c with kBadRepCode.\r
- ; So we don't check it here.\r
- \r
- ; mov t0, processedPos\r
- ; or t0, LOC checkDicSize\r
- ; jz fin_ERROR_2\r
-\r
- ; state = state < kNumLitStates ? 8 : 11;\r
- cmp state, kNumLitStates * PMULT\r
- mov state, 8 * PMULT\r
- mov probBranch, 11 * PMULT\r
- cmovae state, probBranch\r
-\r
- ; prob = probs + RepLenCoder;\r
- add probs, RepLenCoder * PMULT\r
- \r
- IF_BIT_1 probs_state_R, 0, IsRepG0, IsRepG0_label\r
- IF_BIT_0_NOUP probs_state_R, pbPos_R, IsRep0Long, IsRep0Short_label\r
- UPDATE_1 probs_state_R, pbPos_R, IsRep0Long\r
- jmp len_decode\r
-\r
-MY_ALIGN_32\r
-IsRepG0_label:\r
- UPDATE_1 probs_state_R, 0, IsRepG0\r
- mov dist2, LOC rep0\r
- mov dist, LOC rep1\r
- mov LOC rep1, dist2\r
- \r
- IF_BIT_1 probs_state_R, 0, IsRepG1, IsRepG1_label\r
- mov LOC rep0, dist\r
- jmp len_decode\r
- \r
-; MY_ALIGN_32\r
-IsRepG1_label:\r
- UPDATE_1 probs_state_R, 0, IsRepG1\r
- mov dist2, LOC rep2\r
- mov LOC rep2, dist\r
- \r
- IF_BIT_1 probs_state_R, 0, IsRepG2, IsRepG2_label\r
- mov LOC rep0, dist2\r
- jmp len_decode\r
-\r
-; MY_ALIGN_32\r
-IsRepG2_label:\r
- UPDATE_1 probs_state_R, 0, IsRepG2\r
- mov dist, LOC rep3\r
- mov LOC rep3, dist2\r
- mov LOC rep0, dist\r
- jmp len_decode\r
-\r
- \r
-\r
-; ---------- SPEC SHORT DISTANCE ----------\r
-\r
-MY_ALIGN_32\r
-short_dist:\r
- sub x1, 32 + 1\r
- jbe decode_dist_end\r
- or sym, 2\r
- shl sym, x1_L\r
- lea sym_R, [probs + sym_R * PMULT + SpecPos * PMULT + 1 * PMULT]\r
- mov sym2, PMULT ; step\r
-MY_ALIGN_32\r
-spec_loop:\r
- REV_1_VAR x2\r
- dec x1\r
- jnz spec_loop\r
-\r
- mov probs, LOC probs_Spec\r
- sub sym, sym2\r
- sub sym, SpecPos * PMULT\r
- sub sym_R, probs\r
- shr sym, PSHIFT\r
- \r
- jmp decode_dist_end\r
-\r
-\r
-; ---------- COPY MATCH CROSS ----------\r
-copy_match_cross:\r
- ; t0_R - src pos\r
- ; r1 - len to dicBufSize\r
- ; cnt_R - total copy len\r
-\r
- mov t1_R, t0_R ; srcPos\r
- mov t0_R, dic\r
- mov r1, LOC dicBufSize ;\r
- neg cnt_R\r
-@@:\r
- movzx sym, byte ptr[t1_R * 1 + t0_R]\r
- inc t1_R\r
- mov byte ptr[cnt_R * 1 + dicPos], sym_L\r
- inc cnt_R\r
- cmp t1_R, r1\r
- jne @b\r
- \r
- movzx sym, byte ptr[t0_R]\r
- sub t0_R, cnt_R\r
- jmp copy_common\r
-\r
-\r
-\r
-\r
-; fin_dicPos_LIMIT_REP_SHORT:\r
- ; mov sym, 1\r
-\r
-fin_dicPos_LIMIT:\r
- mov LOC remainLen, sym\r
- jmp fin_OK\r
- ; For more strict mode we can stop decoding with error\r
- ; mov sym, 1\r
- ; jmp fin\r
-\r
-\r
-fin_ERROR_MATCH_DIST:\r
-\r
- ; rep3 = rep2;\r
- ; rep2 = rep1;\r
- ; rep1 = rep0;\r
- ; rep0 = distance + 1;\r
- \r
- add len_temp, kMatchSpecLen_Error_Data\r
- mov LOC remainLen, len_temp\r
-\r
- mov LOC rep0, sym\r
- mov LOC rep1, t1\r
- mov LOC rep2, x1\r
- mov LOC rep3, x2\r
- \r
- ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;\r
- cmp state, (kNumStates + kNumLitStates) * PMULT\r
- mov state, kNumLitStates * PMULT\r
- mov t0, (kNumLitStates + 3) * PMULT\r
- cmovae state, t0\r
-\r
- ; jmp fin_OK\r
- mov sym, 1\r
- jmp fin\r
-\r
-end_of_payload:\r
- inc sym\r
- jnz fin_ERROR_MATCH_DIST\r
-\r
- mov LOC remainLen, kMatchSpecLenStart\r
- sub state, kNumStates * PMULT\r
-\r
-fin_OK:\r
- xor sym, sym\r
-\r
-fin:\r
- NORM\r
-\r
- mov r1, LOC lzmaPtr\r
-\r
- sub dicPos, LOC dic_Spec\r
- mov GLOB dicPos_Spec, dicPos\r
- mov GLOB buf_Spec, buf\r
- mov GLOB range_Spec, range\r
- mov GLOB code_Spec, cod\r
- shr state, PSHIFT\r
- mov GLOB state_Spec, state\r
- mov GLOB processedPos_Spec, processedPos\r
-\r
- RESTORE_VAR(remainLen)\r
- RESTORE_VAR(rep0)\r
- RESTORE_VAR(rep1)\r
- RESTORE_VAR(rep2)\r
- RESTORE_VAR(rep3)\r
-\r
- mov x0, sym\r
- \r
- mov RSP, LOC Old_RSP\r
-\r
-MY_POP_PRESERVED_ABI_REGS\r
-MY_ENDP\r
-\r
-_TEXT$LZMADECOPT ENDS\r
-\r
-end\r
+; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
+; 2021-02-23: Igor Pavlov : Public domain
+;
+; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
+; function for check at link time.
+; That code is tightly coupled with LzmaDec_TryDummy()
+; and with another functions in LzmaDec.c file.
+; CLzmaDec structure, (probs) array layout, input and output of
+; LzmaDec_DecodeReal_*() must be equal in both versions (C / ASM).
+
+ifndef x64
+; x64=1
+; .err <x64_IS_REQUIRED>
+endif
+
+include 7zAsm.asm
+
+MY_ASM_START
+
+_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
+
+MY_ALIGN macro num:req
+ align num
+endm
+
+MY_ALIGN_16 macro
+ MY_ALIGN 16
+endm
+
+MY_ALIGN_32 macro
+ MY_ALIGN 32
+endm
+
+MY_ALIGN_64 macro
+ MY_ALIGN 64
+endm
+
+
+; _LZMA_SIZE_OPT equ 1
+
+; _LZMA_PROB32 equ 1
+
+ifdef _LZMA_PROB32
+ PSHIFT equ 2
+ PLOAD macro dest, mem
+ mov dest, dword ptr [mem]
+ endm
+ PSTORE macro src, mem
+ mov dword ptr [mem], src
+ endm
+else
+ PSHIFT equ 1
+ PLOAD macro dest, mem
+ movzx dest, word ptr [mem]
+ endm
+ PSTORE macro src, mem
+ mov word ptr [mem], @CatStr(src, _W)
+ endm
+endif
+
+PMULT equ (1 SHL PSHIFT)
+PMULT_HALF equ (1 SHL (PSHIFT - 1))
+PMULT_2 equ (1 SHL (PSHIFT + 1))
+
+kMatchSpecLen_Error_Data equ (1 SHL 9)
+
+; x0 range
+; x1 pbPos / (prob) TREE
+; x2 probBranch / prm (MATCHED) / pbPos / cnt
+; x3 sym
+;====== r4 === RSP
+; x5 cod
+; x6 t1 NORM_CALC / probs_state / dist
+; x7 t0 NORM_CALC / prob2 IF_BIT_1
+; x8 state
+; x9 match (MATCHED) / sym2 / dist2 / lpMask_reg
+; x10 kBitModelTotal_reg
+; r11 probs
+; x12 offs (MATCHED) / dic / len_temp
+; x13 processedPos
+; x14 bit (MATCHED) / dicPos
+; r15 buf
+
+
+cod equ x5
+cod_L equ x5_L
+range equ x0
+state equ x8
+state_R equ r8
+buf equ r15
+processedPos equ x13
+kBitModelTotal_reg equ x10
+
+probBranch equ x2
+probBranch_R equ r2
+probBranch_W equ x2_W
+
+pbPos equ x1
+pbPos_R equ r1
+
+cnt equ x2
+cnt_R equ r2
+
+lpMask_reg equ x9
+dicPos equ r14
+
+sym equ x3
+sym_R equ r3
+sym_L equ x3_L
+
+probs equ r11
+dic equ r12
+
+t0 equ x7
+t0_W equ x7_W
+t0_R equ r7
+
+prob2 equ t0
+prob2_W equ t0_W
+
+t1 equ x6
+t1_R equ r6
+
+probs_state equ t1
+probs_state_R equ t1_R
+
+prm equ r2
+match equ x9
+match_R equ r9
+offs equ x12
+offs_R equ r12
+bit equ x14
+bit_R equ r14
+
+sym2 equ x9
+sym2_R equ r9
+
+len_temp equ x12
+
+dist equ sym
+dist2 equ x9
+
+
+
+kNumBitModelTotalBits equ 11
+kBitModelTotal equ (1 SHL kNumBitModelTotalBits)
+kNumMoveBits equ 5
+kBitModelOffset equ ((1 SHL kNumMoveBits) - 1)
+kTopValue equ (1 SHL 24)
+
+NORM_2 macro
+ ; movzx t0, BYTE PTR [buf]
+ shl cod, 8
+ mov cod_L, BYTE PTR [buf]
+ shl range, 8
+ ; or cod, t0
+ inc buf
+endm
+
+
+NORM macro
+ cmp range, kTopValue
+ jae SHORT @F
+ NORM_2
+@@:
+endm
+
+
+; ---------- Branch MACROS ----------
+
+UPDATE_0 macro probsArray:req, probOffset:req, probDisp:req
+ mov prob2, kBitModelTotal_reg
+ sub prob2, probBranch
+ shr prob2, kNumMoveBits
+ add probBranch, prob2
+ PSTORE probBranch, probOffset * 1 + probsArray + probDisp * PMULT
+endm
+
+
+UPDATE_1 macro probsArray:req, probOffset:req, probDisp:req
+ sub prob2, range
+ sub cod, range
+ mov range, prob2
+ mov prob2, probBranch
+ shr probBranch, kNumMoveBits
+ sub prob2, probBranch
+ PSTORE prob2, probOffset * 1 + probsArray + probDisp * PMULT
+endm
+
+
+CMP_COD macro probsArray:req, probOffset:req, probDisp:req
+ PLOAD probBranch, probOffset * 1 + probsArray + probDisp * PMULT
+ NORM
+ mov prob2, range
+ shr range, kNumBitModelTotalBits
+ imul range, probBranch
+ cmp cod, range
+endm
+
+
+IF_BIT_1_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
+ CMP_COD probsArray, probOffset, probDisp
+ jae toLabel
+endm
+
+
+IF_BIT_1 macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
+ IF_BIT_1_NOUP probsArray, probOffset, probDisp, toLabel
+ UPDATE_0 probsArray, probOffset, probDisp
+endm
+
+
+IF_BIT_0_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
+ CMP_COD probsArray, probOffset, probDisp
+ jb toLabel
+endm
+
+
+; ---------- CMOV MACROS ----------
+
+NORM_CALC macro prob:req
+ NORM
+ mov t0, range
+ shr range, kNumBitModelTotalBits
+ imul range, prob
+ sub t0, range
+ mov t1, cod
+ sub cod, range
+endm
+
+
+PUP macro prob:req, probPtr:req
+ sub t0, prob
+ ; only sar works for both 16/32 bit prob modes
+ sar t0, kNumMoveBits
+ add t0, prob
+ PSTORE t0, probPtr
+endm
+
+
+PUP_SUB macro prob:req, probPtr:req, symSub:req
+ sbb sym, symSub
+ PUP prob, probPtr
+endm
+
+
+PUP_COD macro prob:req, probPtr:req, symSub:req
+ mov t0, kBitModelOffset
+ cmovb cod, t1
+ mov t1, sym
+ cmovb t0, kBitModelTotal_reg
+ PUP_SUB prob, probPtr, symSub
+endm
+
+
+BIT_0 macro prob:req, probNext:req
+ PLOAD prob, probs + 1 * PMULT
+ PLOAD probNext, probs + 1 * PMULT_2
+
+ NORM_CALC prob
+
+ cmovae range, t0
+ PLOAD t0, probs + 1 * PMULT_2 + PMULT
+ cmovae probNext, t0
+ mov t0, kBitModelOffset
+ cmovb cod, t1
+ cmovb t0, kBitModelTotal_reg
+ mov sym, 2
+ PUP_SUB prob, probs + 1 * PMULT, 0 - 1
+endm
+
+
+BIT_1 macro prob:req, probNext:req
+ PLOAD probNext, probs + sym_R * PMULT_2
+ add sym, sym
+
+ NORM_CALC prob
+
+ cmovae range, t0
+ PLOAD t0, probs + sym_R * PMULT + PMULT
+ cmovae probNext, t0
+ PUP_COD prob, probs + t1_R * PMULT_HALF, 0 - 1
+endm
+
+
+BIT_2 macro prob:req, symSub:req
+ add sym, sym
+
+ NORM_CALC prob
+
+ cmovae range, t0
+ PUP_COD prob, probs + t1_R * PMULT_HALF, symSub
+endm
+
+
+; ---------- MATCHED LITERAL ----------
+
+LITM_0 macro
+ mov offs, 256 * PMULT
+ shl match, (PSHIFT + 1)
+ mov bit, offs
+ and bit, match
+ PLOAD x1, probs + 256 * PMULT + bit_R * 1 + 1 * PMULT
+ lea prm, [probs + 256 * PMULT + bit_R * 1 + 1 * PMULT]
+ ; lea prm, [probs + 256 * PMULT + 1 * PMULT]
+ ; add prm, bit_R
+ xor offs, bit
+ add match, match
+
+ NORM_CALC x1
+
+ cmovae offs, bit
+ mov bit, match
+ cmovae range, t0
+ mov t0, kBitModelOffset
+ cmovb cod, t1
+ cmovb t0, kBitModelTotal_reg
+ mov sym, 0
+ PUP_SUB x1, prm, -2-1
+endm
+
+
+LITM macro
+ and bit, offs
+ lea prm, [probs + offs_R * 1]
+ add prm, bit_R
+ PLOAD x1, prm + sym_R * PMULT
+ xor offs, bit
+ add sym, sym
+ add match, match
+
+ NORM_CALC x1
+
+ cmovae offs, bit
+ mov bit, match
+ cmovae range, t0
+ PUP_COD x1, prm + t1_R * PMULT_HALF, - 1
+endm
+
+
+LITM_2 macro
+ and bit, offs
+ lea prm, [probs + offs_R * 1]
+ add prm, bit_R
+ PLOAD x1, prm + sym_R * PMULT
+ add sym, sym
+
+ NORM_CALC x1
+
+ cmovae range, t0
+ PUP_COD x1, prm + t1_R * PMULT_HALF, 256 - 1
+endm
+
+
+; ---------- REVERSE BITS ----------
+
+REV_0 macro prob:req, probNext:req
+ ; PLOAD prob, probs + 1 * PMULT
+ ; lea sym2_R, [probs + 2 * PMULT]
+ ; PLOAD probNext, probs + 2 * PMULT
+ PLOAD probNext, sym2_R
+
+ NORM_CALC prob
+
+ cmovae range, t0
+ PLOAD t0, probs + 3 * PMULT
+ cmovae probNext, t0
+ cmovb cod, t1
+ mov t0, kBitModelOffset
+ cmovb t0, kBitModelTotal_reg
+ lea t1_R, [probs + 3 * PMULT]
+ cmovae sym2_R, t1_R
+ PUP prob, probs + 1 * PMULT
+endm
+
+
+REV_1 macro prob:req, probNext:req, step:req
+ add sym2_R, step * PMULT
+ PLOAD probNext, sym2_R
+
+ NORM_CALC prob
+
+ cmovae range, t0
+ PLOAD t0, sym2_R + step * PMULT
+ cmovae probNext, t0
+ cmovb cod, t1
+ mov t0, kBitModelOffset
+ cmovb t0, kBitModelTotal_reg
+ lea t1_R, [sym2_R + step * PMULT]
+ cmovae sym2_R, t1_R
+ PUP prob, t1_R - step * PMULT_2
+endm
+
+
+REV_2 macro prob:req, step:req
+ sub sym2_R, probs
+ shr sym2, PSHIFT
+ or sym, sym2
+
+ NORM_CALC prob
+
+ cmovae range, t0
+ lea t0, [sym - step]
+ cmovb sym, t0
+ cmovb cod, t1
+ mov t0, kBitModelOffset
+ cmovb t0, kBitModelTotal_reg
+ PUP prob, probs + sym2_R * PMULT
+endm
+
+
+REV_1_VAR macro prob:req
+ PLOAD prob, sym_R
+ mov probs, sym_R
+ add sym_R, sym2_R
+
+ NORM_CALC prob
+
+ cmovae range, t0
+ lea t0_R, [sym_R + 1 * sym2_R]
+ cmovae sym_R, t0_R
+ mov t0, kBitModelOffset
+ cmovb cod, t1
+ ; mov t1, kBitModelTotal
+ ; cmovb t0, t1
+ cmovb t0, kBitModelTotal_reg
+ add sym2, sym2
+ PUP prob, probs
+endm
+
+
+
+
+LIT_PROBS macro lpMaskParam:req
+ ; prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
+ mov t0, processedPos
+ shl t0, 8
+ add sym, t0
+ and sym, lpMaskParam
+ add probs_state_R, pbPos_R
+ mov x1, LOC lc2
+ lea sym, dword ptr[sym_R + 2 * sym_R]
+ add probs, Literal * PMULT
+ shl sym, x1_L
+ add probs, sym_R
+ UPDATE_0 probs_state_R, 0, IsMatch
+ inc processedPos
+endm
+
+
+
+kNumPosBitsMax equ 4
+kNumPosStatesMax equ (1 SHL kNumPosBitsMax)
+
+kLenNumLowBits equ 3
+kLenNumLowSymbols equ (1 SHL kLenNumLowBits)
+kLenNumHighBits equ 8
+kLenNumHighSymbols equ (1 SHL kLenNumHighBits)
+kNumLenProbs equ (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols)
+
+LenLow equ 0
+LenChoice equ LenLow
+LenChoice2 equ (LenLow + kLenNumLowSymbols)
+LenHigh equ (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax)
+
+kNumStates equ 12
+kNumStates2 equ 16
+kNumLitStates equ 7
+
+kStartPosModelIndex equ 4
+kEndPosModelIndex equ 14
+kNumFullDistances equ (1 SHL (kEndPosModelIndex SHR 1))
+
+kNumPosSlotBits equ 6
+kNumLenToPosStates equ 4
+
+kNumAlignBits equ 4
+kAlignTableSize equ (1 SHL kNumAlignBits)
+
+kMatchMinLen equ 2
+kMatchSpecLenStart equ (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
+
+kStartOffset equ 1664
+SpecPos equ (-kStartOffset)
+IsRep0Long equ (SpecPos + kNumFullDistances)
+RepLenCoder equ (IsRep0Long + (kNumStates2 SHL kNumPosBitsMax))
+LenCoder equ (RepLenCoder + kNumLenProbs)
+IsMatch equ (LenCoder + kNumLenProbs)
+kAlign equ (IsMatch + (kNumStates2 SHL kNumPosBitsMax))
+IsRep equ (kAlign + kAlignTableSize)
+IsRepG0 equ (IsRep + kNumStates)
+IsRepG1 equ (IsRepG0 + kNumStates)
+IsRepG2 equ (IsRepG1 + kNumStates)
+PosSlot equ (IsRepG2 + kNumStates)
+Literal equ (PosSlot + (kNumLenToPosStates SHL kNumPosSlotBits))
+NUM_BASE_PROBS equ (Literal + kStartOffset)
+
+if kAlign ne 0
+ .err <Stop_Compiling_Bad_LZMA_kAlign>
+endif
+
+if NUM_BASE_PROBS ne 1984
+ .err <Stop_Compiling_Bad_LZMA_PROBS>
+endif
+
+
+PTR_FIELD equ dq ?
+
+CLzmaDec_Asm struct
+ lc db ?
+ lp db ?
+ pb db ?
+ _pad_ db ?
+ dicSize dd ?
+
+ probs_Spec PTR_FIELD
+ probs_1664 PTR_FIELD
+ dic_Spec PTR_FIELD
+ dicBufSize PTR_FIELD
+ dicPos_Spec PTR_FIELD
+ buf_Spec PTR_FIELD
+
+ range_Spec dd ?
+ code_Spec dd ?
+ processedPos_Spec dd ?
+ checkDicSize dd ?
+ rep0 dd ?
+ rep1 dd ?
+ rep2 dd ?
+ rep3 dd ?
+ state_Spec dd ?
+ remainLen dd ?
+CLzmaDec_Asm ends
+
+
+CLzmaDec_Asm_Loc struct
+ OLD_RSP PTR_FIELD
+ lzmaPtr PTR_FIELD
+ _pad0_ PTR_FIELD
+ _pad1_ PTR_FIELD
+ _pad2_ PTR_FIELD
+ dicBufSize PTR_FIELD
+ probs_Spec PTR_FIELD
+ dic_Spec PTR_FIELD
+
+ limit PTR_FIELD
+ bufLimit PTR_FIELD
+ lc2 dd ?
+ lpMask dd ?
+ pbMask dd ?
+ checkDicSize dd ?
+
+ _pad_ dd ?
+ remainLen dd ?
+ dicPos_Spec PTR_FIELD
+ rep0 dd ?
+ rep1 dd ?
+ rep2 dd ?
+ rep3 dd ?
+CLzmaDec_Asm_Loc ends
+
+
+GLOB_2 equ [sym_R].CLzmaDec_Asm.
+GLOB equ [r1].CLzmaDec_Asm.
+LOC_0 equ [r0].CLzmaDec_Asm_Loc.
+LOC equ [RSP].CLzmaDec_Asm_Loc.
+
+
+COPY_VAR macro name
+ mov t0, GLOB_2 name
+ mov LOC_0 name, t0
+endm
+
+
+RESTORE_VAR macro name
+ mov t0, LOC name
+ mov GLOB name, t0
+endm
+
+
+
+IsMatchBranch_Pre macro reg
+ ; prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
+ mov pbPos, LOC pbMask
+ and pbPos, processedPos
+ shl pbPos, (kLenNumLowBits + 1 + PSHIFT)
+ lea probs_state_R, [probs + 1 * state_R]
+endm
+
+
+IsMatchBranch macro reg
+ IsMatchBranch_Pre
+ IF_BIT_1 probs_state_R, pbPos_R, IsMatch, IsMatch_label
+endm
+
+
+CheckLimits macro reg
+ cmp buf, LOC bufLimit
+ jae fin_OK
+ cmp dicPos, LOC limit
+ jae fin_OK
+endm
+
+
+
+; RSP is (16x + 8) bytes aligned in WIN64-x64
+; LocalSize equ ((((SIZEOF CLzmaDec_Asm_Loc) + 7) / 16 * 16) + 8)
+
+PARAM_lzma equ REG_ABI_PARAM_0
+PARAM_limit equ REG_ABI_PARAM_1
+PARAM_bufLimit equ REG_ABI_PARAM_2
+
+; MY_ALIGN_64
+MY_PROC LzmaDec_DecodeReal_3, 3
+MY_PUSH_PRESERVED_ABI_REGS
+
+ lea r0, [RSP - (SIZEOF CLzmaDec_Asm_Loc)]
+ and r0, -128
+ mov r5, RSP
+ mov RSP, r0
+ mov LOC_0 Old_RSP, r5
+ mov LOC_0 lzmaPtr, PARAM_lzma
+
+ mov LOC_0 remainLen, 0 ; remainLen must be ZERO
+
+ mov LOC_0 bufLimit, PARAM_bufLimit
+ mov sym_R, PARAM_lzma ; CLzmaDec_Asm_Loc pointer for GLOB_2
+ mov dic, GLOB_2 dic_Spec
+ add PARAM_limit, dic
+ mov LOC_0 limit, PARAM_limit
+
+ COPY_VAR(rep0)
+ COPY_VAR(rep1)
+ COPY_VAR(rep2)
+ COPY_VAR(rep3)
+
+ mov dicPos, GLOB_2 dicPos_Spec
+ add dicPos, dic
+ mov LOC_0 dicPos_Spec, dicPos
+ mov LOC_0 dic_Spec, dic
+
+ mov x1_L, GLOB_2 pb
+ mov t0, 1
+ shl t0, x1_L
+ dec t0
+ mov LOC_0 pbMask, t0
+
+ ; unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
+ ; unsigned lc = p->prop.lc;
+ ; unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);
+
+ mov x1_L, GLOB_2 lc
+ mov x2, 100h
+ mov t0, x2
+ shr x2, x1_L
+ ; inc x1
+ add x1_L, PSHIFT
+ mov LOC_0 lc2, x1
+ mov x1_L, GLOB_2 lp
+ shl t0, x1_L
+ sub t0, x2
+ mov LOC_0 lpMask, t0
+ mov lpMask_reg, t0
+
+ ; mov probs, GLOB_2 probs_Spec
+ ; add probs, kStartOffset SHL PSHIFT
+ mov probs, GLOB_2 probs_1664
+ mov LOC_0 probs_Spec, probs
+
+ mov t0_R, GLOB_2 dicBufSize
+ mov LOC_0 dicBufSize, t0_R
+
+ mov x1, GLOB_2 checkDicSize
+ mov LOC_0 checkDicSize, x1
+
+ mov processedPos, GLOB_2 processedPos_Spec
+
+ mov state, GLOB_2 state_Spec
+ shl state, PSHIFT
+
+ mov buf, GLOB_2 buf_Spec
+ mov range, GLOB_2 range_Spec
+ mov cod, GLOB_2 code_Spec
+ mov kBitModelTotal_reg, kBitModelTotal
+ xor sym, sym
+
+ ; if (processedPos != 0 || checkDicSize != 0)
+ or x1, processedPos
+ jz @f
+
+ add t0_R, dic
+ cmp dicPos, dic
+ cmovnz t0_R, dicPos
+ movzx sym, byte ptr[t0_R - 1]
+
+@@:
+ IsMatchBranch_Pre
+ cmp state, 4 * PMULT
+ jb lit_end
+ cmp state, kNumLitStates * PMULT
+ jb lit_matched_end
+ jmp lz_end
+
+
+
+
+; ---------- LITERAL ----------
+MY_ALIGN_64
+lit_start:
+ xor state, state
+lit_start_2:
+ LIT_PROBS lpMask_reg
+
+ ifdef _LZMA_SIZE_OPT
+
+ PLOAD x1, probs + 1 * PMULT
+ mov sym, 1
+MY_ALIGN_16
+lit_loop:
+ BIT_1 x1, x2
+ mov x1, x2
+ cmp sym, 127
+ jbe lit_loop
+
+ else
+
+ BIT_0 x1, x2
+ BIT_1 x2, x1
+ BIT_1 x1, x2
+ BIT_1 x2, x1
+ BIT_1 x1, x2
+ BIT_1 x2, x1
+ BIT_1 x1, x2
+
+ endif
+
+ BIT_2 x2, 256 - 1
+
+ ; mov dic, LOC dic_Spec
+ mov probs, LOC probs_Spec
+ IsMatchBranch_Pre
+ mov byte ptr[dicPos], sym_L
+ inc dicPos
+
+ CheckLimits
+lit_end:
+ IF_BIT_0_NOUP probs_state_R, pbPos_R, IsMatch, lit_start
+
+ ; jmp IsMatch_label
+
+; ---------- MATCHES ----------
+; MY_ALIGN_32
+IsMatch_label:
+ UPDATE_1 probs_state_R, pbPos_R, IsMatch
+ IF_BIT_1 probs_state_R, 0, IsRep, IsRep_label
+
+ add probs, LenCoder * PMULT
+ add state, kNumStates * PMULT
+
+; ---------- LEN DECODE ----------
+len_decode:
+ mov len_temp, 8 - 1 - kMatchMinLen
+ IF_BIT_0_NOUP probs, 0, 0, len_mid_0
+ UPDATE_1 probs, 0, 0
+ add probs, (1 SHL (kLenNumLowBits + PSHIFT))
+ mov len_temp, -1 - kMatchMinLen
+ IF_BIT_0_NOUP probs, 0, 0, len_mid_0
+ UPDATE_1 probs, 0, 0
+ add probs, LenHigh * PMULT - (1 SHL (kLenNumLowBits + PSHIFT))
+ mov sym, 1
+ PLOAD x1, probs + 1 * PMULT
+
+MY_ALIGN_32
+len8_loop:
+ BIT_1 x1, x2
+ mov x1, x2
+ cmp sym, 64
+ jb len8_loop
+
+ mov len_temp, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - 1 - kMatchMinLen
+ jmp short len_mid_2 ; we use short here for MASM that doesn't optimize that code as another assembler programs
+
+MY_ALIGN_32
+len_mid_0:
+ UPDATE_0 probs, 0, 0
+ add probs, pbPos_R
+ BIT_0 x2, x1
+len_mid_2:
+ BIT_1 x1, x2
+ BIT_2 x2, len_temp
+ mov probs, LOC probs_Spec
+ cmp state, kNumStates * PMULT
+ jb copy_match
+
+
+; ---------- DECODE DISTANCE ----------
+ ; probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
+
+ mov t0, 3 + kMatchMinLen
+ cmp sym, 3 + kMatchMinLen
+ cmovb t0, sym
+ add probs, PosSlot * PMULT - (kMatchMinLen SHL (kNumPosSlotBits + PSHIFT))
+ shl t0, (kNumPosSlotBits + PSHIFT)
+ add probs, t0_R
+
+ ; sym = Len
+ ; mov LOC remainLen, sym
+ mov len_temp, sym
+
+ ifdef _LZMA_SIZE_OPT
+
+ PLOAD x1, probs + 1 * PMULT
+ mov sym, 1
+MY_ALIGN_16
+slot_loop:
+ BIT_1 x1, x2
+ mov x1, x2
+ cmp sym, 32
+ jb slot_loop
+
+ else
+
+ BIT_0 x1, x2
+ BIT_1 x2, x1
+ BIT_1 x1, x2
+ BIT_1 x2, x1
+ BIT_1 x1, x2
+
+ endif
+
+ mov x1, sym
+ BIT_2 x2, 64-1
+
+ and sym, 3
+ mov probs, LOC probs_Spec
+ cmp x1, 32 + kEndPosModelIndex / 2
+ jb short_dist
+
+ ; unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
+ sub x1, (32 + 1 + kNumAlignBits)
+ ; distance = (2 | (distance & 1));
+ or sym, 2
+ PLOAD x2, probs + 1 * PMULT
+ shl sym, kNumAlignBits + 1
+ lea sym2_R, [probs + 2 * PMULT]
+
+ jmp direct_norm
+ ; lea t1, [sym_R + (1 SHL kNumAlignBits)]
+ ; cmp range, kTopValue
+ ; jb direct_norm
+
+; ---------- DIRECT DISTANCE ----------
+MY_ALIGN_32
+direct_loop:
+ shr range, 1
+ mov t0, cod
+ sub cod, range
+ cmovs cod, t0
+ cmovns sym, t1
+
+ comment ~
+ sub cod, range
+ mov x2, cod
+ sar x2, 31
+ lea sym, dword ptr [r2 + sym_R * 2 + 1]
+ and x2, range
+ add cod, x2
+ ~
+ dec x1
+ je direct_end
+
+ add sym, sym
+direct_norm:
+ lea t1, [sym_R + (1 SHL kNumAlignBits)]
+ cmp range, kTopValue
+ jae near ptr direct_loop
+ ; we align for 32 here with "near ptr" command above
+ NORM_2
+ jmp direct_loop
+
+MY_ALIGN_32
+direct_end:
+ ; prob = + kAlign;
+ ; distance <<= kNumAlignBits;
+ REV_0 x2, x1
+ REV_1 x1, x2, 2
+ REV_1 x2, x1, 4
+ REV_2 x1, 8
+
+decode_dist_end:
+
+ ; if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
+
+ mov t1, LOC rep0
+ mov x1, LOC rep1
+ mov x2, LOC rep2
+
+ mov t0, LOC checkDicSize
+ test t0, t0
+ cmove t0, processedPos
+ cmp sym, t0
+ jae end_of_payload
+ ; jmp end_of_payload ; for debug
+
+ ; rep3 = rep2;
+ ; rep2 = rep1;
+ ; rep1 = rep0;
+ ; rep0 = distance + 1;
+
+ inc sym
+ mov LOC rep0, sym
+ ; mov sym, LOC remainLen
+ mov sym, len_temp
+ mov LOC rep1, t1
+ mov LOC rep2, x1
+ mov LOC rep3, x2
+
+ ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
+ cmp state, (kNumStates + kNumLitStates) * PMULT
+ mov state, kNumLitStates * PMULT
+ mov t0, (kNumLitStates + 3) * PMULT
+ cmovae state, t0
+
+
+; ---------- COPY MATCH ----------
+copy_match:
+
+ ; len += kMatchMinLen;
+ ; add sym, kMatchMinLen
+
+ ; if ((rem = limit - dicPos) == 0)
+ ; {
+ ; p->dicPos = dicPos;
+ ; return SZ_ERROR_DATA;
+ ; }
+ mov cnt_R, LOC limit
+ sub cnt_R, dicPos
+ jz fin_dicPos_LIMIT
+
+ ; curLen = ((rem < len) ? (unsigned)rem : len);
+ cmp cnt_R, sym_R
+ ; cmovae cnt_R, sym_R ; 64-bit
+ cmovae cnt, sym ; 32-bit
+
+ mov dic, LOC dic_Spec
+ mov x1, LOC rep0
+
+ mov t0_R, dicPos
+ add dicPos, cnt_R
+ ; processedPos += curLen;
+ add processedPos, cnt
+ ; len -= curLen;
+ sub sym, cnt
+ mov LOC remainLen, sym
+
+ sub t0_R, dic
+
+ ; pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
+ sub t0_R, r1
+ jae @f
+
+ mov r1, LOC dicBufSize
+ add t0_R, r1
+ sub r1, t0_R
+ cmp cnt_R, r1
+ ja copy_match_cross
+@@:
+ ; if (curLen <= dicBufSize - pos)
+
+; ---------- COPY MATCH FAST ----------
+ ; Byte *dest = dic + dicPos;
+ ; mov r1, dic
+ ; ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
+ ; sub t0_R, dicPos
+ ; dicPos += curLen;
+
+ ; const Byte *lim = dest + curLen;
+ add t0_R, dic
+ movzx sym, byte ptr[t0_R]
+ add t0_R, cnt_R
+ neg cnt_R
+ ; lea r1, [dicPos - 1]
+copy_common:
+ dec dicPos
+ ; cmp LOC rep0, 1
+ ; je rep0Label
+
+ ; t0_R - src_lim
+ ; r1 - dest_lim - 1
+ ; cnt_R - (-cnt)
+
+ IsMatchBranch_Pre
+ inc cnt_R
+ jz copy_end
+MY_ALIGN_16
+@@:
+ mov byte ptr[cnt_R * 1 + dicPos], sym_L
+ movzx sym, byte ptr[cnt_R * 1 + t0_R]
+ inc cnt_R
+ jnz @b
+
+copy_end:
+lz_end_match:
+ mov byte ptr[dicPos], sym_L
+ inc dicPos
+
+ ; IsMatchBranch_Pre
+ CheckLimits
+lz_end:
+ IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label
+
+
+
+; ---------- LITERAL MATCHED ----------
+
+ LIT_PROBS LOC lpMask
+
+ ; matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ mov x1, LOC rep0
+ ; mov dic, LOC dic_Spec
+ mov LOC dicPos_Spec, dicPos
+
+ ; state -= (state < 10) ? 3 : 6;
+ lea t0, [state_R - 6 * PMULT]
+ sub state, 3 * PMULT
+ cmp state, 7 * PMULT
+ cmovae state, t0
+
+ sub dicPos, dic
+ sub dicPos, r1
+ jae @f
+ add dicPos, LOC dicBufSize
+@@:
+ comment ~
+ xor t0, t0
+ sub dicPos, r1
+ cmovb t0_R, LOC dicBufSize
+ ~
+
+ movzx match, byte ptr[dic + dicPos * 1]
+
+ ifdef _LZMA_SIZE_OPT
+
+ mov offs, 256 * PMULT
+ shl match, (PSHIFT + 1)
+ mov bit, match
+ mov sym, 1
+MY_ALIGN_16
+litm_loop:
+ LITM
+ cmp sym, 256
+ jb litm_loop
+ sub sym, 256
+
+ else
+
+ LITM_0
+ LITM
+ LITM
+ LITM
+ LITM
+ LITM
+ LITM
+ LITM_2
+
+ endif
+
+ mov probs, LOC probs_Spec
+ IsMatchBranch_Pre
+ ; mov dic, LOC dic_Spec
+ mov dicPos, LOC dicPos_Spec
+ mov byte ptr[dicPos], sym_L
+ inc dicPos
+
+ CheckLimits
+lit_matched_end:
+ IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label
+ ; IsMatchBranch
+ mov lpMask_reg, LOC lpMask
+ sub state, 3 * PMULT
+ jmp lit_start_2
+
+
+
+; ---------- REP 0 LITERAL ----------
+MY_ALIGN_32
+IsRep0Short_label:
+ UPDATE_0 probs_state_R, pbPos_R, IsRep0Long
+
+ ; dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ mov dic, LOC dic_Spec
+ mov t0_R, dicPos
+ mov probBranch, LOC rep0
+ sub t0_R, dic
+
+ sub probs, RepLenCoder * PMULT
+
+ ; state = state < kNumLitStates ? 9 : 11;
+ or state, 1 * PMULT
+
+ ; the caller doesn't allow (dicPos >= limit) case for REP_SHORT
+ ; so we don't need the following (dicPos == limit) check here:
+ ; cmp dicPos, LOC limit
+ ; jae fin_dicPos_LIMIT_REP_SHORT
+
+ inc processedPos
+
+ IsMatchBranch_Pre
+
+; xor sym, sym
+; sub t0_R, probBranch_R
+; cmovb sym_R, LOC dicBufSize
+; add t0_R, sym_R
+ sub t0_R, probBranch_R
+ jae @f
+ add t0_R, LOC dicBufSize
+@@:
+ movzx sym, byte ptr[dic + t0_R * 1]
+ jmp lz_end_match
+
+
+MY_ALIGN_32
+IsRep_label:
+ UPDATE_1 probs_state_R, 0, IsRep
+
+ ; The (checkDicSize == 0 && processedPos == 0) case was checked before in LzmaDec.c with kBadRepCode.
+ ; So we don't check it here.
+
+ ; mov t0, processedPos
+ ; or t0, LOC checkDicSize
+ ; jz fin_ERROR_2
+
+ ; state = state < kNumLitStates ? 8 : 11;
+ cmp state, kNumLitStates * PMULT
+ mov state, 8 * PMULT
+ mov probBranch, 11 * PMULT
+ cmovae state, probBranch
+
+ ; prob = probs + RepLenCoder;
+ add probs, RepLenCoder * PMULT
+
+ IF_BIT_1 probs_state_R, 0, IsRepG0, IsRepG0_label
+ IF_BIT_0_NOUP probs_state_R, pbPos_R, IsRep0Long, IsRep0Short_label
+ UPDATE_1 probs_state_R, pbPos_R, IsRep0Long
+ jmp len_decode
+
+MY_ALIGN_32
+IsRepG0_label:
+ UPDATE_1 probs_state_R, 0, IsRepG0
+ mov dist2, LOC rep0
+ mov dist, LOC rep1
+ mov LOC rep1, dist2
+
+ IF_BIT_1 probs_state_R, 0, IsRepG1, IsRepG1_label
+ mov LOC rep0, dist
+ jmp len_decode
+
+; MY_ALIGN_32
+IsRepG1_label:
+ UPDATE_1 probs_state_R, 0, IsRepG1
+ mov dist2, LOC rep2
+ mov LOC rep2, dist
+
+ IF_BIT_1 probs_state_R, 0, IsRepG2, IsRepG2_label
+ mov LOC rep0, dist2
+ jmp len_decode
+
+; MY_ALIGN_32
+IsRepG2_label:
+ UPDATE_1 probs_state_R, 0, IsRepG2
+ mov dist, LOC rep3
+ mov LOC rep3, dist2
+ mov LOC rep0, dist
+ jmp len_decode
+
+
+
+; ---------- SPEC SHORT DISTANCE ----------
+
+MY_ALIGN_32
+short_dist:
+ sub x1, 32 + 1
+ jbe decode_dist_end
+ or sym, 2
+ shl sym, x1_L
+ lea sym_R, [probs + sym_R * PMULT + SpecPos * PMULT + 1 * PMULT]
+ mov sym2, PMULT ; step
+MY_ALIGN_32
+spec_loop:
+ REV_1_VAR x2
+ dec x1
+ jnz spec_loop
+
+ mov probs, LOC probs_Spec
+ sub sym, sym2
+ sub sym, SpecPos * PMULT
+ sub sym_R, probs
+ shr sym, PSHIFT
+
+ jmp decode_dist_end
+
+
+; ---------- COPY MATCH CROSS ----------
+copy_match_cross:
+ ; t0_R - src pos
+ ; r1 - len to dicBufSize
+ ; cnt_R - total copy len
+
+ mov t1_R, t0_R ; srcPos
+ mov t0_R, dic
+ mov r1, LOC dicBufSize ;
+ neg cnt_R
+@@:
+ movzx sym, byte ptr[t1_R * 1 + t0_R]
+ inc t1_R
+ mov byte ptr[cnt_R * 1 + dicPos], sym_L
+ inc cnt_R
+ cmp t1_R, r1
+ jne @b
+
+ movzx sym, byte ptr[t0_R]
+ sub t0_R, cnt_R
+ jmp copy_common
+
+
+
+
+; fin_dicPos_LIMIT_REP_SHORT:
+ ; mov sym, 1
+
+fin_dicPos_LIMIT:
+ mov LOC remainLen, sym
+ jmp fin_OK
+ ; For more strict mode we can stop decoding with error
+ ; mov sym, 1
+ ; jmp fin
+
+
+fin_ERROR_MATCH_DIST:
+
+ ; rep3 = rep2;
+ ; rep2 = rep1;
+ ; rep1 = rep0;
+ ; rep0 = distance + 1;
+
+ add len_temp, kMatchSpecLen_Error_Data
+ mov LOC remainLen, len_temp
+
+ mov LOC rep0, sym
+ mov LOC rep1, t1
+ mov LOC rep2, x1
+ mov LOC rep3, x2
+
+ ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
+ cmp state, (kNumStates + kNumLitStates) * PMULT
+ mov state, kNumLitStates * PMULT
+ mov t0, (kNumLitStates + 3) * PMULT
+ cmovae state, t0
+
+ ; jmp fin_OK
+ mov sym, 1
+ jmp fin
+
+end_of_payload:
+ inc sym
+ jnz fin_ERROR_MATCH_DIST
+
+ mov LOC remainLen, kMatchSpecLenStart
+ sub state, kNumStates * PMULT
+
+fin_OK:
+ xor sym, sym
+
+fin:
+ NORM
+
+ mov r1, LOC lzmaPtr
+
+ sub dicPos, LOC dic_Spec
+ mov GLOB dicPos_Spec, dicPos
+ mov GLOB buf_Spec, buf
+ mov GLOB range_Spec, range
+ mov GLOB code_Spec, cod
+ shr state, PSHIFT
+ mov GLOB state_Spec, state
+ mov GLOB processedPos_Spec, processedPos
+
+ RESTORE_VAR(remainLen)
+ RESTORE_VAR(rep0)
+ RESTORE_VAR(rep1)
+ RESTORE_VAR(rep2)
+ RESTORE_VAR(rep3)
+
+ mov x0, sym
+
+ mov RSP, LOC Old_RSP
+
+MY_POP_PRESERVED_ABI_REGS
+MY_ENDP
+
+_TEXT$LZMADECOPT ENDS
+
+end
--- /dev/null
+# Build the bundled 7-Zip LZMA SDK (24.05) as a static library for libchdr.
+# Only the sources needed for CHD (de)compression are compiled.
+add_library(lzma STATIC
+ include/7zTypes.h
+ include/7zWindows.h
+ include/Alloc.h
+ include/Bra.h
+ include/Compiler.h
+ include/CpuArch.h
+ include/Delta.h
+ include/LzFind.h
+ include/LzHash.h
+ include/Lzma86.h
+ include/LzmaDec.h
+ include/LzmaEnc.h
+ include/Precomp.h
+ include/RotateDefs.h
+ include/Sort.h
+ src/Alloc.c
+ src/Bra.c
+ src/Bra86.c
+ src/BraIA64.c
+ src/CpuArch.c
+ src/Delta.c
+ src/LzFind.c
+ src/Lzma86Dec.c
+ src/LzmaDec.c
+ src/LzmaEnc.c
+ src/Sort.c
+)
+
+# Z7_ST: compile the SDK in single-threaded mode (no multithreaded code paths,
+# no dependency on a threads library).
+target_compile_definitions(lzma PRIVATE Z7_ST)
+
+# The headers are needed both to build this target and by its consumers
+# (libchdr), so declare the include directory once as PUBLIC instead of a
+# duplicated PRIVATE + INTERFACE pair.
+target_include_directories(lzma PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
+
+# PIC so this static archive can be linked into the shared libchdr library.
+set_target_properties(lzma PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+option(WITH_LZMA_ASM "Use lzma asm" ON)
+if(WITH_LZMA_ASM)
+  # The hand-written assembly decoder (Z7_LZMA_DEC_OPT) is wired up only for
+  # Linux/arm64 (GAS syntax) and Windows/x64 (MASM); other targets fall back
+  # to the portable C implementation in src/LzmaDec.c.
+  include(CheckSymbolExists)
+  if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+    # __aarch64__ is a compiler-predefined macro; probing it detects the
+    # TARGET architecture (works under cross-compilation, unlike host checks).
+    check_symbol_exists("__aarch64__" "" CPU_ARM64)
+    if(CPU_ARM64)
+      enable_language(ASM)
+      set_source_files_properties(src/LzmaDec.c PROPERTIES COMPILE_DEFINITIONS Z7_LZMA_DEC_OPT)
+      target_sources(lzma PRIVATE Asm/arm64/LzmaDecOpt.S)
+      set_source_files_properties(Asm/arm64/LzmaDecOpt.S PROPERTIES LANGUAGE ASM)
+    endif()
+  elseif(WIN32)
+    # _M_AMD64 is predefined by MSVC-style compilers targeting x64.
+    check_symbol_exists("_M_AMD64" "" CPU_X64)
+    if(CPU_X64)
+      enable_language(ASM_MASM)
+      set_source_files_properties(src/LzmaDec.c PROPERTIES COMPILE_DEFINITIONS Z7_LZMA_DEC_OPT)
+      target_sources(lzma PRIVATE Asm/x86/LzmaDecOpt.asm)
+      set_source_files_properties(Asm/x86/LzmaDecOpt.asm PROPERTIES LANGUAGE ASM_MASM)
+    endif()
+  endif()
+endif()
-/* 7zTypes.h -- Basic types\r
-2022-04-01 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __7Z_TYPES_H\r
-#define __7Z_TYPES_H\r
-\r
-#ifdef _WIN32\r
-/* #include <windows.h> */\r
-#else\r
-#include <errno.h>\r
-#endif\r
-\r
-#include <stddef.h>\r
-\r
-#ifndef EXTERN_C_BEGIN\r
-#ifdef __cplusplus\r
-#define EXTERN_C_BEGIN extern "C" {\r
-#define EXTERN_C_END }\r
-#else\r
-#define EXTERN_C_BEGIN\r
-#define EXTERN_C_END\r
-#endif\r
-#endif\r
-\r
-EXTERN_C_BEGIN\r
-\r
-#define SZ_OK 0\r
-\r
-#define SZ_ERROR_DATA 1\r
-#define SZ_ERROR_MEM 2\r
-#define SZ_ERROR_CRC 3\r
-#define SZ_ERROR_UNSUPPORTED 4\r
-#define SZ_ERROR_PARAM 5\r
-#define SZ_ERROR_INPUT_EOF 6\r
-#define SZ_ERROR_OUTPUT_EOF 7\r
-#define SZ_ERROR_READ 8\r
-#define SZ_ERROR_WRITE 9\r
-#define SZ_ERROR_PROGRESS 10\r
-#define SZ_ERROR_FAIL 11\r
-#define SZ_ERROR_THREAD 12\r
-\r
-#define SZ_ERROR_ARCHIVE 16\r
-#define SZ_ERROR_NO_ARCHIVE 17\r
-\r
-typedef int SRes;\r
-\r
-\r
-#ifdef _MSC_VER\r
- #if _MSC_VER > 1200\r
- #define MY_ALIGN(n) __declspec(align(n))\r
- #else\r
- #define MY_ALIGN(n)\r
- #endif\r
-#else\r
- #define MY_ALIGN(n) __attribute__ ((aligned(n)))\r
-#endif\r
-\r
-\r
-#ifdef _WIN32\r
-\r
-/* typedef DWORD WRes; */\r
-typedef unsigned WRes;\r
-#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)\r
-\r
-// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)\r
-\r
-#else // _WIN32\r
-\r
-// #define ENV_HAVE_LSTAT\r
-typedef int WRes;\r
-\r
-// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT\r
-#define MY__FACILITY_ERRNO 0x800\r
-#define MY__FACILITY_WIN32 7\r
-#define MY__FACILITY__WRes MY__FACILITY_ERRNO\r
-\r
-#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \\r
- ( (HRESULT)(x) & 0x0000FFFF) \\r
- | (MY__FACILITY__WRes << 16) \\r
- | (HRESULT)0x80000000 ))\r
-\r
-#define MY_SRes_HRESULT_FROM_WRes(x) \\r
- ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x))\r
-\r
-// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno)\r
-#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x)\r
-\r
-/*\r
-#define ERROR_FILE_NOT_FOUND 2L\r
-#define ERROR_ACCESS_DENIED 5L\r
-#define ERROR_NO_MORE_FILES 18L\r
-#define ERROR_LOCK_VIOLATION 33L\r
-#define ERROR_FILE_EXISTS 80L\r
-#define ERROR_DISK_FULL 112L\r
-#define ERROR_NEGATIVE_SEEK 131L\r
-#define ERROR_ALREADY_EXISTS 183L\r
-#define ERROR_DIRECTORY 267L\r
-#define ERROR_TOO_MANY_POSTS 298L\r
-\r
-#define ERROR_INTERNAL_ERROR 1359L\r
-#define ERROR_INVALID_REPARSE_DATA 4392L\r
-#define ERROR_REPARSE_TAG_INVALID 4393L\r
-#define ERROR_REPARSE_TAG_MISMATCH 4394L\r
-*/\r
-\r
-// we use errno equivalents for some WIN32 errors:\r
-\r
-#define ERROR_INVALID_PARAMETER EINVAL\r
-#define ERROR_INVALID_FUNCTION EINVAL\r
-#define ERROR_ALREADY_EXISTS EEXIST\r
-#define ERROR_FILE_EXISTS EEXIST\r
-#define ERROR_PATH_NOT_FOUND ENOENT\r
-#define ERROR_FILE_NOT_FOUND ENOENT\r
-#define ERROR_DISK_FULL ENOSPC\r
-// #define ERROR_INVALID_HANDLE EBADF\r
-\r
-// we use FACILITY_WIN32 for errors that has no errno equivalent\r
-// Too many posts were made to a semaphore.\r
-#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL)\r
-#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L)\r
-#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L)\r
-\r
-// if (MY__FACILITY__WRes != FACILITY_WIN32),\r
-// we use FACILITY_WIN32 for COM errors:\r
-#define E_OUTOFMEMORY ((HRESULT)0x8007000EL)\r
-#define E_INVALIDARG ((HRESULT)0x80070057L)\r
-#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)\r
-\r
-/*\r
-// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:\r
-#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)\r
-#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)\r
-#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)\r
-*/\r
-\r
-#define TEXT(quote) quote\r
-\r
-#define FILE_ATTRIBUTE_READONLY 0x0001\r
-#define FILE_ATTRIBUTE_HIDDEN 0x0002\r
-#define FILE_ATTRIBUTE_SYSTEM 0x0004\r
-#define FILE_ATTRIBUTE_DIRECTORY 0x0010\r
-#define FILE_ATTRIBUTE_ARCHIVE 0x0020\r
-#define FILE_ATTRIBUTE_DEVICE 0x0040\r
-#define FILE_ATTRIBUTE_NORMAL 0x0080\r
-#define FILE_ATTRIBUTE_TEMPORARY 0x0100\r
-#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200\r
-#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400\r
-#define FILE_ATTRIBUTE_COMPRESSED 0x0800\r
-#define FILE_ATTRIBUTE_OFFLINE 0x1000\r
-#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000\r
-#define FILE_ATTRIBUTE_ENCRYPTED 0x4000\r
-\r
-#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */\r
-\r
-#endif\r
-\r
-\r
-#ifndef RINOK\r
-#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }\r
-#endif\r
-\r
-#ifndef RINOK_WRes\r
-#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; }\r
-#endif\r
-\r
-typedef unsigned char Byte;\r
-typedef short Int16;\r
-typedef unsigned short UInt16;\r
-\r
-#ifdef _LZMA_UINT32_IS_ULONG\r
-typedef long Int32;\r
-typedef unsigned long UInt32;\r
-#else\r
-typedef int Int32;\r
-typedef unsigned int UInt32;\r
-#endif\r
-\r
-\r
-#ifndef _WIN32\r
-\r
-typedef int INT;\r
-typedef Int32 INT32;\r
-typedef unsigned int UINT;\r
-typedef UInt32 UINT32;\r
-typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility\r
-typedef UINT32 ULONG;\r
-\r
-#undef DWORD\r
-typedef UINT32 DWORD;\r
-\r
-#define VOID void\r
-\r
-#define HRESULT LONG\r
-\r
-typedef void *LPVOID;\r
-// typedef void VOID;\r
-// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;\r
-// gcc / clang on Unix : sizeof(long==sizeof(void*) in 32 or 64 bits)\r
-typedef long INT_PTR;\r
-typedef unsigned long UINT_PTR;\r
-typedef long LONG_PTR;\r
-typedef unsigned long DWORD_PTR;\r
-\r
-typedef size_t SIZE_T;\r
-\r
-#endif // _WIN32\r
-\r
-\r
-#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL)\r
-\r
-\r
-#ifdef _SZ_NO_INT_64\r
-\r
-/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.\r
- NOTES: Some code will work incorrectly in that case! */\r
-\r
-typedef long Int64;\r
-typedef unsigned long UInt64;\r
-\r
-#else\r
-\r
-#if defined(_MSC_VER) || defined(__BORLANDC__)\r
-typedef __int64 Int64;\r
-typedef unsigned __int64 UInt64;\r
-#define UINT64_CONST(n) n\r
-#else\r
-typedef long long int Int64;\r
-typedef unsigned long long int UInt64;\r
-#define UINT64_CONST(n) n ## ULL\r
-#endif\r
-\r
-#endif\r
-\r
-#ifdef _LZMA_NO_SYSTEM_SIZE_T\r
-typedef UInt32 SizeT;\r
-#else\r
-typedef size_t SizeT;\r
-#endif\r
-\r
-typedef int BoolInt;\r
-/* typedef BoolInt Bool; */\r
-#define True 1\r
-#define False 0\r
-\r
-\r
-#ifdef _WIN32\r
-#define MY_STD_CALL __stdcall\r
-#else\r
-#define MY_STD_CALL\r
-#endif\r
-\r
-#ifdef _MSC_VER\r
-\r
-#if _MSC_VER >= 1300\r
-#define MY_NO_INLINE __declspec(noinline)\r
-#else\r
-#define MY_NO_INLINE\r
-#endif\r
-\r
-#define MY_FORCE_INLINE __forceinline\r
-\r
-#define MY_CDECL __cdecl\r
-#define MY_FAST_CALL __fastcall\r
-\r
-#else // _MSC_VER\r
-\r
-#if (defined(__GNUC__) && (__GNUC__ >= 4)) \\r
- || (defined(__clang__) && (__clang_major__ >= 4)) \\r
- || defined(__INTEL_COMPILER) \\r
- || defined(__xlC__)\r
-#define MY_NO_INLINE __attribute__((noinline))\r
-// #define MY_FORCE_INLINE __attribute__((always_inline)) inline\r
-#else\r
-#define MY_NO_INLINE\r
-#endif\r
-\r
-#define MY_FORCE_INLINE\r
-\r
-\r
-#define MY_CDECL\r
-\r
-#if defined(_M_IX86) \\r
- || defined(__i386__)\r
-// #define MY_FAST_CALL __attribute__((fastcall))\r
-// #define MY_FAST_CALL __attribute__((cdecl))\r
-#define MY_FAST_CALL\r
-#elif defined(MY_CPU_AMD64)\r
-// #define MY_FAST_CALL __attribute__((ms_abi))\r
-#define MY_FAST_CALL\r
-#else\r
-#define MY_FAST_CALL\r
-#endif\r
-\r
-#endif // _MSC_VER\r
-\r
-\r
-/* The following interfaces use first parameter as pointer to structure */\r
-\r
-typedef struct IByteIn IByteIn;\r
-struct IByteIn\r
-{\r
- Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */\r
-};\r
-#define IByteIn_Read(p) (p)->Read(p)\r
-\r
-\r
-typedef struct IByteOut IByteOut;\r
-struct IByteOut\r
-{\r
- void (*Write)(const IByteOut *p, Byte b);\r
-};\r
-#define IByteOut_Write(p, b) (p)->Write(p, b)\r
-\r
-\r
-typedef struct ISeqInStream ISeqInStream;\r
-struct ISeqInStream\r
-{\r
- SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size);\r
- /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.\r
- (output(*size) < input(*size)) is allowed */\r
-};\r
-#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)\r
-\r
-/* it can return SZ_ERROR_INPUT_EOF */\r
-SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size);\r
-SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType);\r
-SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf);\r
-\r
-\r
-typedef struct ISeqOutStream ISeqOutStream;\r
-struct ISeqOutStream\r
-{\r
- size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size);\r
- /* Returns: result - the number of actually written bytes.\r
- (result < size) means error */\r
-};\r
-#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size)\r
-\r
-typedef enum\r
-{\r
- SZ_SEEK_SET = 0,\r
- SZ_SEEK_CUR = 1,\r
- SZ_SEEK_END = 2\r
-} ESzSeek;\r
-\r
-\r
-typedef struct ISeekInStream ISeekInStream;\r
-struct ISeekInStream\r
-{\r
- SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */\r
- SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin);\r
-};\r
-#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)\r
-#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)\r
-\r
-\r
-typedef struct ILookInStream ILookInStream;\r
-struct ILookInStream\r
-{\r
- SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size);\r
- /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.\r
- (output(*size) > input(*size)) is not allowed\r
- (output(*size) < input(*size)) is allowed */\r
- SRes (*Skip)(const ILookInStream *p, size_t offset);\r
- /* offset must be <= output(*size) of Look */\r
-\r
- SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);\r
- /* reads directly (without buffer). It's same as ISeqInStream::Read */\r
- SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin);\r
-};\r
-\r
-#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)\r
-#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset)\r
-#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size)\r
-#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)\r
-\r
-\r
-SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size);\r
-SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset);\r
-\r
-/* reads via ILookInStream::Read */\r
-SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType);\r
-SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size);\r
-\r
-\r
-\r
-typedef struct\r
-{\r
- ILookInStream vt;\r
- const ISeekInStream *realStream;\r
- \r
- size_t pos;\r
- size_t size; /* it's data size */\r
- \r
- /* the following variables must be set outside */\r
- Byte *buf;\r
- size_t bufSize;\r
-} CLookToRead2;\r
-\r
-void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);\r
-\r
-#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; }\r
-\r
-\r
-typedef struct\r
-{\r
- ISeqInStream vt;\r
- const ILookInStream *realStream;\r
-} CSecToLook;\r
-\r
-void SecToLook_CreateVTable(CSecToLook *p);\r
-\r
-\r
-\r
-typedef struct\r
-{\r
- ISeqInStream vt;\r
- const ILookInStream *realStream;\r
-} CSecToRead;\r
-\r
-void SecToRead_CreateVTable(CSecToRead *p);\r
-\r
-\r
-typedef struct ICompressProgress ICompressProgress;\r
-\r
-struct ICompressProgress\r
-{\r
- SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize);\r
- /* Returns: result. (result != SZ_OK) means break.\r
- Value (UInt64)(Int64)-1 for size means unknown value. */\r
-};\r
-#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)\r
-\r
-\r
-\r
-typedef struct ISzAlloc ISzAlloc;\r
-typedef const ISzAlloc * ISzAllocPtr;\r
-\r
-struct ISzAlloc\r
-{\r
- void *(*Alloc)(ISzAllocPtr p, size_t size);\r
- void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */\r
-};\r
-\r
-#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size)\r
-#define ISzAlloc_Free(p, a) (p)->Free(p, a)\r
-\r
-/* deprecated */\r
-#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size)\r
-#define IAlloc_Free(p, a) ISzAlloc_Free(p, a)\r
-\r
-\r
-\r
-\r
-\r
-#ifndef MY_offsetof\r
- #ifdef offsetof\r
- #define MY_offsetof(type, m) offsetof(type, m)\r
- /*\r
- #define MY_offsetof(type, m) FIELD_OFFSET(type, m)\r
- */\r
- #else\r
- #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m))\r
- #endif\r
-#endif\r
-\r
-\r
-\r
-#ifndef MY_container_of\r
-\r
-/*\r
-#define MY_container_of(ptr, type, m) container_of(ptr, type, m)\r
-#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)\r
-#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))\r
-#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))\r
-*/\r
-\r
-/*\r
- GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly"\r
- GCC 3.4.4 : classes with constructor\r
- GCC 4.8.1 : classes with non-public variable members"\r
-*/\r
-\r
-#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))\r
-\r
-#endif\r
-\r
-#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))\r
-\r
-/*\r
-#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)\r
-*/\r
-#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m)\r
-\r
-#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)\r
-/*\r
-#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m)\r
-*/\r
-\r
-\r
-#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a))\r
-\r
-#ifdef _WIN32\r
-\r
-#define CHAR_PATH_SEPARATOR '\\'\r
-#define WCHAR_PATH_SEPARATOR L'\\'\r
-#define STRING_PATH_SEPARATOR "\\"\r
-#define WSTRING_PATH_SEPARATOR L"\\"\r
-\r
-#else\r
-\r
-#define CHAR_PATH_SEPARATOR '/'\r
-#define WCHAR_PATH_SEPARATOR L'/'\r
-#define STRING_PATH_SEPARATOR "/"\r
-#define WSTRING_PATH_SEPARATOR L"/"\r
-\r
-#endif\r
-\r
-#define k_PropVar_TimePrec_0 0\r
-#define k_PropVar_TimePrec_Unix 1\r
-#define k_PropVar_TimePrec_DOS 2\r
-#define k_PropVar_TimePrec_HighPrec 3\r
-#define k_PropVar_TimePrec_Base 16\r
-#define k_PropVar_TimePrec_100ns (k_PropVar_TimePrec_Base + 7)\r
-#define k_PropVar_TimePrec_1ns (k_PropVar_TimePrec_Base + 9)\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+/* 7zTypes.h -- Basic types
+2024-01-24 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_7Z_TYPES_H
+#define ZIP7_7Z_TYPES_H
+
+#ifdef _WIN32
+/* #include <windows.h> */
+#else
+#include <errno.h>
+#endif
+
+#include <stddef.h>
+
+#ifndef EXTERN_C_BEGIN
+#ifdef __cplusplus
+#define EXTERN_C_BEGIN extern "C" {
+#define EXTERN_C_END }
+#else
+#define EXTERN_C_BEGIN
+#define EXTERN_C_END
+#endif
+#endif
+
+EXTERN_C_BEGIN
+
+#define SZ_OK 0
+
+#define SZ_ERROR_DATA 1
+#define SZ_ERROR_MEM 2
+#define SZ_ERROR_CRC 3
+#define SZ_ERROR_UNSUPPORTED 4
+#define SZ_ERROR_PARAM 5
+#define SZ_ERROR_INPUT_EOF 6
+#define SZ_ERROR_OUTPUT_EOF 7
+#define SZ_ERROR_READ 8
+#define SZ_ERROR_WRITE 9
+#define SZ_ERROR_PROGRESS 10
+#define SZ_ERROR_FAIL 11
+#define SZ_ERROR_THREAD 12
+
+#define SZ_ERROR_ARCHIVE 16
+#define SZ_ERROR_NO_ARCHIVE 17
+
+typedef int SRes;
+
+
+#ifdef _MSC_VER
+ #if _MSC_VER > 1200
+ #define MY_ALIGN(n) __declspec(align(n))
+ #else
+ #define MY_ALIGN(n)
+ #endif
+#else
+ /*
+ // C11/C++11:
+ #include <stdalign.h>
+ #define MY_ALIGN(n) alignas(n)
+ */
+ #define MY_ALIGN(n) __attribute__ ((aligned(n)))
+#endif
+
+
+#ifdef _WIN32
+
+/* typedef DWORD WRes; */
+typedef unsigned WRes;
+#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
+
+// #define MY_HRES_ERROR_INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
+
+#else // _WIN32
+
+// #define ENV_HAVE_LSTAT
+typedef int WRes;
+
+// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
+#define MY_FACILITY_ERRNO 0x800
+#define MY_FACILITY_WIN32 7
+#define MY_FACILITY_WRes MY_FACILITY_ERRNO
+
+#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
+ ( (HRESULT)(x) & 0x0000FFFF) \
+ | (MY_FACILITY_WRes << 16) \
+ | (HRESULT)0x80000000 ))
+
+#define MY_SRes_HRESULT_FROM_WRes(x) \
+ ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x))
+
+// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno)
+#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x)
+
+/*
+#define ERROR_FILE_NOT_FOUND 2L
+#define ERROR_ACCESS_DENIED 5L
+#define ERROR_NO_MORE_FILES 18L
+#define ERROR_LOCK_VIOLATION 33L
+#define ERROR_FILE_EXISTS 80L
+#define ERROR_DISK_FULL 112L
+#define ERROR_NEGATIVE_SEEK 131L
+#define ERROR_ALREADY_EXISTS 183L
+#define ERROR_DIRECTORY 267L
+#define ERROR_TOO_MANY_POSTS 298L
+
+#define ERROR_INTERNAL_ERROR 1359L
+#define ERROR_INVALID_REPARSE_DATA 4392L
+#define ERROR_REPARSE_TAG_INVALID 4393L
+#define ERROR_REPARSE_TAG_MISMATCH 4394L
+*/
+
+// we use errno equivalents for some WIN32 errors:
+
+#define ERROR_INVALID_PARAMETER EINVAL
+#define ERROR_INVALID_FUNCTION EINVAL
+#define ERROR_ALREADY_EXISTS EEXIST
+#define ERROR_FILE_EXISTS EEXIST
+#define ERROR_PATH_NOT_FOUND ENOENT
+#define ERROR_FILE_NOT_FOUND ENOENT
+#define ERROR_DISK_FULL ENOSPC
+// #define ERROR_INVALID_HANDLE EBADF
+
+// we use FACILITY_WIN32 for errors that has no errno equivalent
+// Too many posts were made to a semaphore.
+#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL)
+#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L)
+#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L)
+
+// if (MY_FACILITY_WRes != FACILITY_WIN32),
+// we use FACILITY_WIN32 for COM errors:
+#define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
+#define E_INVALIDARG ((HRESULT)0x80070057L)
+#define MY_E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
+
+/*
+// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
+#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
+#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
+#define MY_E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
+*/
+
+#define TEXT(quote) quote
+
+#define FILE_ATTRIBUTE_READONLY 0x0001
+#define FILE_ATTRIBUTE_HIDDEN 0x0002
+#define FILE_ATTRIBUTE_SYSTEM 0x0004
+#define FILE_ATTRIBUTE_DIRECTORY 0x0010
+#define FILE_ATTRIBUTE_ARCHIVE 0x0020
+#define FILE_ATTRIBUTE_DEVICE 0x0040
+#define FILE_ATTRIBUTE_NORMAL 0x0080
+#define FILE_ATTRIBUTE_TEMPORARY 0x0100
+#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200
+#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400
+#define FILE_ATTRIBUTE_COMPRESSED 0x0800
+#define FILE_ATTRIBUTE_OFFLINE 0x1000
+#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000
+#define FILE_ATTRIBUTE_ENCRYPTED 0x4000
+
+#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */
+
+#endif
+
+
+#ifndef RINOK
+#define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; }
+#endif
+
+#ifndef RINOK_WRes
+#define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; }
+#endif
+
+typedef unsigned char Byte;
+typedef short Int16;
+typedef unsigned short UInt16;
+
+#ifdef Z7_DECL_Int32_AS_long
+typedef long Int32;
+typedef unsigned long UInt32;
+#else
+typedef int Int32;
+typedef unsigned int UInt32;
+#endif
+
+
+#ifndef _WIN32
+
+typedef int INT;
+typedef Int32 INT32;
+typedef unsigned int UINT;
+typedef UInt32 UINT32;
+typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility
+typedef UINT32 ULONG;
+
+#undef DWORD
+typedef UINT32 DWORD;
+
+#define VOID void
+
+#define HRESULT LONG
+
+typedef void *LPVOID;
+// typedef void VOID;
+// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
+// gcc / clang on Unix : sizeof(long==sizeof(void*) in 32 or 64 bits)
+typedef long INT_PTR;
+typedef unsigned long UINT_PTR;
+typedef long LONG_PTR;
+typedef unsigned long DWORD_PTR;
+
+typedef size_t SIZE_T;
+
+#endif // _WIN32
+
+
+#define MY_HRES_ERROR_INTERNAL_ERROR ((HRESULT)0x8007054FL)
+
+
+#ifdef Z7_DECL_Int64_AS_long
+
+typedef long Int64;
+typedef unsigned long UInt64;
+
+#else
+
+#if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__)
+typedef __int64 Int64;
+typedef unsigned __int64 UInt64;
+#else
+#if defined(__clang__) || defined(__GNUC__)
+#include <stdint.h>
+typedef int64_t Int64;
+typedef uint64_t UInt64;
+#else
+typedef long long int Int64;
+typedef unsigned long long int UInt64;
+// #define UINT64_CONST(n) n ## ULL
+#endif
+#endif
+
+#endif
+
+#define UINT64_CONST(n) n
+
+
+#ifdef Z7_DECL_SizeT_AS_unsigned_int
+typedef unsigned int SizeT;
+#else
+typedef size_t SizeT;
+#endif
+
+/*
+#if (defined(_MSC_VER) && _MSC_VER <= 1200)
+typedef size_t MY_uintptr_t;
+#else
+#include <stdint.h>
+typedef uintptr_t MY_uintptr_t;
+#endif
+*/
+
+typedef int BoolInt;
+/* typedef BoolInt Bool; */
+#define True 1
+#define False 0
+
+
+#ifdef _WIN32
+#define Z7_STDCALL __stdcall
+#else
+#define Z7_STDCALL
+#endif
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1300
+#define Z7_NO_INLINE __declspec(noinline)
+#else
+#define Z7_NO_INLINE
+#endif
+
+#define Z7_FORCE_INLINE __forceinline
+
+#define Z7_CDECL __cdecl
+#define Z7_FASTCALL __fastcall
+
+#else // _MSC_VER
+
+#if (defined(__GNUC__) && (__GNUC__ >= 4)) \
+ || (defined(__clang__) && (__clang_major__ >= 4)) \
+ || defined(__INTEL_COMPILER) \
+ || defined(__xlC__)
+#define Z7_NO_INLINE __attribute__((noinline))
+#define Z7_FORCE_INLINE __attribute__((always_inline)) inline
+#else
+#define Z7_NO_INLINE
+#define Z7_FORCE_INLINE
+#endif
+
+#define Z7_CDECL
+
+#if defined(_M_IX86) \
+ || defined(__i386__)
+// #define Z7_FASTCALL __attribute__((fastcall))
+// #define Z7_FASTCALL __attribute__((cdecl))
+#define Z7_FASTCALL
+#elif defined(MY_CPU_AMD64)
+// #define Z7_FASTCALL __attribute__((ms_abi))
+#define Z7_FASTCALL
+#else
+#define Z7_FASTCALL
+#endif
+
+#endif // _MSC_VER
+
+
+/* The following interfaces use first parameter as pointer to structure */
+
+// #define Z7_C_IFACE_CONST_QUAL
+#define Z7_C_IFACE_CONST_QUAL const
+
+#define Z7_C_IFACE_DECL(a) \
+ struct a ## _; \
+ typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \
+ typedef struct a ## _ a; \
+ struct a ## _
+
+
+Z7_C_IFACE_DECL (IByteIn)
+{
+ Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */
+};
+#define IByteIn_Read(p) (p)->Read(p)
+
+
+Z7_C_IFACE_DECL (IByteOut)
+{
+ void (*Write)(IByteOutPtr p, Byte b);
+};
+#define IByteOut_Write(p, b) (p)->Write(p, b)
+
+
+Z7_C_IFACE_DECL (ISeqInStream)
+{
+ SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size);
+ /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+ (output(*size) < input(*size)) is allowed */
+};
+#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+
+/* try to read as much as avail in stream and limited by (*processedSize) */
+SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize);
+/* it can return SZ_ERROR_INPUT_EOF */
+// SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size);
+// SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType);
+SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf);
+
+
+Z7_C_IFACE_DECL (ISeqOutStream)
+{
+ size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size);
+ /* Returns: result - the number of actually written bytes.
+ (result < size) means error */
+};
+#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size)
+
+typedef enum
+{
+ SZ_SEEK_SET = 0,
+ SZ_SEEK_CUR = 1,
+ SZ_SEEK_END = 2
+} ESzSeek;
+
+
+Z7_C_IFACE_DECL (ISeekInStream)
+{
+ SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size); /* same as ISeqInStream::Read */
+ SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin);
+};
+#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+
+
+Z7_C_IFACE_DECL (ILookInStream)
+{
+ SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size);
+ /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+ (output(*size) > input(*size)) is not allowed
+ (output(*size) < input(*size)) is allowed */
+ SRes (*Skip)(ILookInStreamPtr p, size_t offset);
+ /* offset must be <= output(*size) of Look */
+ SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size);
+ /* reads directly (without buffer). It's same as ISeqInStream::Read */
+ SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin);
+};
+
+#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
+#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset)
+#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+
+
+SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size);
+SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset);
+
+/* reads via ILookInStream::Read */
+SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType);
+SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size);
+
+
+typedef struct
+{
+ ILookInStream vt;
+ ISeekInStreamPtr realStream;
+
+ size_t pos;
+ size_t size; /* it's data size */
+
+ /* the following variables must be set outside */
+ Byte *buf;
+ size_t bufSize;
+} CLookToRead2;
+
+void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
+
+#define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; }
+
+
+typedef struct
+{
+ ISeqInStream vt;
+ ILookInStreamPtr realStream;
+} CSecToLook;
+
+void SecToLook_CreateVTable(CSecToLook *p);
+
+
+
+typedef struct
+{
+ ISeqInStream vt;
+ ILookInStreamPtr realStream;
+} CSecToRead;
+
+void SecToRead_CreateVTable(CSecToRead *p);
+
+
+Z7_C_IFACE_DECL (ICompressProgress)
+{
+ SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize);
+ /* Returns: result. (result != SZ_OK) means break.
+ Value (UInt64)(Int64)-1 for size means unknown value. */
+};
+
+#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
+
+
+
+typedef struct ISzAlloc ISzAlloc;
+typedef const ISzAlloc * ISzAllocPtr;
+
+struct ISzAlloc
+{
+ void *(*Alloc)(ISzAllocPtr p, size_t size);
+ void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */
+};
+
+#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size)
+#define ISzAlloc_Free(p, a) (p)->Free(p, a)
+
+/* deprecated */
+#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size)
+#define IAlloc_Free(p, a) ISzAlloc_Free(p, a)
+
+
+
+
+
+#ifndef MY_offsetof
+ #ifdef offsetof
+ #define MY_offsetof(type, m) offsetof(type, m)
+ /*
+ #define MY_offsetof(type, m) FIELD_OFFSET(type, m)
+ */
+ #else
+ #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m))
+ #endif
+#endif
+
+
+
+#ifndef Z7_container_of
+
+/*
+#define Z7_container_of(ptr, type, m) container_of(ptr, type, m)
+#define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
+#define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
+#define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
+*/
+
+/*
+ GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly"
+ GCC 3.4.4 : classes with constructor
+ GCC 4.8.1 : classes with non-public variable members"
+*/
+
+#define Z7_container_of(ptr, type, m) \
+ ((type *)(void *)((char *)(void *) \
+ (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
+
+#define Z7_container_of_CONST(ptr, type, m) \
+ ((const type *)(const void *)((const char *)(const void *) \
+ (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
+
+/*
+#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \
+ ((type *)(void *)(const void *)((const char *)(const void *) \
+ (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
+*/
+
+#endif
+
+#define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
+
+// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m)
+// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m)
+
+#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m)
+
+#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+/*
+#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m)
+*/
+#if defined (__clang__) || defined(__GNUC__)
+#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wcast-qual\"")
+#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL \
+ _Pragma("GCC diagnostic pop")
+#else
+#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL
+#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
+#endif
+
+#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \
+ Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
+ type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \
+ Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
+
+#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \
+ Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p)
+
+
+// #define ZIP7_DECLARE_HANDLE(name) typedef void *name;
+#define Z7_DECLARE_HANDLE(name) struct name##_dummy{int unused;}; typedef struct name##_dummy *name;
+
+
+#define Z7_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
+
+#ifndef Z7_ARRAY_SIZE
+#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+#endif
+
+
+#ifdef _WIN32
+
+#define CHAR_PATH_SEPARATOR '\\'
+#define WCHAR_PATH_SEPARATOR L'\\'
+#define STRING_PATH_SEPARATOR "\\"
+#define WSTRING_PATH_SEPARATOR L"\\"
+
+#else
+
+#define CHAR_PATH_SEPARATOR '/'
+#define WCHAR_PATH_SEPARATOR L'/'
+#define STRING_PATH_SEPARATOR "/"
+#define WSTRING_PATH_SEPARATOR L"/"
+
+#endif
+
+#define k_PropVar_TimePrec_0 0
+#define k_PropVar_TimePrec_Unix 1
+#define k_PropVar_TimePrec_DOS 2
+#define k_PropVar_TimePrec_HighPrec 3
+#define k_PropVar_TimePrec_Base 16
+#define k_PropVar_TimePrec_100ns (k_PropVar_TimePrec_Base + 7)
+#define k_PropVar_TimePrec_1ns (k_PropVar_TimePrec_Base + 9)
+
+EXTERN_C_END
+
+#endif
+
+/*
+#ifndef Z7_ST
+#ifdef _7ZIP_ST
+#define Z7_ST
+#endif
+#endif
+*/
--- /dev/null
+/* 7zWindows.h -- StdAfx
+2023-04-02 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_7Z_WINDOWS_H
+#define ZIP7_INC_7Z_WINDOWS_H
+
+#ifdef _WIN32
+
+#if defined(__clang__)
+# pragma clang diagnostic push
+#endif
+
+#if defined(_MSC_VER)
+
+#pragma warning(push)
+#pragma warning(disable : 4668) // '_WIN32_WINNT' is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
+
+#if _MSC_VER == 1900
+// for old kit10 versions
+// #pragma warning(disable : 4255) // winuser.h(13979): warning C4255: 'GetThreadDpiAwarenessContext':
+#endif
+// win10 Windows Kit:
+#endif // _MSC_VER
+
+#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
+// for msvc6 without sdk2003
+#define RPC_NO_WINDOWS_H
+#endif
+
+#if defined(__MINGW32__) || defined(__MINGW64__)
+// #if defined(__GNUC__) && !defined(__clang__)
+#include <windows.h>
+#else
+#include <Windows.h>
+#endif
+// #include <basetsd.h>
+// #include <wtypes.h>
+
+// but if precompiled with clang-cl then we need
+// #include <windows.h>
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+#if defined(__clang__)
+# pragma clang diagnostic pop
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
+#ifndef _W64
+
+typedef long LONG_PTR, *PLONG_PTR;
+typedef unsigned long ULONG_PTR, *PULONG_PTR;
+typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
+
+#define Z7_OLD_WIN_SDK
+#endif // _W64
+#endif // _MSC_VER == 1200
+
+#ifdef Z7_OLD_WIN_SDK
+
+#ifndef INVALID_FILE_ATTRIBUTES
+#define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
+#endif
+#ifndef INVALID_SET_FILE_POINTER
+#define INVALID_SET_FILE_POINTER ((DWORD)-1)
+#endif
+#ifndef FILE_SPECIAL_ACCESS
+#define FILE_SPECIAL_ACCESS (FILE_ANY_ACCESS)
+#endif
+
+// ShlObj.h:
+// #define BIF_NEWDIALOGSTYLE 0x0040
+
+#pragma warning(disable : 4201)
+// #pragma warning(disable : 4115)
+
+#undef VARIANT_TRUE
+#define VARIANT_TRUE ((VARIANT_BOOL)-1)
+#endif
+
+#endif // Z7_OLD_WIN_SDK
+
+#ifdef UNDER_CE
+#undef VARIANT_TRUE
+#define VARIANT_TRUE ((VARIANT_BOOL)-1)
+#endif
+
+
+#if defined(_MSC_VER)
+#if _MSC_VER >= 1400 && _MSC_VER <= 1600
+ // BaseTsd.h(148) : 'HandleToULong' : unreferenced inline function has been removed
+ // string.h
+ // #pragma warning(disable : 4514)
+#endif
+#endif
+
+
+/* #include "7zTypes.h" */
+
+#endif
--- /dev/null
+/* Alloc.h -- Memory allocation functions
+2024-01-22 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_ALLOC_H
+#define ZIP7_INC_ALLOC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/*
+ MyFree(NULL) : is allowed, as free(NULL)
+ MyAlloc(0) : returns NULL : but malloc(0) is allowed to return NULL or non_NULL
+ MyRealloc(NULL, 0) : returns NULL : but realloc(NULL, 0) is allowed to return NULL or non_NULL
+MyRealloc() is similar to realloc() for the following cases:
+ MyRealloc(non_NULL, 0) : returns NULL and always calls MyFree(ptr)
+ MyRealloc(NULL, non_ZERO) : returns NULL, if allocation failed
+ MyRealloc(non_NULL, non_ZERO) : returns NULL, if reallocation failed
+*/
+
+void *MyAlloc(size_t size);
+void MyFree(void *address);
+void *MyRealloc(void *address, size_t size);
+
+void *z7_AlignedAlloc(size_t size);
+void z7_AlignedFree(void *p);
+
+#ifdef _WIN32
+
+#ifdef Z7_LARGE_PAGES
+void SetLargePageSize(void);
+#endif
+
+void *MidAlloc(size_t size);
+void MidFree(void *address);
+void *BigAlloc(size_t size);
+void BigFree(void *address);
+
+/* #define Z7_BIG_ALLOC_IS_ZERO_FILLED */
+
+#else
+
+#define MidAlloc(size) z7_AlignedAlloc(size)
+#define MidFree(address) z7_AlignedFree(address)
+#define BigAlloc(size) z7_AlignedAlloc(size)
+#define BigFree(address) z7_AlignedFree(address)
+
+#endif
+
+extern const ISzAlloc g_Alloc;
+
+#ifdef _WIN32
+extern const ISzAlloc g_BigAlloc;
+extern const ISzAlloc g_MidAlloc;
+#else
+#define g_BigAlloc g_AlignedAlloc
+#define g_MidAlloc g_AlignedAlloc
+#endif
+
+extern const ISzAlloc g_AlignedAlloc;
+
+
+typedef struct
+{
+ ISzAlloc vt;
+ ISzAllocPtr baseAlloc;
+ unsigned numAlignBits; /* ((1 << numAlignBits) >= sizeof(void *)) */
+ size_t offset; /* (offset == (k * sizeof(void *)) && offset < (1 << numAlignBits) */
+} CAlignOffsetAlloc;
+
+void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p);
+
+
+EXTERN_C_END
+
+#endif
--- /dev/null
+/* Bra.h -- Branch converters for executables
+2024-01-20 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_BRA_H
+#define ZIP7_INC_BRA_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/* #define PPC BAD_PPC_11 // for debug */
+
+#define Z7_BRANCH_CONV_DEC_2(name) z7_ ## name ## _Dec
+#define Z7_BRANCH_CONV_ENC_2(name) z7_ ## name ## _Enc
+#define Z7_BRANCH_CONV_DEC(name) Z7_BRANCH_CONV_DEC_2(BranchConv_ ## name)
+#define Z7_BRANCH_CONV_ENC(name) Z7_BRANCH_CONV_ENC_2(BranchConv_ ## name)
+#define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec
+#define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc
+
+#define Z7_BRANCH_CONV_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc)
+#define Z7_BRANCH_CONV_ST_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc, UInt32 *state)
+
+typedef Z7_BRANCH_CONV_DECL( (*z7_Func_BranchConv));
+typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt));
+
+#define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 0
+Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_DEC(X86));
+Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_ENC(X86));
+
+#define Z7_BRANCH_FUNCS_DECL(name) \
+Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_DEC_2(name)); \
+Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_ENC_2(name));
+
+Z7_BRANCH_FUNCS_DECL (BranchConv_ARM64)
+Z7_BRANCH_FUNCS_DECL (BranchConv_ARM)
+Z7_BRANCH_FUNCS_DECL (BranchConv_ARMT)
+Z7_BRANCH_FUNCS_DECL (BranchConv_PPC)
+Z7_BRANCH_FUNCS_DECL (BranchConv_SPARC)
+Z7_BRANCH_FUNCS_DECL (BranchConv_IA64)
+Z7_BRANCH_FUNCS_DECL (BranchConv_RISCV)
+
+/*
+These functions convert data that contain CPU instructions.
+Each such function converts relative addresses to absolute addresses in some
+branch instructions: CALL (in all converters) and JUMP (X86 converter only).
+Such conversion allows to increase compression ratio, if we compress that data.
+
+There are 2 types of converters:
+ Byte * Conv_RISC (Byte *data, SizeT size, UInt32 pc);
+ Byte * ConvSt_X86(Byte *data, SizeT size, UInt32 pc, UInt32 *state);
+Each Converter supports 2 versions: one for encoding
+and one for decoding (_Enc/_Dec postfixes in function name).
+
+In params:
+ data : data buffer
+ size : size of data
+ pc : current virtual Program Counter (Instruction Pointer) value
+In/Out param:
+ state : pointer to state variable (for X86 converter only)
+
+Return:
+ The pointer to position in (data) buffer after last byte that was processed.
+ If the caller calls converter again, it must call it starting with that position.
+ But the caller is allowed to move data in buffer. So pointer to
+ current processed position also will be changed for next call.
+ Also the caller must increase internal (pc) value for next call.
+
+Each converter has some characteristics: Endian, Alignment, LookAhead.
+ Type Endian Alignment LookAhead
+
+ X86 little 1 4
+ ARMT little 2 2
+ RISCV little 2 6
+ ARM little 4 0
+ ARM64 little 4 0
+ PPC big 4 0
+ SPARC big 4 0
+ IA64 little 16 0
+
+ (data) must be aligned for (Alignment).
+ processed size can be calculated as:
+ SizeT processed = Conv(data, size, pc) - data;
+ if (processed == 0)
+ it means that converter needs more data for processing.
+ If (size < Alignment + LookAhead)
+ then (processed == 0) is allowed.
+
+Example code for conversion in loop:
+ UInt32 pc = 0;
+ size = 0;
+ for (;;)
+ {
+ size += Load_more_input_data(data + size);
+ SizeT processed = Conv(data, size, pc) - data;
+ if (processed == 0 && no_more_input_data_after_size)
+ break; // we stop convert loop
+ data += processed;
+ size -= processed;
+ pc += processed;
+ }
+*/
+
+EXTERN_C_END
+
+#endif
--- /dev/null
+/* Compiler.h : Compiler specific defines and pragmas
+2024-01-22 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_COMPILER_H
+#define ZIP7_INC_COMPILER_H
+
+#if defined(__clang__)
+# define Z7_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
+#endif
+#if defined(__clang__) && defined(__apple_build_version__)
+# define Z7_APPLE_CLANG_VERSION Z7_CLANG_VERSION
+#elif defined(__clang__)
+# define Z7_LLVM_CLANG_VERSION Z7_CLANG_VERSION
+#elif defined(__GNUC__)
+# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#endif
+
+#ifdef _MSC_VER
+#if !defined(__clang__) && !defined(__GNUC__)
+#define Z7_MSC_VER_ORIGINAL _MSC_VER
+#endif
+#endif
+
+#if defined(__MINGW32__) || defined(__MINGW64__)
+#define Z7_MINGW
+#endif
+
+#if defined(__LCC__) && (defined(__MCST__) || defined(__e2k__))
+#define Z7_MCST_LCC
+#define Z7_MCST_LCC_VERSION (__LCC__ * 100 + __LCC_MINOR__)
+#endif
+
+/*
+#if defined(__AVX2__) \
+ || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
+ || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \
+ || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \
+ || defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \
+ || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400)
+ #define Z7_COMPILER_AVX2_SUPPORTED
+ #endif
+#endif
+*/
+
+// #pragma GCC diagnostic ignored "-Wunknown-pragmas"
+
+#ifdef __clang__
+// padding size of '' with 4 bytes to alignment boundary
+#pragma GCC diagnostic ignored "-Wpadded"
+
+#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) \
+ && defined(__FreeBSD__)
+// freebsd:
+#pragma GCC diagnostic ignored "-Wexcess-padding"
+#endif
+
+#if __clang_major__ >= 16
+#pragma GCC diagnostic ignored "-Wunsafe-buffer-usage"
+#endif
+
+#if __clang_major__ == 13
+#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
+// cheri
+#pragma GCC diagnostic ignored "-Wcapability-to-integer-cast"
+#endif
+#endif
+
+#if __clang_major__ == 13
+ // for <arm_neon.h>
+ #pragma GCC diagnostic ignored "-Wreserved-identifier"
+#endif
+
+#endif // __clang__
+
+#if defined(_WIN32) && defined(__clang__) && __clang_major__ >= 16
+// #pragma GCC diagnostic ignored "-Wcast-function-type-strict"
+#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION \
+ _Pragma("GCC diagnostic ignored \"-Wcast-function-type-strict\"")
+#else
+#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
+#endif
+
+typedef void (*Z7_void_Function)(void);
+#if defined(__clang__) || defined(__GNUC__)
+#define Z7_CAST_FUNC_C (Z7_void_Function)
+#elif defined(_MSC_VER) && _MSC_VER > 1920
+#define Z7_CAST_FUNC_C (void *)
+// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
+#else
+#define Z7_CAST_FUNC_C
+#endif
+/*
+#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__)
+ // #pragma GCC diagnostic ignored "-Wcast-function-type"
+#endif
+*/
+#ifdef __GNUC__
+#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40000) && (Z7_GCC_VERSION < 70000)
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#endif
+#endif
+
+
+#ifdef _MSC_VER
+
+ #ifdef UNDER_CE
+ #define RPC_NO_WINDOWS_H
+ /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
+ #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
+ #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
+ #endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1800
+#pragma warning(disable : 4464) // relative include path contains '..'
+#endif
+
+// == 1200 : -O1 : for __forceinline
+// >= 1900 : -O1 : for printf
+#pragma warning(disable : 4710) // function not inlined
+
+#if _MSC_VER < 1900
+// winnt.h: 'Int64ShllMod32'
+#pragma warning(disable : 4514) // unreferenced inline function has been removed
+#endif
+
+#if _MSC_VER < 1300
+// #pragma warning(disable : 4702) // unreachable code
+// Bra.c : -O1:
+#pragma warning(disable : 4714) // function marked as __forceinline not inlined
+#endif
+
+/*
+#if _MSC_VER > 1400 && _MSC_VER <= 1900
+// strcat: This function or variable may be unsafe
+// sysinfoapi.h: kit10: GetVersion was declared deprecated
+#pragma warning(disable : 4996)
+#endif
+*/
+
+#if _MSC_VER > 1200
+// -Wall warnings
+
+#pragma warning(disable : 4711) // function selected for automatic inline expansion
+#pragma warning(disable : 4820) // '2' bytes padding added after data member
+
+#if _MSC_VER >= 1400 && _MSC_VER < 1920
+// 1400: string.h: _DBG_MEMCPY_INLINE_
+// 1600 - 191x : smmintrin.h __cplusplus'
+// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
+#pragma warning(disable : 4668)
+
+// 1400 - 1600 : WinDef.h : 'FARPROC' :
+// 1900 - 191x : immintrin.h: _readfsbase_u32
+// no function prototype given : converting '()' to '(void)'
+#pragma warning(disable : 4255)
+#endif
+
+#if _MSC_VER >= 1914
+// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified
+#pragma warning(disable : 5045)
+#endif
+
+#endif // _MSC_VER > 1200
+#endif // _MSC_VER
+
+
+#if defined(__clang__) && (__clang_major__ >= 4)
+ #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
+ _Pragma("clang loop unroll(disable)") \
+ _Pragma("clang loop vectorize(disable)")
+ #define Z7_ATTRIB_NO_VECTORIZE
+#elif defined(__GNUC__) && (__GNUC__ >= 5) \
+ && (!defined(Z7_MCST_LCC_VERSION) || (Z7_MCST_LCC_VERSION >= 12610))
+ #define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+ // __attribute__((optimize("no-unroll-loops")));
+ #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+#elif defined(_MSC_VER) && (_MSC_VER >= 1920)
+ #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
+ _Pragma("loop( no_vector )")
+ #define Z7_ATTRIB_NO_VECTORIZE
+#else
+ #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+ #define Z7_ATTRIB_NO_VECTORIZE
+#endif
+
+#if defined(MY_CPU_X86_OR_AMD64) && ( \
+ defined(__clang__) && (__clang_major__ >= 4) \
+ || defined(__GNUC__) && (__GNUC__ >= 5))
+ #define Z7_ATTRIB_NO_SSE __attribute__((__target__("no-sse")))
+#else
+ #define Z7_ATTRIB_NO_SSE
+#endif
+
+#define Z7_ATTRIB_NO_VECTOR \
+ Z7_ATTRIB_NO_VECTORIZE \
+ Z7_ATTRIB_NO_SSE
+
+
+#if defined(__clang__) && (__clang_major__ >= 8) \
+ || defined(__GNUC__) && (__GNUC__ >= 1000) \
+ /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
+ // GCC is not good for __builtin_expect()
+ #define Z7_LIKELY(x) (__builtin_expect((x), 1))
+ #define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
+ // #define Z7_unlikely [[unlikely]]
+ // #define Z7_likely [[likely]]
+#else
+ #define Z7_LIKELY(x) (x)
+ #define Z7_UNLIKELY(x) (x)
+ // #define Z7_likely
+#endif
+
+
+#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30600))
+
+#if (Z7_CLANG_VERSION < 130000)
+#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wreserved-id-macro\"")
+#else
+#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"")
+#endif
+
+#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \
+ _Pragma("GCC diagnostic pop")
+#else
+#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
+#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
+#endif
+
+#define UNUSED_VAR(x) (void)x;
+/* #define UNUSED_VAR(x) x=x; */
+
+#endif
--- /dev/null
+/* CpuArch.h -- CPU specific code
+2024-05-13 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_CPU_ARCH_H
+#define ZIP7_INC_CPU_ARCH_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/*
+MY_CPU_LE means that CPU is LITTLE ENDIAN.
+MY_CPU_BE means that CPU is BIG ENDIAN.
+If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform.
+
+MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses.
+
+MY_CPU_64BIT means that processor can work with 64-bit registers.
+ MY_CPU_64BIT can be used to select fast code branch
+ MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
+*/
+
+#if !defined(_M_ARM64EC)
+#if defined(_M_X64) \
+ || defined(_M_AMD64) \
+ || defined(__x86_64__) \
+ || defined(__AMD64__) \
+ || defined(__amd64__)
+ #define MY_CPU_AMD64
+ #ifdef __ILP32__
+ #define MY_CPU_NAME "x32"
+ #define MY_CPU_SIZEOF_POINTER 4
+ #else
+ #define MY_CPU_NAME "x64"
+ #define MY_CPU_SIZEOF_POINTER 8
+ #endif
+ #define MY_CPU_64BIT
+#endif
+#endif
+
+
+#if defined(_M_IX86) \
+ || defined(__i386__)
+ #define MY_CPU_X86
+ #define MY_CPU_NAME "x86"
+ /* #define MY_CPU_32BIT */
+ #define MY_CPU_SIZEOF_POINTER 4
+#endif
+
+
+#if defined(_M_ARM64) \
+ || defined(_M_ARM64EC) \
+ || defined(__AARCH64EL__) \
+ || defined(__AARCH64EB__) \
+ || defined(__aarch64__)
+ #define MY_CPU_ARM64
+#if defined(__ILP32__) \
+ || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
+ #define MY_CPU_NAME "arm64-32"
+ #define MY_CPU_SIZEOF_POINTER 4
+#elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
+ #define MY_CPU_NAME "arm64-128"
+ #define MY_CPU_SIZEOF_POINTER 16
+#else
+#if defined(_M_ARM64EC)
+ #define MY_CPU_NAME "arm64ec"
+#else
+ #define MY_CPU_NAME "arm64"
+#endif
+ #define MY_CPU_SIZEOF_POINTER 8
+#endif
+ #define MY_CPU_64BIT
+#endif
+
+
+#if defined(_M_ARM) \
+ || defined(_M_ARM_NT) \
+ || defined(_M_ARMT) \
+ || defined(__arm__) \
+ || defined(__thumb__) \
+ || defined(__ARMEL__) \
+ || defined(__ARMEB__) \
+ || defined(__THUMBEL__) \
+ || defined(__THUMBEB__)
+ #define MY_CPU_ARM
+
+ #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
+ #define MY_CPU_ARMT
+ #define MY_CPU_NAME "armt"
+ #else
+ #define MY_CPU_ARM32
+ #define MY_CPU_NAME "arm"
+ #endif
+ /* #define MY_CPU_32BIT */
+ #define MY_CPU_SIZEOF_POINTER 4
+#endif
+
+
+#if defined(_M_IA64) \
+ || defined(__ia64__)
+ #define MY_CPU_IA64
+ #define MY_CPU_NAME "ia64"
+ #define MY_CPU_64BIT
+#endif
+
+
+#if defined(__mips64) \
+ || defined(__mips64__) \
+ || (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3))
+ #define MY_CPU_NAME "mips64"
+ #define MY_CPU_64BIT
+#elif defined(__mips__)
+ #define MY_CPU_NAME "mips"
+ /* #define MY_CPU_32BIT */
+#endif
+
+
+#if defined(__ppc64__) \
+ || defined(__powerpc64__) \
+ || defined(__ppc__) \
+ || defined(__powerpc__) \
+ || defined(__PPC__) \
+ || defined(_POWER)
+
+#define MY_CPU_PPC_OR_PPC64
+
+#if defined(__ppc64__) \
+ || defined(__powerpc64__) \
+ || defined(_LP64) \
+ || defined(__64BIT__)
+ #ifdef __ILP32__
+ #define MY_CPU_NAME "ppc64-32"
+ #define MY_CPU_SIZEOF_POINTER 4
+ #else
+ #define MY_CPU_NAME "ppc64"
+ #define MY_CPU_SIZEOF_POINTER 8
+ #endif
+ #define MY_CPU_64BIT
+#else
+ #define MY_CPU_NAME "ppc"
+ #define MY_CPU_SIZEOF_POINTER 4
+ /* #define MY_CPU_32BIT */
+#endif
+#endif
+
+
+#if defined(__sparc__) \
+ || defined(__sparc)
+ #define MY_CPU_SPARC
+ #if defined(__LP64__) \
+ || defined(_LP64) \
+ || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
+ #define MY_CPU_NAME "sparcv9"
+ #define MY_CPU_SIZEOF_POINTER 8
+ #define MY_CPU_64BIT
+ #elif defined(__sparc_v9__) \
+ || defined(__sparcv9)
+ #define MY_CPU_64BIT
+ #if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
+ #define MY_CPU_NAME "sparcv9-32"
+ #else
+ #define MY_CPU_NAME "sparcv9m"
+ #endif
+ #elif defined(__sparc_v8__) \
+ || defined(__sparcv8)
+ #define MY_CPU_NAME "sparcv8"
+ #define MY_CPU_SIZEOF_POINTER 4
+ #else
+ #define MY_CPU_NAME "sparc"
+ #endif
+#endif
+
+
+#if defined(__riscv) \
+ || defined(__riscv__)
+ #define MY_CPU_RISCV
+ #if __riscv_xlen == 32
+ #define MY_CPU_NAME "riscv32"
+ #elif __riscv_xlen == 64
+ #define MY_CPU_NAME "riscv64"
+ #else
+ #define MY_CPU_NAME "riscv"
+ #endif
+#endif
+
+
+#if defined(__loongarch__)
+ #define MY_CPU_LOONGARCH
+ #if defined(__loongarch64) || defined(__loongarch_grlen) && (__loongarch_grlen == 64)
+ #define MY_CPU_64BIT
+ #endif
+ #if defined(__loongarch64)
+ #define MY_CPU_NAME "loongarch64"
+ #define MY_CPU_LOONGARCH64
+ #else
+ #define MY_CPU_NAME "loongarch"
+ #endif
+#endif
+
+
+// #undef MY_CPU_NAME
+// #undef MY_CPU_SIZEOF_POINTER
+// #define __e2k__
+// #define __SIZEOF_POINTER__ 4
+#if defined(__e2k__)
+ #define MY_CPU_E2K
+ #if defined(__ILP32__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
+ #define MY_CPU_NAME "e2k-32"
+ #define MY_CPU_SIZEOF_POINTER 4
+ #else
+ #define MY_CPU_NAME "e2k"
+ #if defined(__LP64__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
+ #define MY_CPU_SIZEOF_POINTER 8
+ #endif
+ #endif
+ #define MY_CPU_64BIT
+#endif
+
+
+#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
+#define MY_CPU_X86_OR_AMD64
+#endif
+
+#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64)
+#define MY_CPU_ARM_OR_ARM64
+#endif
+
+
+#ifdef _WIN32
+
+ #ifdef MY_CPU_ARM
+ #define MY_CPU_ARM_LE
+ #endif
+
+ #ifdef MY_CPU_ARM64
+ #define MY_CPU_ARM64_LE
+ #endif
+
+ #ifdef _M_IA64
+ #define MY_CPU_IA64_LE
+ #endif
+
+#endif
+
+
+#if defined(MY_CPU_X86_OR_AMD64) \
+ || defined(MY_CPU_ARM_LE) \
+ || defined(MY_CPU_ARM64_LE) \
+ || defined(MY_CPU_IA64_LE) \
+ || defined(_LITTLE_ENDIAN) \
+ || defined(__LITTLE_ENDIAN__) \
+ || defined(__ARMEL__) \
+ || defined(__THUMBEL__) \
+ || defined(__AARCH64EL__) \
+ || defined(__MIPSEL__) \
+ || defined(__MIPSEL) \
+ || defined(_MIPSEL) \
+ || defined(__BFIN__) \
+ || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+ #define MY_CPU_LE
+#endif
+
+#if defined(__BIG_ENDIAN__) \
+ || defined(__ARMEB__) \
+ || defined(__THUMBEB__) \
+ || defined(__AARCH64EB__) \
+ || defined(__MIPSEB__) \
+ || defined(__MIPSEB) \
+ || defined(_MIPSEB) \
+ || defined(__m68k__) \
+ || defined(__s390__) \
+ || defined(__s390x__) \
+ || defined(__zarch__) \
+ || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
+ #define MY_CPU_BE
+#endif
+
+
+#if defined(MY_CPU_LE) && defined(MY_CPU_BE)
+ #error Stop_Compiling_Bad_Endian
+#endif
+
+#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE)
+ #error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time
+#endif
+
+#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
+ #error Stop_Compiling_Bad_32_64_BIT
+#endif
+
+#ifdef __SIZEOF_POINTER__
+ #ifdef MY_CPU_SIZEOF_POINTER
+ #if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__
+ #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
+ #endif
+ #else
+ #define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__
+ #endif
+#endif
+
+#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
+#if defined (_LP64)
+ #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
+#endif
+#endif
+
+#ifdef _MSC_VER
+ #if _MSC_VER >= 1300
+ #define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1))
+ #define MY_CPU_pragma_pop __pragma(pack(pop))
+ #else
+ #define MY_CPU_pragma_pack_push_1
+ #define MY_CPU_pragma_pop
+ #endif
+#else
+ #ifdef __xlC__
+ #define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)")
+ #define MY_CPU_pragma_pop _Pragma("pack()")
+ #else
+ #define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)")
+ #define MY_CPU_pragma_pop _Pragma("pack(pop)")
+ #endif
+#endif
+
+
+#ifndef MY_CPU_NAME
+ // #define MY_CPU_IS_UNKNOWN
+ #ifdef MY_CPU_LE
+ #define MY_CPU_NAME "LE"
+ #elif defined(MY_CPU_BE)
+ #define MY_CPU_NAME "BE"
+ #else
+ /*
+ #define MY_CPU_NAME ""
+ */
+ #endif
+#endif
+
+
+
+
+
+#ifdef __has_builtin
+ #define Z7_has_builtin(x) __has_builtin(x)
+#else
+ #define Z7_has_builtin(x) 0
+#endif
+
+
+#define Z7_BSWAP32_CONST(v) \
+ ( (((UInt32)(v) << 24) ) \
+ | (((UInt32)(v) << 8) & (UInt32)0xff0000) \
+ | (((UInt32)(v) >> 8) & (UInt32)0xff00 ) \
+ | (((UInt32)(v) >> 24) ))
+
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1300)
+
+#include <stdlib.h>
+
+/* Note: these macros will use bswap instruction (486), that is unsupported in 386 cpu */
+
+#pragma intrinsic(_byteswap_ushort)
+#pragma intrinsic(_byteswap_ulong)
+#pragma intrinsic(_byteswap_uint64)
+
+#define Z7_BSWAP16(v) _byteswap_ushort(v)
+#define Z7_BSWAP32(v) _byteswap_ulong (v)
+#define Z7_BSWAP64(v) _byteswap_uint64(v)
+#define Z7_CPU_FAST_BSWAP_SUPPORTED
+
+/* GCC can generate slow code that calls function for __builtin_bswap32() for:
+ - GCC for RISCV, if Zbb extension is not used.
+ - GCC for SPARC.
+ The code from CLANG for SPARC also is not fastest.
+ So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases.
+*/
+#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb)) \
+ && !defined(MY_CPU_SPARC) \
+ && ( \
+ (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
+ || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) \
+ )
+
+#define Z7_BSWAP16(v) __builtin_bswap16(v)
+#define Z7_BSWAP32(v) __builtin_bswap32(v)
+#define Z7_BSWAP64(v) __builtin_bswap64(v)
+#define Z7_CPU_FAST_BSWAP_SUPPORTED
+
+#else
+
+#define Z7_BSWAP16(v) ((UInt16) \
+ ( ((UInt32)(v) << 8) \
+ | ((UInt32)(v) >> 8) \
+ ))
+
+#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v)
+
+#define Z7_BSWAP64(v) \
+ ( ( ( (UInt64)(v) ) << 8 * 7 ) \
+ | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \
+ | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \
+ | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \
+ | ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \
+ | ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \
+ | ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \
+ | ( ( (UInt64)(v) >> 8 * 7 ) ) \
+ )
+
+#endif
+
+
+
+#ifdef MY_CPU_LE
+ #if defined(MY_CPU_X86_OR_AMD64) \
+ || defined(MY_CPU_ARM64) \
+ || defined(MY_CPU_RISCV) && defined(__riscv_misaligned_fast) \
+ || defined(MY_CPU_E2K) && defined(__iset__) && (__iset__ >= 6)
+ #define MY_CPU_LE_UNALIGN
+ #define MY_CPU_LE_UNALIGN_64
+ #elif defined(__ARM_FEATURE_UNALIGNED)
+/* === ALIGNMENT on 32-bit arm and LDRD/STRD/LDM/STM instructions.
+ Description of problems:
+problem-1 : 32-bit ARM architecture:
+ multi-access (pair of 32-bit accesses) instructions (LDRD/STRD/LDM/STM)
+ require 32-bit (WORD) alignment (by 32-bit ARM architecture).
+ So there is "Alignment fault exception", if data is not aligned for 32-bit.
+
+problem-2 : 32-bit kernels and arm64 kernels:
+ 32-bit linux kernels provide fixup for these "paired" instruction "Alignment fault exception".
+ So unaligned paired-access instructions work via exception handler in kernel in 32-bit linux.
+
+ But some arm64 kernels do not handle these faults in 32-bit programs.
+ So we have unhandled exception for such instructions.
+ Probably some new arm64 kernels have fixed it, and unaligned
+ paired-access instructions work in new kernels?
+
+problem-3 : compiler for 32-bit arm:
+ Compilers use LDRD/STRD/LDM/STM for UInt64 accesses
+ and for another cases where two 32-bit accesses are fused
+ to one multi-access instruction.
+ So UInt64 variables must be aligned for 32-bit, and each
+ 32-bit access must be aligned for 32-bit, if we want to
+ avoid "Alignment fault" exception (handled or unhandled).
+
+problem-4 : performace:
+ Even if unaligned access is handled by kernel, it will be slow.
+ So if we allow unaligned access, we can get fast unaligned
+ single-access, and slow unaligned paired-access.
+
+ We don't allow unaligned access on 32-bit arm, because compiler
+ genarates paired-access instructions that require 32-bit alignment,
+ and some arm64 kernels have no handler for these instructions.
+ Also unaligned paired-access instructions will be slow, if kernel handles them.
+*/
+ // it must be disabled:
+ // #define MY_CPU_LE_UNALIGN
+ #endif
+#endif
+
+
+#ifdef MY_CPU_LE_UNALIGN
+
+#define GetUi16(p) (*(const UInt16 *)(const void *)(p))
+#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
+#ifdef MY_CPU_LE_UNALIGN_64
+#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
+#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
+#endif
+
+#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
+#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
+
+#else
+
+#define GetUi16(p) ( (UInt16) ( \
+ ((const Byte *)(p))[0] | \
+ ((UInt16)((const Byte *)(p))[1] << 8) ))
+
+#define GetUi32(p) ( \
+ ((const Byte *)(p))[0] | \
+ ((UInt32)((const Byte *)(p))[1] << 8) | \
+ ((UInt32)((const Byte *)(p))[2] << 16) | \
+ ((UInt32)((const Byte *)(p))[3] << 24))
+
+#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+ _ppp_[0] = (Byte)_vvv_; \
+ _ppp_[1] = (Byte)(_vvv_ >> 8); }
+
+#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+ _ppp_[0] = (Byte)_vvv_; \
+ _ppp_[1] = (Byte)(_vvv_ >> 8); \
+ _ppp_[2] = (Byte)(_vvv_ >> 16); \
+ _ppp_[3] = (Byte)(_vvv_ >> 24); }
+
+#endif
+
+
+#ifndef GetUi64
+#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
+#endif
+
+#ifndef SetUi64
+#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
+ SetUi32(_ppp2_ , (UInt32)_vvv2_) \
+ SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) }
+#endif
+
+
+#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED)
+
+#define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p))
+#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); }
+
+#if defined(MY_CPU_LE_UNALIGN_64)
+#define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p))
+#endif
+
+#else
+
+#define GetBe32(p) ( \
+ ((UInt32)((const Byte *)(p))[0] << 24) | \
+ ((UInt32)((const Byte *)(p))[1] << 16) | \
+ ((UInt32)((const Byte *)(p))[2] << 8) | \
+ ((const Byte *)(p))[3] )
+
+#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+ _ppp_[0] = (Byte)(_vvv_ >> 24); \
+ _ppp_[1] = (Byte)(_vvv_ >> 16); \
+ _ppp_[2] = (Byte)(_vvv_ >> 8); \
+ _ppp_[3] = (Byte)_vvv_; }
+
+#endif
+
+#ifndef GetBe64
+#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
+#endif
+
+#ifndef GetBe16
+#define GetBe16(p) ( (UInt16) ( \
+ ((UInt16)((const Byte *)(p))[0] << 8) | \
+ ((const Byte *)(p))[1] ))
+#endif
+
+
+#if defined(MY_CPU_BE)
+#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v)
+#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
+#define Z7_CONV_NATIVE_TO_BE_32(v) (v)
+#elif defined(MY_CPU_LE)
+#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
+#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v)
+#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v)
+#else
+#error Stop_Compiling_Unknown_Endian_CONV
+#endif
+
+
+#if defined(MY_CPU_BE)
+
+#define GetBe64a(p) (*(const UInt64 *)(const void *)(p))
+#define GetBe32a(p) (*(const UInt32 *)(const void *)(p))
+#define GetBe16a(p) (*(const UInt16 *)(const void *)(p))
+#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
+#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
+
+#define GetUi32a(p) GetUi32(p)
+#define GetUi16a(p) GetUi16(p)
+#define SetUi32a(p, v) SetUi32(p, v)
+#define SetUi16a(p, v) SetUi16(p, v)
+
+#elif defined(MY_CPU_LE)
+
+#define GetUi32a(p) (*(const UInt32 *)(const void *)(p))
+#define GetUi16a(p) (*(const UInt16 *)(const void *)(p))
+#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
+#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
+
+#define GetBe64a(p) GetBe64(p)
+#define GetBe32a(p) GetBe32(p)
+#define GetBe16a(p) GetBe16(p)
+#define SetBe32a(p, v) SetBe32(p, v)
+#define SetBe16a(p, v) SetBe16(p, v)
+
+#else
+#error Stop_Compiling_Unknown_Endian_CPU_a
+#endif
+
+
+#if defined(MY_CPU_X86_OR_AMD64) \
+ || defined(MY_CPU_ARM_OR_ARM64) \
+ || defined(MY_CPU_PPC_OR_PPC64)
+ #define Z7_CPU_FAST_ROTATE_SUPPORTED
+#endif
+
+
+#ifdef MY_CPU_X86_OR_AMD64
+
+void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function);
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void);
+#if defined(MY_CPU_AMD64)
+#define Z7_IF_X86_CPUID_SUPPORTED
+#else
+#define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc())
+#endif
+
+BoolInt CPU_IsSupported_AES(void);
+BoolInt CPU_IsSupported_AVX(void);
+BoolInt CPU_IsSupported_AVX2(void);
+// BoolInt CPU_IsSupported_AVX512F_AVX512VL(void);
+BoolInt CPU_IsSupported_VAES_AVX2(void);
+BoolInt CPU_IsSupported_CMOV(void);
+BoolInt CPU_IsSupported_SSE(void);
+BoolInt CPU_IsSupported_SSE2(void);
+BoolInt CPU_IsSupported_SSSE3(void);
+BoolInt CPU_IsSupported_SSE41(void);
+BoolInt CPU_IsSupported_SHA(void);
+BoolInt CPU_IsSupported_PageGB(void);
+
+#elif defined(MY_CPU_ARM_OR_ARM64)
+
+BoolInt CPU_IsSupported_CRC32(void);
+BoolInt CPU_IsSupported_NEON(void);
+
+#if defined(_WIN32)
+BoolInt CPU_IsSupported_CRYPTO(void);
+#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO
+#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO
+#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO
+#else
+BoolInt CPU_IsSupported_SHA1(void);
+BoolInt CPU_IsSupported_SHA2(void);
+BoolInt CPU_IsSupported_AES(void);
+#endif
+
+#endif
+
+#if defined(__APPLE__)
+int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
+int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
+#endif
+
+EXTERN_C_END
+
+#endif
/* Delta.h -- Delta converter
-2013-01-18 : Igor Pavlov : Public domain */
+2023-03-03 : Igor Pavlov : Public domain */
-#ifndef __DELTA_H
-#define __DELTA_H
+#ifndef ZIP7_INC_DELTA_H
+#define ZIP7_INC_DELTA_H
#include "7zTypes.h"
-/* LzFind.h -- Match finder for LZ algorithms\r
-2021-07-13 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __LZ_FIND_H\r
-#define __LZ_FIND_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-typedef UInt32 CLzRef;\r
-\r
-typedef struct _CMatchFinder\r
-{\r
- Byte *buffer;\r
- UInt32 pos;\r
- UInt32 posLimit;\r
- UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */\r
- UInt32 lenLimit;\r
-\r
- UInt32 cyclicBufferPos;\r
- UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */\r
-\r
- Byte streamEndWasReached;\r
- Byte btMode;\r
- Byte bigHash;\r
- Byte directInput;\r
-\r
- UInt32 matchMaxLen;\r
- CLzRef *hash;\r
- CLzRef *son;\r
- UInt32 hashMask;\r
- UInt32 cutValue;\r
-\r
- Byte *bufferBase;\r
- ISeqInStream *stream;\r
- \r
- UInt32 blockSize;\r
- UInt32 keepSizeBefore;\r
- UInt32 keepSizeAfter;\r
-\r
- UInt32 numHashBytes;\r
- size_t directInputRem;\r
- UInt32 historySize;\r
- UInt32 fixedHashSize;\r
- UInt32 hashSizeSum;\r
- SRes result;\r
- UInt32 crc[256];\r
- size_t numRefs;\r
-\r
- UInt64 expectedDataSize;\r
-} CMatchFinder;\r
-\r
-#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer)\r
-\r
-#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos))\r
-\r
-/*\r
-#define Inline_MatchFinder_IsFinishedOK(p) \\r
- ((p)->streamEndWasReached \\r
- && (p)->streamPos == (p)->pos \\r
- && (!(p)->directInput || (p)->directInputRem == 0))\r
-*/\r
- \r
-int MatchFinder_NeedMove(CMatchFinder *p);\r
-/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */\r
-void MatchFinder_MoveBlock(CMatchFinder *p);\r
-void MatchFinder_ReadIfRequired(CMatchFinder *p);\r
-\r
-void MatchFinder_Construct(CMatchFinder *p);\r
-\r
-/* Conditions:\r
- historySize <= 3 GB\r
- keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB\r
-*/\r
-int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,\r
- UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,\r
- ISzAllocPtr alloc);\r
-void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);\r
-void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);\r
-// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);\r
-\r
-/*\r
-#define Inline_MatchFinder_InitPos(p, val) \\r
- (p)->pos = (val); \\r
- (p)->streamPos = (val);\r
-*/\r
-\r
-#define Inline_MatchFinder_ReduceOffsets(p, subValue) \\r
- (p)->pos -= (subValue); \\r
- (p)->streamPos -= (subValue);\r
-\r
-\r
-UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,\r
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,\r
- UInt32 *distances, UInt32 maxLen);\r
-\r
-/*\r
-Conditions:\r
- Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.\r
- Mf_GetPointerToCurrentPos_Func's result must be used only before any other function\r
-*/\r
-\r
-typedef void (*Mf_Init_Func)(void *object);\r
-typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);\r
-typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);\r
-typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);\r
-typedef void (*Mf_Skip_Func)(void *object, UInt32);\r
-\r
-typedef struct _IMatchFinder\r
-{\r
- Mf_Init_Func Init;\r
- Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;\r
- Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;\r
- Mf_GetMatches_Func GetMatches;\r
- Mf_Skip_Func Skip;\r
-} IMatchFinder2;\r
-\r
-void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);\r
-\r
-void MatchFinder_Init_LowHash(CMatchFinder *p);\r
-void MatchFinder_Init_HighHash(CMatchFinder *p);\r
-void MatchFinder_Init_4(CMatchFinder *p);\r
-void MatchFinder_Init(CMatchFinder *p);\r
-\r
-UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);\r
-UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);\r
-\r
-void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);\r
-void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);\r
-\r
-void LzFindPrepare(void);\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+/* LzFind.h -- Match finder for LZ algorithms
+2024-01-22 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_LZ_FIND_H
+#define ZIP7_INC_LZ_FIND_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+typedef UInt32 CLzRef;
+
+typedef struct
+{
+ const Byte *buffer;
+ UInt32 pos;
+ UInt32 posLimit;
+ UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
+ UInt32 lenLimit;
+
+ UInt32 cyclicBufferPos;
+ UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
+
+ Byte streamEndWasReached;
+ Byte btMode;
+ Byte bigHash;
+ Byte directInput;
+
+ UInt32 matchMaxLen;
+ CLzRef *hash;
+ CLzRef *son;
+ UInt32 hashMask;
+ UInt32 cutValue;
+
+ Byte *bufBase;
+ ISeqInStreamPtr stream;
+
+ UInt32 blockSize;
+ UInt32 keepSizeBefore;
+ UInt32 keepSizeAfter;
+
+ UInt32 numHashBytes;
+ size_t directInputRem;
+ UInt32 historySize;
+ UInt32 fixedHashSize;
+ Byte numHashBytes_Min;
+ Byte numHashOutBits;
+ Byte _pad2_[2];
+ SRes result;
+ UInt32 crc[256];
+ size_t numRefs;
+
+ UInt64 expectedDataSize;
+} CMatchFinder;
+
+#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer)
+
+#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos))
+
+/*
+#define Inline_MatchFinder_IsFinishedOK(p) \
+ ((p)->streamEndWasReached \
+ && (p)->streamPos == (p)->pos \
+ && (!(p)->directInput || (p)->directInputRem == 0))
+*/
+
+int MatchFinder_NeedMove(CMatchFinder *p);
+/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */
+void MatchFinder_MoveBlock(CMatchFinder *p);
+void MatchFinder_ReadIfRequired(CMatchFinder *p);
+
+void MatchFinder_Construct(CMatchFinder *p);
+
+/* (directInput = 0) is default value.
+ It's required to provide correct (directInput) value
+ before calling MatchFinder_Create().
+ You can set (directInput) by any of the following calls:
+ - MatchFinder_SET_DIRECT_INPUT_BUF()
+ - MatchFinder_SET_STREAM()
+ - MatchFinder_SET_STREAM_MODE()
+*/
+
+#define MatchFinder_SET_DIRECT_INPUT_BUF(p, _src_, _srcLen_) { \
+ (p)->stream = NULL; \
+ (p)->directInput = 1; \
+ (p)->buffer = (_src_); \
+ (p)->directInputRem = (_srcLen_); }
+
+/*
+#define MatchFinder_SET_STREAM_MODE(p) { \
+ (p)->directInput = 0; }
+*/
+
+#define MatchFinder_SET_STREAM(p, _stream_) { \
+ (p)->stream = _stream_; \
+ (p)->directInput = 0; }
+
+
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+ UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+ ISzAllocPtr alloc);
+void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
+
+/*
+#define MatchFinder_INIT_POS(p, val) \
+ (p)->pos = (val); \
+ (p)->streamPos = (val);
+*/
+
+// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+#define MatchFinder_REDUCE_OFFSETS(p, subValue) \
+ (p)->pos -= (subValue); \
+ (p)->streamPos -= (subValue);
+
+
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
+ UInt32 *distances, UInt32 maxLen);
+
+/*
+Conditions:
+ Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
+ Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
+*/
+
+typedef void (*Mf_Init_Func)(void *object);
+typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
+typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
+typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
+typedef void (*Mf_Skip_Func)(void *object, UInt32);
+
+typedef struct
+{
+ Mf_Init_Func Init;
+ Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
+ Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
+ Mf_GetMatches_Func GetMatches;
+ Mf_Skip_Func Skip;
+} IMatchFinder2;
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
+
+void MatchFinder_Init_LowHash(CMatchFinder *p);
+void MatchFinder_Init_HighHash(CMatchFinder *p);
+void MatchFinder_Init_4(CMatchFinder *p);
+// void MatchFinder_Init(CMatchFinder *p);
+void MatchFinder_Init(void *p);
+
+UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+
+void LzFindPrepare(void);
+
+EXTERN_C_END
+
+#endif
-/* LzHash.h -- HASH functions for LZ algorithms\r
-2019-10-30 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __LZ_HASH_H\r
-#define __LZ_HASH_H\r
-\r
-/*\r
- (kHash2Size >= (1 << 8)) : Required\r
- (kHash3Size >= (1 << 16)) : Required\r
-*/\r
-\r
-#define kHash2Size (1 << 10)\r
-#define kHash3Size (1 << 16)\r
-// #define kHash4Size (1 << 20)\r
-\r
-#define kFix3HashSize (kHash2Size)\r
-#define kFix4HashSize (kHash2Size + kHash3Size)\r
-// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)\r
-\r
-/*\r
- We use up to 3 crc values for hash:\r
- crc0\r
- crc1 << Shift_1\r
- crc2 << Shift_2\r
- (Shift_1 = 5) and (Shift_2 = 10) is good tradeoff.\r
- Small values for Shift are not good for collision rate.\r
- Big value for Shift_2 increases the minimum size\r
- of hash table, that will be slow for small files.\r
-*/\r
-\r
-#define kLzHash_CrcShift_1 5\r
-#define kLzHash_CrcShift_2 10\r
-\r
-#endif\r
+/* LzHash.h -- HASH constants for LZ algorithms
+2023-03-05 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_LZ_HASH_H
+#define ZIP7_INC_LZ_HASH_H
+
+/*
+ (kHash2Size >= (1 << 8)) : Required
+ (kHash3Size >= (1 << 16)) : Required
+*/
+
+#define kHash2Size (1 << 10)
+#define kHash3Size (1 << 16)
+// #define kHash4Size (1 << 20)
+
+#define kFix3HashSize (kHash2Size)
+#define kFix4HashSize (kHash2Size + kHash3Size)
+// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+
+/*
+ We use up to 3 crc values for hash:
+ crc0
+ crc1 << Shift_1
+ crc2 << Shift_2
+ (Shift_1 = 5) and (Shift_2 = 10) is good tradeoff.
+ Small values for Shift are not good for collision rate.
+ Big value for Shift_2 increases the minimum size
+ of hash table, that will be slow for small files.
+*/
+
+#define kLzHash_CrcShift_1 5
+#define kLzHash_CrcShift_2 10
+
+#endif
/* Lzma86.h -- LZMA + x86 (BCJ) Filter
-2013-01-18 : Igor Pavlov : Public domain */
+2023-03-03 : Igor Pavlov : Public domain */
-#ifndef __LZMA86_H
-#define __LZMA86_H
+#ifndef ZIP7_INC_LZMA86_H
+#define ZIP7_INC_LZMA86_H
#include "7zTypes.h"
-/* LzmaDec.h -- LZMA Decoder\r
-2020-03-19 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __LZMA_DEC_H\r
-#define __LZMA_DEC_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-/* #define _LZMA_PROB32 */\r
-/* _LZMA_PROB32 can increase the speed on some CPUs,\r
- but memory usage for CLzmaDec::probs will be doubled in that case */\r
-\r
-typedef\r
-#ifdef _LZMA_PROB32\r
- UInt32\r
-#else\r
- UInt16\r
-#endif\r
- CLzmaProb;\r
-\r
-\r
-/* ---------- LZMA Properties ---------- */\r
-\r
-#define LZMA_PROPS_SIZE 5\r
-\r
-typedef struct _CLzmaProps\r
-{\r
- Byte lc;\r
- Byte lp;\r
- Byte pb;\r
- Byte _pad_;\r
- UInt32 dicSize;\r
-} CLzmaProps;\r
-\r
-/* LzmaProps_Decode - decodes properties\r
-Returns:\r
- SZ_OK\r
- SZ_ERROR_UNSUPPORTED - Unsupported properties\r
-*/\r
-\r
-SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);\r
-\r
-\r
-/* ---------- LZMA Decoder state ---------- */\r
-\r
-/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.\r
- Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */\r
-\r
-#define LZMA_REQUIRED_INPUT_MAX 20\r
-\r
-typedef struct\r
-{\r
- /* Don't change this structure. ASM code can use it. */\r
- CLzmaProps prop;\r
- CLzmaProb *probs;\r
- CLzmaProb *probs_1664;\r
- Byte *dic;\r
- SizeT dicBufSize;\r
- SizeT dicPos;\r
- const Byte *buf;\r
- UInt32 range;\r
- UInt32 code;\r
- UInt32 processedPos;\r
- UInt32 checkDicSize;\r
- UInt32 reps[4];\r
- UInt32 state;\r
- UInt32 remainLen;\r
-\r
- UInt32 numProbs;\r
- unsigned tempBufSize;\r
- Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];\r
-} CLzmaDec;\r
-\r
-#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; }\r
-\r
-void LzmaDec_Init(CLzmaDec *p);\r
-\r
-/* There are two types of LZMA streams:\r
- - Stream with end mark. That end mark adds about 6 bytes to compressed size.\r
- - Stream without end mark. You must know exact uncompressed size to decompress such stream. */\r
-\r
-typedef enum\r
-{\r
- LZMA_FINISH_ANY, /* finish at any point */\r
- LZMA_FINISH_END /* block must be finished at the end */\r
-} ELzmaFinishMode;\r
-\r
-/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!\r
-\r
- You must use LZMA_FINISH_END, when you know that current output buffer\r
- covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.\r
-\r
- If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,\r
- and output value of destLen will be less than output buffer size limit.\r
- You can check status result also.\r
-\r
- You can use multiple checks to test data integrity after full decompression:\r
- 1) Check Result and "status" variable.\r
- 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.\r
- 3) Check that output(srcLen) = compressedSize, if you know real compressedSize.\r
- You must use correct finish mode in that case. */\r
-\r
-typedef enum\r
-{\r
- LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */\r
- LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */\r
- LZMA_STATUS_NOT_FINISHED, /* stream was not finished */\r
- LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */\r
- LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */\r
-} ELzmaStatus;\r
-\r
-/* ELzmaStatus is used only as output value for function call */\r
-\r
-\r
-/* ---------- Interfaces ---------- */\r
-\r
-/* There are 3 levels of interfaces:\r
- 1) Dictionary Interface\r
- 2) Buffer Interface\r
- 3) One Call Interface\r
- You can select any of these interfaces, but don't mix functions from different\r
- groups for same object. */\r
-\r
-\r
-/* There are two variants to allocate state for Dictionary Interface:\r
- 1) LzmaDec_Allocate / LzmaDec_Free\r
- 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs\r
- You can use variant 2, if you set dictionary buffer manually.\r
- For Buffer Interface you must always use variant 1.\r
-\r
-LzmaDec_Allocate* can return:\r
- SZ_OK\r
- SZ_ERROR_MEM - Memory allocation error\r
- SZ_ERROR_UNSUPPORTED - Unsupported properties\r
-*/\r
- \r
-SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);\r
-void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc);\r
-\r
-SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);\r
-void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc);\r
-\r
-/* ---------- Dictionary Interface ---------- */\r
-\r
-/* You can use it, if you want to eliminate the overhead for data copying from\r
- dictionary to some other external buffer.\r
- You must work with CLzmaDec variables directly in this interface.\r
-\r
- STEPS:\r
- LzmaDec_Construct()\r
- LzmaDec_Allocate()\r
- for (each new stream)\r
- {\r
- LzmaDec_Init()\r
- while (it needs more decompression)\r
- {\r
- LzmaDec_DecodeToDic()\r
- use data from CLzmaDec::dic and update CLzmaDec::dicPos\r
- }\r
- }\r
- LzmaDec_Free()\r
-*/\r
-\r
-/* LzmaDec_DecodeToDic\r
- \r
- The decoding to internal dictionary buffer (CLzmaDec::dic).\r
- You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!\r
-\r
-finishMode:\r
- It has meaning only if the decoding reaches output limit (dicLimit).\r
- LZMA_FINISH_ANY - Decode just dicLimit bytes.\r
- LZMA_FINISH_END - Stream must be finished after dicLimit.\r
-\r
-Returns:\r
- SZ_OK\r
- status:\r
- LZMA_STATUS_FINISHED_WITH_MARK\r
- LZMA_STATUS_NOT_FINISHED\r
- LZMA_STATUS_NEEDS_MORE_INPUT\r
- LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK\r
- SZ_ERROR_DATA - Data error\r
- SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure\r
-*/\r
-\r
-SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,\r
- const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);\r
-\r
-\r
-/* ---------- Buffer Interface ---------- */\r
-\r
-/* It's zlib-like interface.\r
- See LzmaDec_DecodeToDic description for information about STEPS and return results,\r
- but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need\r
- to work with CLzmaDec variables manually.\r
-\r
-finishMode:\r
- It has meaning only if the decoding reaches output limit (*destLen).\r
- LZMA_FINISH_ANY - Decode just destLen bytes.\r
- LZMA_FINISH_END - Stream must be finished after (*destLen).\r
-*/\r
-\r
-SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,\r
- const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);\r
-\r
-\r
-/* ---------- One Call Interface ---------- */\r
-\r
-/* LzmaDecode\r
-\r
-finishMode:\r
- It has meaning only if the decoding reaches output limit (*destLen).\r
- LZMA_FINISH_ANY - Decode just destLen bytes.\r
- LZMA_FINISH_END - Stream must be finished after (*destLen).\r
-\r
-Returns:\r
- SZ_OK\r
- status:\r
- LZMA_STATUS_FINISHED_WITH_MARK\r
- LZMA_STATUS_NOT_FINISHED\r
- LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK\r
- SZ_ERROR_DATA - Data error\r
- SZ_ERROR_MEM - Memory allocation error\r
- SZ_ERROR_UNSUPPORTED - Unsupported properties\r
- SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).\r
- SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure\r
-*/\r
-\r
-SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,\r
- const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,\r
- ELzmaStatus *status, ISzAllocPtr alloc);\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+/* LzmaDec.h -- LZMA Decoder
+2023-04-02 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_LZMA_DEC_H
+#define ZIP7_INC_LZMA_DEC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/* #define Z7_LZMA_PROB32 */
+/* Z7_LZMA_PROB32 can increase the speed on some CPUs,
+ but memory usage for CLzmaDec::probs will be doubled in that case */
+
+typedef
+#ifdef Z7_LZMA_PROB32
+ UInt32
+#else
+ UInt16
+#endif
+ CLzmaProb;
+
+
+/* ---------- LZMA Properties ---------- */
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct
+{
+ Byte lc;
+ Byte lp;
+ Byte pb;
+ Byte _pad_;
+ UInt32 dicSize;
+} CLzmaProps;
+
+/* LzmaProps_Decode - decodes properties
+Returns:
+ SZ_OK
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
+
+
+/* ---------- LZMA Decoder state ---------- */
+
+/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
+ Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
+
+#define LZMA_REQUIRED_INPUT_MAX 20
+
+typedef struct
+{
+ /* Don't change this structure. ASM code can use it. */
+ CLzmaProps prop;
+ CLzmaProb *probs;
+ CLzmaProb *probs_1664;
+ Byte *dic;
+ SizeT dicBufSize;
+ SizeT dicPos;
+ const Byte *buf;
+ UInt32 range;
+ UInt32 code;
+ UInt32 processedPos;
+ UInt32 checkDicSize;
+ UInt32 reps[4];
+ UInt32 state;
+ UInt32 remainLen;
+
+ UInt32 numProbs;
+ unsigned tempBufSize;
+ Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
+} CLzmaDec;
+
+#define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; }
+#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p)
+
+void LzmaDec_Init(CLzmaDec *p);
+
+/* There are two types of LZMA streams:
+ - Stream with end mark. That end mark adds about 6 bytes to compressed size.
+ - Stream without end mark. You must know exact uncompressed size to decompress such stream. */
+
+typedef enum
+{
+ LZMA_FINISH_ANY, /* finish at any point */
+ LZMA_FINISH_END /* block must be finished at the end */
+} ELzmaFinishMode;
+
+/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
+
+ You must use LZMA_FINISH_END, when you know that current output buffer
+ covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
+
+ If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
+ and output value of destLen will be less than output buffer size limit.
+ You can check status result also.
+
+ You can use multiple checks to test data integrity after full decompression:
+ 1) Check Result and "status" variable.
+ 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
+ 3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
+ You must use correct finish mode in that case. */
+
+typedef enum
+{
+ LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */
+ LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */
+ LZMA_STATUS_NOT_FINISHED, /* stream was not finished */
+ LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */
+ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */
+} ELzmaStatus;
+
+/* ELzmaStatus is used only as output value for function call */
+
+
+/* ---------- Interfaces ---------- */
+
+/* There are 3 levels of interfaces:
+ 1) Dictionary Interface
+ 2) Buffer Interface
+ 3) One Call Interface
+ You can select any of these interfaces, but don't mix functions from different
+ groups for same object. */
+
+
+/* There are two variants to allocate state for Dictionary Interface:
+ 1) LzmaDec_Allocate / LzmaDec_Free
+ 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
+ You can use variant 2, if you set dictionary buffer manually.
+ For Buffer Interface you must always use variant 1.
+
+LzmaDec_Allocate* can return:
+ SZ_OK
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc);
+
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
+void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc);
+
+/* ---------- Dictionary Interface ---------- */
+
+/* You can use it, if you want to eliminate the overhead for data copying from
+ dictionary to some other external buffer.
+ You must work with CLzmaDec variables directly in this interface.
+
+ STEPS:
+ LzmaDec_Construct()
+ LzmaDec_Allocate()
+ for (each new stream)
+ {
+ LzmaDec_Init()
+ while (it needs more decompression)
+ {
+ LzmaDec_DecodeToDic()
+ use data from CLzmaDec::dic and update CLzmaDec::dicPos
+ }
+ }
+ LzmaDec_Free()
+*/
+
+/* LzmaDec_DecodeToDic
+
+ The decoding to internal dictionary buffer (CLzmaDec::dic).
+ You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
+
+finishMode:
+ It has meaning only if the decoding reaches output limit (dicLimit).
+ LZMA_FINISH_ANY - Decode just dicLimit bytes.
+ LZMA_FINISH_END - Stream must be finished after dicLimit.
+
+Returns:
+ SZ_OK
+ status:
+ LZMA_STATUS_FINISHED_WITH_MARK
+ LZMA_STATUS_NOT_FINISHED
+ LZMA_STATUS_NEEDS_MORE_INPUT
+ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+ SZ_ERROR_DATA - Data error
+ SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
+*/
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
+ const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- Buffer Interface ---------- */
+
+/* It's zlib-like interface.
+ See LzmaDec_DecodeToDic description for information about STEPS and return results,
+ but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
+ to work with CLzmaDec variables manually.
+
+finishMode:
+ It has meaning only if the decoding reaches output limit (*destLen).
+ LZMA_FINISH_ANY - Decode just destLen bytes.
+ LZMA_FINISH_END - Stream must be finished after (*destLen).
+*/
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
+ const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- One Call Interface ---------- */
+
+/* LzmaDecode
+
+finishMode:
+ It has meaning only if the decoding reaches output limit (*destLen).
+ LZMA_FINISH_ANY - Decode just destLen bytes.
+ LZMA_FINISH_END - Stream must be finished after (*destLen).
+
+Returns:
+ SZ_OK
+ status:
+ LZMA_STATUS_FINISHED_WITH_MARK
+ LZMA_STATUS_NOT_FINISHED
+ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+ SZ_ERROR_DATA - Data error
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+ SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+ SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
+*/
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+ const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+ ELzmaStatus *status, ISzAllocPtr alloc);
+
+EXTERN_C_END
+
+#endif
-/* LzmaEnc.h -- LZMA Encoder\r
-2019-10-30 : Igor Pavlov : Public domain */\r
-\r
-#ifndef __LZMA_ENC_H\r
-#define __LZMA_ENC_H\r
-\r
-#include "7zTypes.h"\r
-\r
-EXTERN_C_BEGIN\r
-\r
-#define LZMA_PROPS_SIZE 5\r
-\r
-typedef struct _CLzmaEncProps\r
-{\r
- int level; /* 0 <= level <= 9 */\r
- UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version\r
- (1 << 12) <= dictSize <= (3 << 29) for 64-bit version\r
- default = (1 << 24) */\r
- int lc; /* 0 <= lc <= 8, default = 3 */\r
- int lp; /* 0 <= lp <= 4, default = 0 */\r
- int pb; /* 0 <= pb <= 4, default = 2 */\r
- int algo; /* 0 - fast, 1 - normal, default = 1 */\r
- int fb; /* 5 <= fb <= 273, default = 32 */\r
- int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */\r
- int numHashBytes; /* 2, 3 or 4, default = 4 */\r
- UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */\r
- unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */\r
- int numThreads; /* 1 or 2, default = 2 */\r
-\r
- UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.\r
- Encoder uses this value to reduce dictionary size */\r
-\r
- UInt64 affinity;\r
-} CLzmaEncProps;\r
-\r
-void LzmaEncProps_Init(CLzmaEncProps *p);\r
-void LzmaEncProps_Normalize(CLzmaEncProps *p);\r
-UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);\r
-\r
-\r
-/* ---------- CLzmaEncHandle Interface ---------- */\r
-\r
-/* LzmaEnc* functions can return the following exit codes:\r
-SRes:\r
- SZ_OK - OK\r
- SZ_ERROR_MEM - Memory allocation error\r
- SZ_ERROR_PARAM - Incorrect paramater in props\r
- SZ_ERROR_WRITE - ISeqOutStream write callback error\r
- SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output\r
- SZ_ERROR_PROGRESS - some break from progress callback\r
- SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)\r
-*/\r
-\r
-typedef void * CLzmaEncHandle;\r
-\r
-CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);\r
-void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);\r
-\r
-SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);\r
-void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);\r
-SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);\r
-unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);\r
-\r
-SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,\r
- ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);\r
-SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,\r
- int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);\r
-\r
-\r
-/* ---------- One Call Interface ---------- */\r
-\r
-SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,\r
- const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,\r
- ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);\r
-\r
-EXTERN_C_END\r
-\r
-#endif\r
+/* LzmaEnc.h -- LZMA Encoder
+2023-04-13 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_LZMA_ENC_H
+#define ZIP7_INC_LZMA_ENC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct
+{
+ int level; /* 0 <= level <= 9 */
+ UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
+ (1 << 12) <= dictSize <= (3 << 29) for 64-bit version
+ default = (1 << 24) */
+ int lc; /* 0 <= lc <= 8, default = 3 */
+ int lp; /* 0 <= lp <= 4, default = 0 */
+ int pb; /* 0 <= pb <= 4, default = 2 */
+ int algo; /* 0 - fast, 1 - normal, default = 1 */
+ int fb; /* 5 <= fb <= 273, default = 32 */
+ int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
+ int numHashBytes; /* 2, 3 or 4, default = 4 */
+ unsigned numHashOutBits; /* default = ? */
+ UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
+ unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
+ int numThreads; /* 1 or 2, default = 2 */
+
+ // int _pad;
+
+ UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
+ Encoder uses this value to reduce dictionary size */
+
+ UInt64 affinity;
+} CLzmaEncProps;
+
+void LzmaEncProps_Init(CLzmaEncProps *p);
+void LzmaEncProps_Normalize(CLzmaEncProps *p);
+UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
+
+
+/* ---------- CLzmaEncHandle Interface ---------- */
+
+/* LzmaEnc* functions can return the following exit codes:
+SRes:
+ SZ_OK - OK
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_PARAM - Incorrect paramater in props
+ SZ_ERROR_WRITE - ISeqOutStream write callback error
+ SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output
+ SZ_ERROR_PROGRESS - some break from progress callback
+ SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
+*/
+
+typedef struct CLzmaEnc CLzmaEnc;
+typedef CLzmaEnc * CLzmaEncHandle;
+// Z7_DECLARE_HANDLE(CLzmaEncHandle)
+
+CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
+void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+
+SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
+void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
+unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
+
+SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream,
+ ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+ int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+
+
+/* ---------- One Call Interface ---------- */
+
+SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+ const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
+ ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+
+EXTERN_C_END
+
+#endif
--- /dev/null
+/* Precomp.h -- precompilation file
+2024-01-25 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_PRECOMP_H
+#define ZIP7_INC_PRECOMP_H
+
+/*
+ this file must be included before another *.h files and before <windows.h>.
+ this file is included from the following files:
+ C\*.c
+ C\Util\*\Precomp.h <- C\Util\*\*.c
+ CPP\Common\Common.h <- *\StdAfx.h <- *\*.cpp
+
+ this file can set the following macros:
+ Z7_LARGE_PAGES 1
+ Z7_LONG_PATH 1
+ Z7_WIN32_WINNT_MIN 0x0500 (or higher) : we require at least win2000+ for 7-Zip
+ _WIN32_WINNT 0x0500 (or higher)
+ WINVER _WIN32_WINNT
+ UNICODE 1
+ _UNICODE 1
+*/
+
+#include "Compiler.h"
+
+#ifdef _MSC_VER
+// #pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty
+#if _MSC_VER >= 1912
+// #pragma warning(disable : 5039) // pointer or reference to potentially throwing function passed to 'extern "C"' function under - EHc.Undefined behavior may occur if this function throws an exception.
+#endif
+#endif
+
+/*
+// for debug:
+#define UNICODE 1
+#define _UNICODE 1
+#define _WIN32_WINNT 0x0500 // win2000
+#ifndef WINVER
+ #define WINVER _WIN32_WINNT
+#endif
+*/
+
+#ifdef _WIN32
+/*
+ this "Precomp.h" file must be included before <windows.h>,
+ if we want to define _WIN32_WINNT before <windows.h>.
+*/
+
+#ifndef Z7_LARGE_PAGES
+#ifndef Z7_NO_LARGE_PAGES
+#define Z7_LARGE_PAGES 1
+#endif
+#endif
+
+#ifndef Z7_LONG_PATH
+#ifndef Z7_NO_LONG_PATH
+#define Z7_LONG_PATH 1
+#endif
+#endif
+
+#ifndef Z7_DEVICE_FILE
+#ifndef Z7_NO_DEVICE_FILE
+// #define Z7_DEVICE_FILE 1
+#endif
+#endif
+
+// we don't change macros if included after <windows.h>
+#ifndef _WINDOWS_
+
+#ifndef Z7_WIN32_WINNT_MIN
+ #if defined(_M_ARM64) || defined(__aarch64__)
+ // #define Z7_WIN32_WINNT_MIN 0x0a00 // win10
+ #define Z7_WIN32_WINNT_MIN 0x0600 // vista
+ #elif defined(_M_ARM) && defined(_M_ARMT) && defined(_M_ARM_NT)
+ // #define Z7_WIN32_WINNT_MIN 0x0602 // win8
+ #define Z7_WIN32_WINNT_MIN 0x0600 // vista
+ #elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) || defined(_M_IA64)
+ #define Z7_WIN32_WINNT_MIN 0x0503 // win2003
+ // #elif defined(_M_IX86) || defined(__i386__)
+ // #define Z7_WIN32_WINNT_MIN 0x0500 // win2000
+ #else // x86 and another(old) systems
+ #define Z7_WIN32_WINNT_MIN 0x0500 // win2000
+ // #define Z7_WIN32_WINNT_MIN 0x0502 // win2003 // for debug
+ #endif
+#endif // Z7_WIN32_WINNT_MIN
+
+
+#ifndef Z7_DO_NOT_DEFINE_WIN32_WINNT
+#ifdef _WIN32_WINNT
+ // #error Stop_Compiling_Bad_WIN32_WINNT
+#else
+ #ifndef Z7_NO_DEFINE_WIN32_WINNT
+Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
+ #define _WIN32_WINNT Z7_WIN32_WINNT_MIN
+Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
+ #endif
+#endif // _WIN32_WINNT
+
+#ifndef WINVER
+ #define WINVER _WIN32_WINNT
+#endif
+#endif // Z7_DO_NOT_DEFINE_WIN32_WINNT
+
+
+#ifndef _MBCS
+#ifndef Z7_NO_UNICODE
+// UNICODE and _UNICODE are used by <windows.h> and by 7-zip code.
+
+#ifndef UNICODE
+#define UNICODE 1
+#endif
+
+#ifndef _UNICODE
+Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
+#define _UNICODE 1
+Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
+#endif
+
+#endif // Z7_NO_UNICODE
+#endif // _MBCS
+#endif // _WINDOWS_
+
+// #include "7zWindows.h"
+
+#endif // _WIN32
+
+#endif
--- /dev/null
+/* RotateDefs.h -- Rotate functions
+2023-06-18 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_ROTATE_DEFS_H
+#define ZIP7_INC_ROTATE_DEFS_H
+
+#ifdef _MSC_VER
+
+#include <stdlib.h>
+
+/* don't use _rotl with old MINGW. It can insert slow call to function. */
+
+/* #if (_MSC_VER >= 1200) */
+#pragma intrinsic(_rotl)
+#pragma intrinsic(_rotr)
+/* #endif */
+
+#define rotlFixed(x, n) _rotl((x), (n))
+#define rotrFixed(x, n) _rotr((x), (n))
+
+#if (_MSC_VER >= 1300)
+#define Z7_ROTL64(x, n) _rotl64((x), (n))
+#define Z7_ROTR64(x, n) _rotr64((x), (n))
+#else
+#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
+#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
+#endif
+
+#else
+
+/* new compilers can translate these macros to fast commands. */
+
+#if defined(__clang__) && (__clang_major__ >= 4) \
+ || defined(__GNUC__) && (__GNUC__ >= 5)
+/* GCC 4.9.0 and clang 3.5 can recognize more correct version: */
+#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (-(n) & 31)))
+#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (-(n) & 31)))
+#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (-(n) & 63)))
+#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (-(n) & 63)))
+#else
+/* for old GCC / clang: */
+#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
+#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
+#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
+#endif
+
+#endif
+
+#endif
/* Sort.h -- Sort functions
-2014-04-05 : Igor Pavlov : Public domain */
+2023-03-05 : Igor Pavlov : Public domain */
-#ifndef __7Z_SORT_H
-#define __7Z_SORT_H
+#ifndef ZIP7_INC_SORT_H
+#define ZIP7_INC_SORT_H
#include "7zTypes.h"
-/* Alloc.c -- Memory allocation functions\r
-2021-07-13 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-#include <stdio.h>\r
-\r
-#ifdef _WIN32\r
-#include <windows.h>\r
-#endif\r
-#include <stdlib.h>\r
-\r
-#include "Alloc.h"\r
-\r
-/* #define _SZ_ALLOC_DEBUG */\r
-\r
-/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */\r
-#ifdef _SZ_ALLOC_DEBUG\r
-\r
-#include <stdio.h>\r
-int g_allocCount = 0;\r
-int g_allocCountMid = 0;\r
-int g_allocCountBig = 0;\r
-\r
-\r
-#define CONVERT_INT_TO_STR(charType, tempSize) \\r
- unsigned char temp[tempSize]; unsigned i = 0; \\r
- while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \\r
- *s++ = (charType)('0' + (unsigned)val); \\r
- while (i != 0) { i--; *s++ = temp[i]; } \\r
- *s = 0;\r
-\r
-static void ConvertUInt64ToString(UInt64 val, char *s)\r
-{\r
- CONVERT_INT_TO_STR(char, 24);\r
-}\r
-\r
-#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))\r
-\r
-static void ConvertUInt64ToHex(UInt64 val, char *s)\r
-{\r
- UInt64 v = val;\r
- unsigned i;\r
- for (i = 1;; i++)\r
- {\r
- v >>= 4;\r
- if (v == 0)\r
- break;\r
- }\r
- s[i] = 0;\r
- do\r
- {\r
- unsigned t = (unsigned)(val & 0xF);\r
- val >>= 4;\r
- s[--i] = GET_HEX_CHAR(t);\r
- }\r
- while (i);\r
-}\r
-\r
-#define DEBUG_OUT_STREAM stderr\r
-\r
-static void Print(const char *s)\r
-{\r
- fputs(s, DEBUG_OUT_STREAM);\r
-}\r
-\r
-static void PrintAligned(const char *s, size_t align)\r
-{\r
- size_t len = strlen(s);\r
- for(;;)\r
- {\r
- fputc(' ', DEBUG_OUT_STREAM);\r
- if (len >= align)\r
- break;\r
- ++len;\r
- }\r
- Print(s);\r
-}\r
-\r
-static void PrintLn()\r
-{\r
- Print("\n");\r
-}\r
-\r
-static void PrintHex(UInt64 v, size_t align)\r
-{\r
- char s[32];\r
- ConvertUInt64ToHex(v, s);\r
- PrintAligned(s, align);\r
-}\r
-\r
-static void PrintDec(UInt64 v, size_t align)\r
-{\r
- char s[32];\r
- ConvertUInt64ToString(v, s);\r
- PrintAligned(s, align);\r
-}\r
-\r
-static void PrintAddr(void *p)\r
-{\r
- PrintHex((UInt64)(size_t)(ptrdiff_t)p, 12);\r
-}\r
-\r
-\r
-#define PRINT_ALLOC(name, cnt, size, ptr) \\r
- Print(name " "); \\r
- PrintDec(cnt++, 10); \\r
- PrintHex(size, 10); \\r
- PrintAddr(ptr); \\r
- PrintLn();\r
- \r
-#define PRINT_FREE(name, cnt, ptr) if (ptr) { \\r
- Print(name " "); \\r
- PrintDec(--cnt, 10); \\r
- PrintAddr(ptr); \\r
- PrintLn(); }\r
- \r
-#else\r
-\r
-#define PRINT_ALLOC(name, cnt, size, ptr)\r
-#define PRINT_FREE(name, cnt, ptr)\r
-#define Print(s)\r
-#define PrintLn()\r
-#define PrintHex(v, align)\r
-#define PrintAddr(p)\r
-\r
-#endif\r
-\r
-\r
-\r
-void *MyAlloc(size_t size)\r
-{\r
- if (size == 0)\r
- return NULL;\r
- PRINT_ALLOC("Alloc ", g_allocCount, size, NULL);\r
- #ifdef _SZ_ALLOC_DEBUG\r
- {\r
- void *p = malloc(size);\r
- // PRINT_ALLOC("Alloc ", g_allocCount, size, p);\r
- return p;\r
- }\r
- #else\r
- return malloc(size);\r
- #endif\r
-}\r
-\r
-void MyFree(void *address)\r
-{\r
- PRINT_FREE("Free ", g_allocCount, address);\r
- \r
- free(address);\r
-}\r
-\r
-#ifdef _WIN32\r
-\r
-void *MidAlloc(size_t size)\r
-{\r
- if (size == 0)\r
- return NULL;\r
- \r
- PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL);\r
- \r
- return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);\r
-}\r
-\r
-void MidFree(void *address)\r
-{\r
- PRINT_FREE("Free-Mid", g_allocCountMid, address);\r
-\r
- if (!address)\r
- return;\r
- VirtualFree(address, 0, MEM_RELEASE);\r
-}\r
-\r
-#ifdef _7ZIP_LARGE_PAGES\r
-\r
-#ifdef MEM_LARGE_PAGES\r
- #define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES\r
-#else\r
- #define MY__MEM_LARGE_PAGES 0x20000000\r
-#endif\r
-\r
-extern\r
-SIZE_T g_LargePageSize;\r
-SIZE_T g_LargePageSize = 0;\r
-typedef SIZE_T (WINAPI *GetLargePageMinimumP)(VOID);\r
-\r
-#endif // _7ZIP_LARGE_PAGES\r
-\r
-void SetLargePageSize()\r
-{\r
- #ifdef _7ZIP_LARGE_PAGES\r
- SIZE_T size;\r
- GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP)\r
- GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum");\r
- if (!largePageMinimum)\r
- return;\r
- size = largePageMinimum();\r
- if (size == 0 || (size & (size - 1)) != 0)\r
- return;\r
- g_LargePageSize = size;\r
- #endif\r
-}\r
-\r
-\r
-void *BigAlloc(size_t size)\r
-{\r
- if (size == 0)\r
- return NULL;\r
-\r
- PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL);\r
- \r
- #ifdef _7ZIP_LARGE_PAGES\r
- {\r
- SIZE_T ps = g_LargePageSize;\r
- if (ps != 0 && ps <= (1 << 30) && size > (ps / 2))\r
- {\r
- size_t size2;\r
- ps--;\r
- size2 = (size + ps) & ~ps;\r
- if (size2 >= size)\r
- {\r
- void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE);\r
- if (res)\r
- return res;\r
- }\r
- }\r
- }\r
- #endif\r
-\r
- return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);\r
-}\r
-\r
-void BigFree(void *address)\r
-{\r
- PRINT_FREE("Free-Big", g_allocCountBig, address);\r
- \r
- if (!address)\r
- return;\r
- VirtualFree(address, 0, MEM_RELEASE);\r
-}\r
-\r
-#endif\r
-\r
-\r
-static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); }\r
-static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); }\r
-const ISzAlloc g_Alloc = { SzAlloc, SzFree };\r
-\r
-#ifdef _WIN32\r
-static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); }\r
-static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); }\r
-static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); }\r
-static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); }\r
-const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };\r
-const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };\r
-#endif\r
-\r
-/*\r
- uintptr_t : <stdint.h> C99 (optional)\r
- : unsupported in VS6\r
-*/\r
-\r
-#ifdef _WIN32\r
- typedef UINT_PTR UIntPtr;\r
-#else\r
- /*\r
- typedef uintptr_t UIntPtr;\r
- */\r
- typedef ptrdiff_t UIntPtr;\r
-#endif\r
-\r
-\r
-#define ADJUST_ALLOC_SIZE 0\r
-/*\r
-#define ADJUST_ALLOC_SIZE (sizeof(void *) - 1)\r
-*/\r
-/*\r
- Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if\r
- MyAlloc() can return address that is NOT multiple of sizeof(void *).\r
-*/\r
-\r
-\r
-/*\r
-#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1))))\r
-*/\r
-#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1))))\r
-\r
-\r
-#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)\r
- #define USE_posix_memalign\r
-#endif\r
-\r
-#ifndef USE_posix_memalign\r
-#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)\r
-#endif\r
-\r
-/*\r
- This posix_memalign() is for test purposes only.\r
- We also need special Free() function instead of free(),\r
- if this posix_memalign() is used.\r
-*/\r
-\r
-/*\r
-static int posix_memalign(void **ptr, size_t align, size_t size)\r
-{\r
- size_t newSize = size + align;\r
- void *p;\r
- void *pAligned;\r
- *ptr = NULL;\r
- if (newSize < size)\r
- return 12; // ENOMEM\r
- p = MyAlloc(newSize);\r
- if (!p)\r
- return 12; // ENOMEM\r
- pAligned = MY_ALIGN_PTR_UP_PLUS(p, align);\r
- ((void **)pAligned)[-1] = p;\r
- *ptr = pAligned;\r
- return 0;\r
-}\r
-*/\r
-\r
-/*\r
- ALLOC_ALIGN_SIZE >= sizeof(void *)\r
- ALLOC_ALIGN_SIZE >= cache_line_size\r
-*/\r
-\r
-#define ALLOC_ALIGN_SIZE ((size_t)1 << 7)\r
-\r
-static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)\r
-{\r
- #ifndef USE_posix_memalign\r
- \r
- void *p;\r
- void *pAligned;\r
- size_t newSize;\r
- UNUSED_VAR(pp);\r
-\r
- /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned\r
- block to prevent cache line sharing with another allocated blocks */\r
-\r
- newSize = size + ALLOC_ALIGN_SIZE * 1 + ADJUST_ALLOC_SIZE;\r
- if (newSize < size)\r
- return NULL;\r
-\r
- p = MyAlloc(newSize);\r
- \r
- if (!p)\r
- return NULL;\r
- pAligned = MY_ALIGN_PTR_UP_PLUS(p, ALLOC_ALIGN_SIZE);\r
-\r
- Print(" size="); PrintHex(size, 8);\r
- Print(" a_size="); PrintHex(newSize, 8);\r
- Print(" ptr="); PrintAddr(p);\r
- Print(" a_ptr="); PrintAddr(pAligned);\r
- PrintLn();\r
-\r
- ((void **)pAligned)[-1] = p;\r
-\r
- return pAligned;\r
-\r
- #else\r
-\r
- void *p;\r
- UNUSED_VAR(pp);\r
- if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size))\r
- return NULL;\r
-\r
- Print(" posix_memalign="); PrintAddr(p);\r
- PrintLn();\r
-\r
- return p;\r
-\r
- #endif\r
-}\r
-\r
-\r
-static void SzAlignedFree(ISzAllocPtr pp, void *address)\r
-{\r
- UNUSED_VAR(pp);\r
- #ifndef USE_posix_memalign\r
- if (address)\r
- MyFree(((void **)address)[-1]);\r
- #else\r
- free(address);\r
- #endif\r
-}\r
-\r
-\r
-const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };\r
-\r
-\r
-\r
-#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))\r
-\r
-/* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */\r
-#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]\r
-/*\r
-#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1]\r
-*/\r
-\r
-static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)\r
-{\r
- CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);\r
- void *adr;\r
- void *pAligned;\r
- size_t newSize;\r
- size_t extra;\r
- size_t alignSize = (size_t)1 << p->numAlignBits;\r
-\r
- if (alignSize < sizeof(void *))\r
- alignSize = sizeof(void *);\r
- \r
- if (p->offset >= alignSize)\r
- return NULL;\r
-\r
- /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned\r
- block to prevent cache line sharing with another allocated blocks */\r
- extra = p->offset & (sizeof(void *) - 1);\r
- newSize = size + alignSize + extra + ADJUST_ALLOC_SIZE;\r
- if (newSize < size)\r
- return NULL;\r
-\r
- adr = ISzAlloc_Alloc(p->baseAlloc, newSize);\r
- \r
- if (!adr)\r
- return NULL;\r
-\r
- pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr +\r
- alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset;\r
-\r
- PrintLn();\r
- Print("- Aligned: ");\r
- Print(" size="); PrintHex(size, 8);\r
- Print(" a_size="); PrintHex(newSize, 8);\r
- Print(" ptr="); PrintAddr(adr);\r
- Print(" a_ptr="); PrintAddr(pAligned);\r
- PrintLn();\r
-\r
- REAL_BLOCK_PTR_VAR(pAligned) = adr;\r
-\r
- return pAligned;\r
-}\r
-\r
-\r
-static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)\r
-{\r
- if (address)\r
- {\r
- CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);\r
- PrintLn();\r
- Print("- Aligned Free: ");\r
- PrintLn();\r
- ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address));\r
- }\r
-}\r
-\r
-\r
-void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p)\r
-{\r
- p->vt.Alloc = AlignOffsetAlloc_Alloc;\r
- p->vt.Free = AlignOffsetAlloc_Free;\r
-}\r
+/* Alloc.c -- Memory allocation functions
+2024-02-18 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#ifdef _WIN32
+#include "7zWindows.h"
+#endif
+#include <stdlib.h>
+
+#include "Alloc.h"
+
+#if defined(Z7_LARGE_PAGES) && defined(_WIN32) && \
+ (!defined(Z7_WIN32_WINNT_MIN) || Z7_WIN32_WINNT_MIN < 0x0502) // < Win2003 (xp-64)
+ #define Z7_USE_DYN_GetLargePageMinimum
+#endif
+
+// for debug:
+#if 0
+#if defined(__CHERI__) && defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
+// #pragma message("=== Z7_ALLOC_NO_OFFSET_ALLOCATOR === ")
+#define Z7_ALLOC_NO_OFFSET_ALLOCATOR
+#endif
+#endif
+
+// #define SZ_ALLOC_DEBUG
+/* #define SZ_ALLOC_DEBUG */
+
+/* use SZ_ALLOC_DEBUG to debug alloc/free operations */
+#ifdef SZ_ALLOC_DEBUG
+
+#include <string.h>
+#include <stdio.h>
+static int g_allocCount = 0;
+#ifdef _WIN32
+static int g_allocCountMid = 0;
+static int g_allocCountBig = 0;
+#endif
+
+
+#define CONVERT_INT_TO_STR(charType, tempSize) \
+ char temp[tempSize]; unsigned i = 0; \
+ while (val >= 10) { temp[i++] = (char)('0' + (unsigned)(val % 10)); val /= 10; } \
+ *s++ = (charType)('0' + (unsigned)val); \
+ while (i != 0) { i--; *s++ = temp[i]; } \
+ *s = 0;
+
+static void ConvertUInt64ToString(UInt64 val, char *s)
+{
+ CONVERT_INT_TO_STR(char, 24)
+}
+
+#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
+
+static void ConvertUInt64ToHex(UInt64 val, char *s)
+{
+ UInt64 v = val;
+ unsigned i;
+ for (i = 1;; i++)
+ {
+ v >>= 4;
+ if (v == 0)
+ break;
+ }
+ s[i] = 0;
+ do
+ {
+ unsigned t = (unsigned)(val & 0xF);
+ val >>= 4;
+ s[--i] = GET_HEX_CHAR(t);
+ }
+ while (i);
+}
+
+#define DEBUG_OUT_STREAM stderr
+
+static void Print(const char *s)
+{
+ fputs(s, DEBUG_OUT_STREAM);
+}
+
+static void PrintAligned(const char *s, size_t align)
+{
+ size_t len = strlen(s);
+ for(;;)
+ {
+ fputc(' ', DEBUG_OUT_STREAM);
+ if (len >= align)
+ break;
+ ++len;
+ }
+ Print(s);
+}
+
+static void PrintLn(void)
+{
+ Print("\n");
+}
+
+static void PrintHex(UInt64 v, size_t align)
+{
+ char s[32];
+ ConvertUInt64ToHex(v, s);
+ PrintAligned(s, align);
+}
+
+static void PrintDec(int v, size_t align)
+{
+ char s[32];
+ ConvertUInt64ToString((unsigned)v, s);
+ PrintAligned(s, align);
+}
+
+static void PrintAddr(void *p)
+{
+ PrintHex((UInt64)(size_t)(ptrdiff_t)p, 12);
+}
+
+
+#define PRINT_REALLOC(name, cnt, size, ptr) { \
+ Print(name " "); \
+ if (!ptr) PrintDec(cnt++, 10); \
+ PrintHex(size, 10); \
+ PrintAddr(ptr); \
+ PrintLn(); }
+
+#define PRINT_ALLOC(name, cnt, size, ptr) { \
+ Print(name " "); \
+ PrintDec(cnt++, 10); \
+ PrintHex(size, 10); \
+ PrintAddr(ptr); \
+ PrintLn(); }
+
+#define PRINT_FREE(name, cnt, ptr) if (ptr) { \
+ Print(name " "); \
+ PrintDec(--cnt, 10); \
+ PrintAddr(ptr); \
+ PrintLn(); }
+
+#else
+
+#ifdef _WIN32
+#define PRINT_ALLOC(name, cnt, size, ptr)
+#endif
+#define PRINT_FREE(name, cnt, ptr)
+#define Print(s)
+#define PrintLn()
+#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
+#define PrintHex(v, align)
+#endif
+#define PrintAddr(p)
+
+#endif
+
+
+/*
+by specification:
+ malloc(non_NULL, 0) : returns NULL or a unique pointer value that can later be successfully passed to free()
+ realloc(NULL, size) : the call is equivalent to malloc(size)
+ realloc(non_NULL, 0) : the call is equivalent to free(ptr)
+
+in main compilers:
+ malloc(0) : returns non_NULL
+ realloc(NULL, 0) : returns non_NULL
+ realloc(non_NULL, 0) : returns NULL
+*/
+
+
+void *MyAlloc(size_t size)
+{
+ if (size == 0)
+ return NULL;
+ // PRINT_ALLOC("Alloc ", g_allocCount, size, NULL)
+ #ifdef SZ_ALLOC_DEBUG
+ {
+ void *p = malloc(size);
+ if (p)
+ {
+ PRINT_ALLOC("Alloc ", g_allocCount, size, p)
+ }
+ return p;
+ }
+ #else
+ return malloc(size);
+ #endif
+}
+
+void MyFree(void *address)
+{
+ PRINT_FREE("Free ", g_allocCount, address)
+
+ free(address);
+}
+
+void *MyRealloc(void *address, size_t size)
+{
+ if (size == 0)
+ {
+ MyFree(address);
+ return NULL;
+ }
+ // PRINT_REALLOC("Realloc ", g_allocCount, size, address)
+ #ifdef SZ_ALLOC_DEBUG
+ {
+ void *p = realloc(address, size);
+ if (p)
+ {
+ PRINT_REALLOC("Realloc ", g_allocCount, size, address)
+ }
+ return p;
+ }
+ #else
+ return realloc(address, size);
+ #endif
+}
+
+
+#ifdef _WIN32
+
+void *MidAlloc(size_t size)
+{
+ if (size == 0)
+ return NULL;
+ #ifdef SZ_ALLOC_DEBUG
+ {
+ void *p = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+ if (p)
+ {
+ PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, p)
+ }
+ return p;
+ }
+ #else
+ return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+ #endif
+}
+
+void MidFree(void *address)
+{
+ PRINT_FREE("Free-Mid", g_allocCountMid, address)
+
+ if (!address)
+ return;
+ VirtualFree(address, 0, MEM_RELEASE);
+}
+
+#ifdef Z7_LARGE_PAGES
+
+#ifdef MEM_LARGE_PAGES
+ #define MY_MEM_LARGE_PAGES MEM_LARGE_PAGES
+#else
+ #define MY_MEM_LARGE_PAGES 0x20000000
+#endif
+
+extern
+SIZE_T g_LargePageSize;
+SIZE_T g_LargePageSize = 0;
+typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID);
+
+void SetLargePageSize(void)
+{
+ SIZE_T size;
+#ifdef Z7_USE_DYN_GetLargePageMinimum
+Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
+
+ const
+ Func_GetLargePageMinimum fn =
+ (Func_GetLargePageMinimum) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
+ "GetLargePageMinimum");
+ if (!fn)
+ return;
+ size = fn();
+#else
+ size = GetLargePageMinimum();
+#endif
+ if (size == 0 || (size & (size - 1)) != 0)
+ return;
+ g_LargePageSize = size;
+}
+
+#endif // Z7_LARGE_PAGES
+
+void *BigAlloc(size_t size)
+{
+ if (size == 0)
+ return NULL;
+
+ PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL)
+
+ #ifdef Z7_LARGE_PAGES
+ {
+ SIZE_T ps = g_LargePageSize;
+ if (ps != 0 && ps <= (1 << 30) && size > (ps / 2))
+ {
+ size_t size2;
+ ps--;
+ size2 = (size + ps) & ~ps;
+ if (size2 >= size)
+ {
+ void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY_MEM_LARGE_PAGES, PAGE_READWRITE);
+ if (p)
+ {
+ PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p)
+ return p;
+ }
+ }
+ }
+ }
+ #endif
+
+ return MidAlloc(size);
+}
+
+void BigFree(void *address)
+{
+ PRINT_FREE("Free-Big", g_allocCountBig, address)
+ MidFree(address);
+}
+
+#endif // _WIN32
+
+
+static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MyAlloc(size); }
+static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MyFree(address); }
+const ISzAlloc g_Alloc = { SzAlloc, SzFree };
+
+#ifdef _WIN32
+static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MidAlloc(size); }
+static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MidFree(address); }
+static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return BigAlloc(size); }
+static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) BigFree(address); }
+const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
+const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
+#endif
+
+#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
+
+#define ADJUST_ALLOC_SIZE 0
+/*
+#define ADJUST_ALLOC_SIZE (sizeof(void *) - 1)
+*/
+/*
+ Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if
+ MyAlloc() can return address that is NOT multiple of sizeof(void *).
+*/
+
+/*
+ uintptr_t : <stdint.h> C99 (optional)
+ : unsupported in VS6
+*/
+typedef
+ #ifdef _WIN32
+ UINT_PTR
+ #elif 1
+ uintptr_t
+ #else
+ ptrdiff_t
+ #endif
+ MY_uintptr_t;
+
+#if 0 \
+ || (defined(__CHERI__) \
+ || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ > 8))
+// for 128-bit pointers (cheri):
+#define MY_ALIGN_PTR_DOWN(p, align) \
+ ((void *)((char *)(p) - ((size_t)(MY_uintptr_t)(p) & ((align) - 1))))
+#else
+#define MY_ALIGN_PTR_DOWN(p, align) \
+ ((void *)((((MY_uintptr_t)(p)) & ~((MY_uintptr_t)(align) - 1))))
+#endif
+
+#endif
+
+#if !defined(_WIN32) \
+ && (defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) \
+ || defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L))
+ #define USE_posix_memalign
+#endif
+
+#ifndef USE_posix_memalign
+#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
+#endif
+
+/*
+ This posix_memalign() is for test purposes only.
+ We also need special Free() function instead of free(),
+ if this posix_memalign() is used.
+*/
+
+/*
+static int posix_memalign(void **ptr, size_t align, size_t size)
+{
+ size_t newSize = size + align;
+ void *p;
+ void *pAligned;
+ *ptr = NULL;
+ if (newSize < size)
+ return 12; // ENOMEM
+ p = MyAlloc(newSize);
+ if (!p)
+ return 12; // ENOMEM
+ pAligned = MY_ALIGN_PTR_UP_PLUS(p, align);
+ ((void **)pAligned)[-1] = p;
+ *ptr = pAligned;
+ return 0;
+}
+*/
+
+/*
+ ALLOC_ALIGN_SIZE >= sizeof(void *)
+ ALLOC_ALIGN_SIZE >= cache_line_size
+*/
+
+#define ALLOC_ALIGN_SIZE ((size_t)1 << 7)
+
+void *z7_AlignedAlloc(size_t size)
+{
+#ifndef USE_posix_memalign
+
+ void *p;
+ void *pAligned;
+ size_t newSize;
+
+ /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
+ block to prevent cache line sharing with another allocated blocks */
+
+ newSize = size + ALLOC_ALIGN_SIZE * 1 + ADJUST_ALLOC_SIZE;
+ if (newSize < size)
+ return NULL;
+
+ p = MyAlloc(newSize);
+
+ if (!p)
+ return NULL;
+ pAligned = MY_ALIGN_PTR_UP_PLUS(p, ALLOC_ALIGN_SIZE);
+
+ Print(" size="); PrintHex(size, 8);
+ Print(" a_size="); PrintHex(newSize, 8);
+ Print(" ptr="); PrintAddr(p);
+ Print(" a_ptr="); PrintAddr(pAligned);
+ PrintLn();
+
+ ((void **)pAligned)[-1] = p;
+
+ return pAligned;
+
+#else
+
+ void *p;
+ if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size))
+ return NULL;
+
+ Print(" posix_memalign="); PrintAddr(p);
+ PrintLn();
+
+ return p;
+
+#endif
+}
+
+
+void z7_AlignedFree(void *address)
+{
+#ifndef USE_posix_memalign
+ if (address)
+ MyFree(((void **)address)[-1]);
+#else
+ free(address);
+#endif
+}
+
+
+static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
+{
+ UNUSED_VAR(pp)
+ return z7_AlignedAlloc(size);
+}
+
+
+static void SzAlignedFree(ISzAllocPtr pp, void *address)
+{
+ UNUSED_VAR(pp)
+#ifndef USE_posix_memalign
+ if (address)
+ MyFree(((void **)address)[-1]);
+#else
+ free(address);
+#endif
+}
+
+
+const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };
+
+
+
+/* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */
+#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
+#if 1
+ #define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
+ #define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
+#else
+ // we can use this simplified code,
+ // if (CAlignOffsetAlloc::offset == (k * sizeof(void *))
+ #define REAL_BLOCK_PTR_VAR(p) (((void **)(p))[-1])
+#endif
+#endif
+
+
+#if 0
+#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
+#include <stdio.h>
+static void PrintPtr(const char *s, const void *p)
+{
+ const Byte *p2 = (const Byte *)&p;
+ unsigned i;
+ printf("%s %p ", s, p);
+ for (i = sizeof(p); i != 0;)
+ {
+ i--;
+ printf("%02x", p2[i]);
+ }
+ printf("\n");
+}
+#endif
+#endif
+
+
+static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
+{
+#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR)
+ UNUSED_VAR(pp)
+ return z7_AlignedAlloc(size);
+#else
+ const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
+ void *adr;
+ void *pAligned;
+ size_t newSize;
+ size_t extra;
+ size_t alignSize = (size_t)1 << p->numAlignBits;
+
+ if (alignSize < sizeof(void *))
+ alignSize = sizeof(void *);
+
+ if (p->offset >= alignSize)
+ return NULL;
+
+ /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
+ block to prevent cache line sharing with another allocated blocks */
+ extra = p->offset & (sizeof(void *) - 1);
+ newSize = size + alignSize + extra + ADJUST_ALLOC_SIZE;
+ if (newSize < size)
+ return NULL;
+
+ adr = ISzAlloc_Alloc(p->baseAlloc, newSize);
+
+ if (!adr)
+ return NULL;
+
+ pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr +
+ alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset;
+
+#if 0
+ printf("\nalignSize = %6x, offset=%6x, size=%8x \n", (unsigned)alignSize, (unsigned)p->offset, (unsigned)size);
+ PrintPtr("base", adr);
+ PrintPtr("alig", pAligned);
+#endif
+
+ PrintLn();
+ Print("- Aligned: ");
+ Print(" size="); PrintHex(size, 8);
+ Print(" a_size="); PrintHex(newSize, 8);
+ Print(" ptr="); PrintAddr(adr);
+ Print(" a_ptr="); PrintAddr(pAligned);
+ PrintLn();
+
+ REAL_BLOCK_PTR_VAR(pAligned) = adr;
+
+ return pAligned;
+#endif
+}
+
+
+static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
+{
+#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR)
+ UNUSED_VAR(pp)
+ z7_AlignedFree(address);
+#else
+ if (address)
+ {
+ const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
+ PrintLn();
+ Print("- Aligned Free: ");
+ PrintLn();
+ ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address));
+ }
+#endif
+}
+
+
+void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p)
+{
+ p->vt.Alloc = AlignOffsetAlloc_Alloc;
+ p->vt.Free = AlignOffsetAlloc_Free;
+}
--- /dev/null
+/* Bra.c -- Branch converters for RISC code
+2024-01-20 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Bra.h"
+#include "RotateDefs.h"
+#include "CpuArch.h"
+
+#if defined(MY_CPU_SIZEOF_POINTER) \
+ && ( MY_CPU_SIZEOF_POINTER == 4 \
+ || MY_CPU_SIZEOF_POINTER == 8)
+ #define BR_CONV_USE_OPT_PC_PTR
+#endif
+
+#ifdef BR_CONV_USE_OPT_PC_PTR
+#define BR_PC_INIT pc -= (UInt32)(SizeT)p;
+#define BR_PC_GET (pc + (UInt32)(SizeT)p)
+#else
+#define BR_PC_INIT pc += (UInt32)size;
+#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p))
+// #define BR_PC_INIT
+// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data))
+#endif
+
+#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
+// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
+
+#define Z7_BRANCH_CONV(name) z7_ ## name
+
+#define Z7_BRANCH_FUNC_MAIN(name) \
+static \
+Z7_FORCE_INLINE \
+Z7_ATTRIB_NO_VECTOR \
+Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding)
+
+#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \
+Z7_NO_INLINE \
+Z7_ATTRIB_NO_VECTOR \
+Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
+ { return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \
+
+#ifdef Z7_EXTRACT_ONLY
+#define Z7_BRANCH_FUNCS_IMP(name) \
+ Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0)
+#else
+#define Z7_BRANCH_FUNCS_IMP(name) \
+ Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \
+ Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1)
+#endif
+
+#if defined(__clang__)
+#define BR_EXTERNAL_FOR
+#define BR_NEXT_ITERATION continue;
+#else
+#define BR_EXTERNAL_FOR for (;;)
+#define BR_NEXT_ITERATION break;
+#endif
+
+#if defined(__clang__) && (__clang_major__ >= 8) \
+ || defined(__GNUC__) && (__GNUC__ >= 1000) \
+ // GCC is not good for __builtin_expect() here
+ /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
+ // #define Z7_unlikely [[unlikely]]
+ // #define Z7_LIKELY(x) (__builtin_expect((x), 1))
+ #define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
+ // #define Z7_likely [[likely]]
+#else
+ // #define Z7_LIKELY(x) (x)
+ #define Z7_UNLIKELY(x) (x)
+ // #define Z7_likely
+#endif
+
+
+Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64)
+{
+ // Byte *p = data;
+ const Byte *lim;
+ const UInt32 flag = (UInt32)1 << (24 - 4);
+ const UInt32 mask = ((UInt32)1 << 24) - (flag << 1);
+ size &= ~(SizeT)3;
+ // if (size == 0) return p;
+ lim = p + size;
+ BR_PC_INIT
+ pc -= 4; // because (p) will point to next instruction
+
+ BR_EXTERNAL_FOR
+ {
+ // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+ for (;;)
+ {
+ UInt32 v;
+ if Z7_UNLIKELY(p == lim)
+ return p;
+ v = GetUi32a(p);
+ p += 4;
+ if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0)
+ {
+ UInt32 c = BR_PC_GET >> 2;
+ BR_CONVERT_VAL(v, c)
+ v &= 0x03ffffff;
+ v |= 0x94000000;
+ SetUi32a(p - 4, v)
+ BR_NEXT_ITERATION
+ }
+ // v = rotlFixed(v, 8); v += (flag << 8) - 0x90; if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0)
+ v -= 0x90000000; if Z7_UNLIKELY((v & 0x9f000000) == 0)
+ {
+ UInt32 z, c;
+ // v = rotrFixed(v, 8);
+ v += flag; if Z7_UNLIKELY(v & mask) continue;
+ z = (v & 0xffffffe0) | (v >> 26);
+ c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7;
+ BR_CONVERT_VAL(z, c)
+ v &= 0x1f;
+ v |= 0x90000000;
+ v |= z << 26;
+ v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag);
+ SetUi32a(p - 4, v)
+ }
+ }
+ }
+}
+Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64)
+
+
+Z7_BRANCH_FUNC_MAIN(BranchConv_ARM)
+{
+ // Byte *p = data;
+ const Byte *lim;
+ size &= ~(SizeT)3;
+ lim = p + size;
+ BR_PC_INIT
+ /* in ARM: branch offset is relative to the +2 instructions from current instruction.
+ (p) will point to next instruction */
+ pc += 8 - 4;
+
+ for (;;)
+ {
+ for (;;)
+ {
+ if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break;
+ if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break;
+ }
+ {
+ UInt32 v = GetUi32a(p - 4);
+ UInt32 c = BR_PC_GET >> 2;
+ BR_CONVERT_VAL(v, c)
+ v &= 0x00ffffff;
+ v |= 0xeb000000;
+ SetUi32a(p - 4, v)
+ }
+ }
+}
+Z7_BRANCH_FUNCS_IMP(BranchConv_ARM)
+
+
+Z7_BRANCH_FUNC_MAIN(BranchConv_PPC)
+{
+ // Byte *p = data;
+ const Byte *lim;
+ size &= ~(SizeT)3;
+ lim = p + size;
+ BR_PC_INIT
+ pc -= 4; // because (p) will point to next instruction
+
+ for (;;)
+ {
+ UInt32 v;
+ for (;;)
+ {
+ if Z7_UNLIKELY(p == lim)
+ return p;
+ // v = GetBe32a(p);
+ v = *(UInt32 *)(void *)p;
+ p += 4;
+ // if ((v & 0xfc000003) == 0x48000001) break;
+ // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break;
+ if Z7_UNLIKELY(
+ ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001))
+ & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break;
+ }
+ {
+ v = Z7_CONV_NATIVE_TO_BE_32(v);
+ {
+ UInt32 c = BR_PC_GET;
+ BR_CONVERT_VAL(v, c)
+ }
+ v &= 0x03ffffff;
+ v |= 0x48000000;
+ SetBe32a(p - 4, v)
+ }
+ }
+}
+Z7_BRANCH_FUNCS_IMP(BranchConv_PPC)
+
+
+#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
+#define BR_SPARC_USE_ROTATE
+#endif
+
+Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC)
+{
+ // Byte *p = data;
+ const Byte *lim;
+ const UInt32 flag = (UInt32)1 << 22;
+ size &= ~(SizeT)3;
+ lim = p + size;
+ BR_PC_INIT
+ pc -= 4; // because (p) will point to next instruction
+ for (;;)
+ {
+ UInt32 v;
+ for (;;)
+ {
+ if Z7_UNLIKELY(p == lim)
+ return p;
+ /* // the code without GetBe32a():
+ { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; }
+ */
+ v = GetBe32a(p);
+ p += 4;
+ #ifdef BR_SPARC_USE_ROTATE
+ v = rotlFixed(v, 2);
+ v += (flag << 2) - 1;
+ if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0)
+ #else
+ v += (UInt32)5 << 29;
+ v ^= (UInt32)7 << 29;
+ v += flag;
+ if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0)
+ #endif
+ break;
+ }
+ {
+ // UInt32 v = GetBe32a(p - 4);
+ #ifndef BR_SPARC_USE_ROTATE
+ v <<= 2;
+ #endif
+ {
+ UInt32 c = BR_PC_GET;
+ BR_CONVERT_VAL(v, c)
+ }
+ v &= (flag << 3) - 1;
+ #ifdef BR_SPARC_USE_ROTATE
+ v -= (flag << 2) - 1;
+ v = rotrFixed(v, 2);
+ #else
+ v -= (flag << 2);
+ v >>= 2;
+ v |= (UInt32)1 << 30;
+ #endif
+ SetBe32a(p - 4, v)
+ }
+ }
+}
+Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC)
+
+
+Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT)
+{
+ // Byte *p = data;
+ Byte *lim;
+ size &= ~(SizeT)1;
+ // if (size == 0) return p;
+ if (size <= 2) return p;
+ size -= 2;
+ lim = p + size;
+ BR_PC_INIT
+ /* in ARM: branch offset is relative to the +2 instructions from current instruction.
+ (p) will point to the +2 instructions from current instruction */
+ // pc += 4 - 4;
+ // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1;
+ // #define ARMT_TAIL_PROC { goto armt_tail; }
+ #define ARMT_TAIL_PROC { return p; }
+
+ do
+ {
+ /* in MSVC 32-bit x86 compilers:
+ UInt32 version : it loads value from memory with movzx
+ Byte version : it loads value to 8-bit register (AL/CL)
+ movzx version is slightly faster in some cpus
+ */
+ unsigned b1;
+ // Byte / unsigned
+ b1 = p[1];
+ // optimized version to reduce one (p >= lim) check:
+ // unsigned a1 = p[1]; b1 = p[3]; p += 2; if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8)
+ for (;;)
+ {
+ unsigned b3; // Byte / UInt32
+ /* (Byte)(b3) normalization can use low byte computations in MSVC.
+ It gives smaller code, and no loss of speed in some compilers/cpus.
+ But new MSVC 32-bit x86 compilers use more slow load
+ from memory to low byte register in that case.
+ So we try to use full 32-bit computations for faster code.
+ */
+ // if (p >= lim) { ARMT_TAIL_PROC } b3 = b1 + 8; b1 = p[3]; p += 2; if ((b3 & b1) >= 0xf8) break;
+ if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b3 = p[3]; p += 2; if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break;
+ if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b1 = p[3]; p += 2; if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break;
+ }
+ {
+ /* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation.
+ But gcc/clang for arm64 can use bfi instruction for full code here */
+ UInt32 v =
+ ((UInt32)GetUi16a(p - 2) << 11) |
+ ((UInt32)GetUi16a(p) & 0x7FF);
+ /*
+ UInt32 v =
+ ((UInt32)p[1 - 2] << 19)
+ + (((UInt32)p[1] & 0x7) << 8)
+ + (((UInt32)p[-2] << 11))
+ + (p[0]);
+ */
+ p += 2;
+ {
+ UInt32 c = BR_PC_GET >> 1;
+ BR_CONVERT_VAL(v, c)
+ }
+ SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000))
+ SetUi16a(p - 2, (UInt16)(v | 0xf800))
+ /*
+ p[-4] = (Byte)(v >> 11);
+ p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7));
+ p[-2] = (Byte)v;
+ p[-1] = (Byte)(0xf8 | (v >> 8));
+ */
+ }
+ }
+ while (p < lim);
+ return p;
+ // armt_tail:
+ // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; } return lim;
+ // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2));
+ // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
+ // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
+}
+Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT)
+
+
+// #define BR_IA64_NO_INLINE
+
+Z7_BRANCH_FUNC_MAIN(BranchConv_IA64)
+{
+ // Byte *p = data;
+ const Byte *lim;
+ size &= ~(SizeT)15;
+ lim = p + size;
+ pc -= 1 << 4;
+ pc >>= 4 - 1;
+ // pc -= 1 << 1;
+
+ for (;;)
+ {
+ unsigned m;
+ for (;;)
+ {
+ if Z7_UNLIKELY(p == lim)
+ return p;
+ m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e));
+ p += 16;
+ pc += 1 << 1;
+ if (m &= 3)
+ break;
+ }
+ {
+ p += (ptrdiff_t)m * 5 - 20; // negative value is expected here.
+ do
+ {
+ const UInt32 t =
+ #if defined(MY_CPU_X86_OR_AMD64)
+ // we use 32-bit load here to reduce code size on x86:
+ GetUi32(p);
+ #else
+ GetUi16(p);
+ #endif
+ UInt32 z = GetUi32(p + 1) >> m;
+ p += 5;
+ if (((t >> m) & (0x70 << 1)) == 0
+ && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0)
+ {
+ UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z;
+ z ^= v;
+ #ifdef BR_IA64_NO_INLINE
+ v |= (v & ((UInt32)1 << (23 + 1))) >> 3;
+ {
+ UInt32 c = pc;
+ BR_CONVERT_VAL(v, c)
+ }
+ v &= (0x1fffff << 1) | 1;
+ #else
+ {
+ if (encoding)
+ {
+ // pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits
+ pc &= (0x1fffff << 1) | 1;
+ v += pc;
+ }
+ else
+ {
+ // pc |= 0xc00000 << 1; // we need to set at least 2 bits
+ pc |= ~(UInt32)((0x1fffff << 1) | 1);
+ v -= pc;
+ }
+ }
+ v &= ~(UInt32)(0x600000 << 1);
+ #endif
+ v += (0x700000 << 1);
+ v &= (0x8fffff << 1) | 1;
+ z |= v;
+ z <<= m;
+ SetUi32(p + 1 - 5, z)
+ }
+ m++;
+ }
+ while (m &= 3); // while (m < 4);
+ }
+ }
+}
+Z7_BRANCH_FUNCS_IMP(BranchConv_IA64)
+
+
+#define BR_CONVERT_VAL_ENC(v) v += BR_PC_GET;
+#define BR_CONVERT_VAL_DEC(v) v -= BR_PC_GET;
+
+#if 1 && defined(MY_CPU_LE_UNALIGN)
+ #define RISCV_USE_UNALIGNED_LOAD
+#endif
+
+#ifdef RISCV_USE_UNALIGNED_LOAD
+ #define RISCV_GET_UI32(p) GetUi32(p)
+ #define RISCV_SET_UI32(p, v) { SetUi32(p, v) }
+#else
+ #define RISCV_GET_UI32(p) \
+ ((UInt32)GetUi16a(p) + \
+ ((UInt32)GetUi16a((p) + 2) << 16))
+ #define RISCV_SET_UI32(p, v) { \
+ SetUi16a(p, (UInt16)(v)) \
+ SetUi16a((p) + 2, (UInt16)(v >> 16)) }
+#endif
+
+#if 1 && defined(MY_CPU_LE)
+ #define RISCV_USE_16BIT_LOAD
+#endif
+
+#ifdef RISCV_USE_16BIT_LOAD
+ #define RISCV_LOAD_VAL(p) GetUi16a(p)
+#else
+ #define RISCV_LOAD_VAL(p) (*(p))
+#endif
+
+#define RISCV_INSTR_SIZE 2
+#define RISCV_STEP_1 (4 + RISCV_INSTR_SIZE)
+#define RISCV_STEP_2 4
+#define RISCV_REG_VAL (2 << 7)
+#define RISCV_CMD_VAL 3
+#if 1
+ // for code size optimization:
+ #define RISCV_DELTA_7F 0x7f
+#else
+ #define RISCV_DELTA_7F 0
+#endif
+
+#define RISCV_CHECK_1(v, b) \
+ (((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0)
+
+#if 1
+ #define RISCV_CHECK_2(v, r) \
+ ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \
+ << 18) \
+ < ((r) & 0x1d))
+#else
+ // this branch gives larger code, because
+ // compilers generate larger code for big constants.
+ #define RISCV_CHECK_2(v, r) \
+ ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
+ & ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
+ < ((r) & 0x1d))
+#endif
+
+
+#define RISCV_SCAN_LOOP \
+ Byte *lim; \
+ size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \
+ if (size <= 6) return p; \
+ size -= 6; \
+ lim = p + size; \
+ BR_PC_INIT \
+ for (;;) \
+ { \
+ UInt32 a, v; \
+ /* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \
+ for (;;) \
+ { \
+ if Z7_UNLIKELY(p >= lim) { return p; } \
+ a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \
+ if ((a & 0x77) == 0) break; \
+ a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \
+ p += RISCV_INSTR_SIZE * 2; \
+ if ((a & 0x77) == 0) \
+ { \
+ p -= RISCV_INSTR_SIZE; \
+ if Z7_UNLIKELY(p >= lim) { return p; } \
+ break; \
+ } \
+ }
+// (xx6f ^ 10) + 1 = xx7f + 1 = xx80 : JAL
+// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100 : JAL
+// (xx17 ^ 10) + 1 = xx07 + 1 = xx08 : AUIPC
+// (xx97 ^ 10) + 1 = xx87 + 1 = xx88 : AUIPC
+
+Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc)
+{
+ RISCV_SCAN_LOOP
+ v = a;
+ a = RISCV_GET_UI32(p);
+#ifndef RISCV_USE_16BIT_LOAD
+ v += (UInt32)p[1] << 8;
+#endif
+
+ if ((v & 8) == 0) // JAL
+ {
+ if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80)
+ {
+ p += RISCV_INSTR_SIZE;
+ continue;
+ }
+ {
+ v = ((a & 1u << 31) >> 11)
+ | ((a & 0x3ff << 21) >> 20)
+ | ((a & 1 << 20) >> 9)
+ | (a & 0xff << 12);
+ BR_CONVERT_VAL_ENC(v)
+ // ((v & 1) == 0)
+ // v: bits [1 : 20] contain offset bits
+#if 0 && defined(RISCV_USE_UNALIGNED_LOAD)
+ a &= 0xfff;
+ a |= ((UInt32)(v << 23))
+ | ((UInt32)(v << 7) & ((UInt32)0xff << 16))
+ | ((UInt32)(v >> 5) & ((UInt32)0xf0 << 8));
+ RISCV_SET_UI32(p, a)
+#else // aligned
+#if 0
+ SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff)))
+#else
+ p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf));
+#endif
+
+#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
+ v <<= 15;
+ v = Z7_BSWAP32(v);
+ SetUi16a(p + 2, (UInt16)v)
+#else
+ p[2] = (Byte)(v >> 9);
+ p[3] = (Byte)(v >> 1);
+#endif
+#endif // aligned
+ }
+ p += 4;
+ continue;
+ } // JAL
+
+ {
+ // AUIPC
+ if (v & 0xe80) // (not x0) and (not x2)
+ {
+ const UInt32 b = RISCV_GET_UI32(p + 4);
+ if (RISCV_CHECK_1(v, b))
+ {
+ {
+ const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL);
+ RISCV_SET_UI32(p, temp)
+ }
+ a &= 0xfffff000;
+ {
+#if 1
+ const int t = -1 >> 1;
+ if (t != -1)
+ a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation
+ else
+#endif
+ a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension).
+ }
+ BR_CONVERT_VAL_ENC(a)
+#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
+ a = Z7_BSWAP32(a);
+ RISCV_SET_UI32(p + 4, a)
+#else
+ SetBe32(p + 4, a)
+#endif
+ p += 8;
+ }
+ else
+ p += RISCV_STEP_1;
+ }
+ else
+ {
+ UInt32 r = a >> 27;
+ if (RISCV_CHECK_2(v, r))
+ {
+ v = RISCV_GET_UI32(p + 4);
+ r = (r << 7) + 0x17 + (v & 0xfffff000);
+ a = (a >> 12) | (v << 20);
+ RISCV_SET_UI32(p, r)
+ RISCV_SET_UI32(p + 4, a)
+ p += 8;
+ }
+ else
+ p += RISCV_STEP_2;
+ }
+ }
+ } // for
+}
+
+
+Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc)
+{
+ RISCV_SCAN_LOOP
+#ifdef RISCV_USE_16BIT_LOAD
+ if ((a & 8) == 0)
+ {
+#else
+ v = a;
+ a += (UInt32)p[1] << 8;
+ if ((v & 8) == 0)
+ {
+#endif
+ // JAL
+ a -= 0x100 - RISCV_DELTA_7F;
+ if (a & 0xd80)
+ {
+ p += RISCV_INSTR_SIZE;
+ continue;
+ }
+ {
+ const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff;
+#if 0 // unaligned
+ a = GetUi32(p);
+ v = (UInt32)(a >> 23) & ((UInt32)0xff << 1)
+ | (UInt32)(a >> 7) & ((UInt32)0xff << 9)
+#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
+ v = GetUi16a(p + 2);
+ v = Z7_BSWAP32(v) >> 15
+#else
+ v = (UInt32)p[3] << 1
+ | (UInt32)p[2] << 9
+#endif
+ | (UInt32)((a & 0xf000) << 5);
+ BR_CONVERT_VAL_DEC(v)
+ a = a_old
+ | (v << 11 & 1u << 31)
+ | (v << 20 & 0x3ff << 21)
+ | (v << 9 & 1 << 20)
+ | (v & 0xff << 12);
+ RISCV_SET_UI32(p, a)
+ }
+ p += 4;
+ continue;
+ } // JAL
+
+ {
+ // AUIPC
+ v = a;
+#if 1 && defined(RISCV_USE_UNALIGNED_LOAD)
+ a = GetUi32(p);
+#else
+ a |= (UInt32)GetUi16a(p + 2) << 16;
+#endif
+ if ((v & 0xe80) == 0) // x0/x2
+ {
+ const UInt32 r = a >> 27;
+ if (RISCV_CHECK_2(v, r))
+ {
+ UInt32 b;
+#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
+ b = RISCV_GET_UI32(p + 4);
+ b = Z7_BSWAP32(b);
+#else
+ b = GetBe32(p + 4);
+#endif
+ v = a >> 12;
+ BR_CONVERT_VAL_DEC(b)
+ a = (r << 7) + 0x17;
+ a += (b + 0x800) & 0xfffff000;
+ v |= b << 20;
+ RISCV_SET_UI32(p, a)
+ RISCV_SET_UI32(p + 4, v)
+ p += 8;
+ }
+ else
+ p += RISCV_STEP_2;
+ }
+ else
+ {
+ const UInt32 b = RISCV_GET_UI32(p + 4);
+ if (!RISCV_CHECK_1(v, b))
+ p += RISCV_STEP_1;
+ else
+ {
+ v = (a & 0xfffff000) | (b >> 20);
+ a = (b << 12) | (0x17 + RISCV_REG_VAL);
+ RISCV_SET_UI32(p, a)
+ RISCV_SET_UI32(p + 4, v)
+ p += 8;
+ }
+ }
+ }
+ } // for
+}
--- /dev/null
+/* Bra86.c -- Branch converter for X86 code (BCJ)
+2023-04-02 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Bra.h"
+#include "CpuArch.h"
+
+
+#if defined(MY_CPU_SIZEOF_POINTER) \
+ && ( MY_CPU_SIZEOF_POINTER == 4 \
+ || MY_CPU_SIZEOF_POINTER == 8)
+ #define BR_CONV_USE_OPT_PC_PTR
+#endif
+
+#ifdef BR_CONV_USE_OPT_PC_PTR
+#define BR_PC_INIT pc -= (UInt32)(SizeT)p; // (MY_uintptr_t)
+#define BR_PC_GET (pc + (UInt32)(SizeT)p)
+#else
+#define BR_PC_INIT pc += (UInt32)size;
+#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p))
+// #define BR_PC_INIT
+// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data))
+#endif
+
+#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
+// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
+
+#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name
+
+#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0)
+
+#ifdef MY_CPU_LE_UNALIGN
+ #define BR86_PREPARE_BCJ_SCAN const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8;
+ #define BR86_IS_BCJ_BYTE(n) ((v & ((UInt32)0xfe << (n) * 8)) == 0)
+#else
+ #define BR86_PREPARE_BCJ_SCAN
+ // bad for MSVC X86 (partial write to byte reg):
+ #define BR86_IS_BCJ_BYTE(n) ((p[n - 4] & 0xfe) == 0xe8)
+ // bad for old MSVC (partial write to byte reg):
+ // #define BR86_IS_BCJ_BYTE(n) (((*p ^ 0xe8) & 0xfe) == 0)
+#endif
+
+static
+Z7_FORCE_INLINE
+Z7_ATTRIB_NO_VECTOR
+Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding)
+{
+ if (size < 5)
+ return p;
+ {
+ // Byte *p = data;
+ const Byte *lim = p + size - 4;
+ unsigned mask = (unsigned)*state; // & 7;
+#ifdef BR_CONV_USE_OPT_PC_PTR
+ /* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4),
+ because call/jump offset is relative to the next instruction.
+ if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4),
+ because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before.
+ */
+ pc += 4;
+#endif
+ BR_PC_INIT
+ goto start;
+
+ for (;; mask |= 4)
+ {
+ // cont: mask |= 4;
+ start:
+ if (p >= lim)
+ goto fin;
+ {
+ BR86_PREPARE_BCJ_SCAN
+ p += 4;
+ if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1;
+ if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1;
+ if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0;
+ if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
+ }
+ goto main_loop;
+
+ m0: p--;
+ m1: p--;
+ m2: p--;
+ if (mask == 0)
+ goto a3;
+ if (p > lim)
+ goto fin_p;
+
+ // if (((0x17u >> mask) & 1) == 0)
+ if (mask > 4 || mask == 3)
+ {
+ mask >>= 1;
+ continue; // goto cont;
+ }
+ mask >>= 1;
+ if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask]))
+ continue; // goto cont;
+ // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
+ {
+ UInt32 v = GetUi32(p);
+ UInt32 c;
+ v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
+ c = BR_PC_GET;
+ BR_CONVERT_VAL(v, c)
+ {
+ mask <<= 3;
+ if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask))
+ {
+ v ^= (((UInt32)0x100 << mask) - 1);
+ #ifdef MY_CPU_X86
+ // for X86 : we can recalculate (c) to reduce register pressure
+ c = BR_PC_GET;
+ #endif
+ BR_CONVERT_VAL(v, c)
+ }
+ mask = 0;
+ }
+ // v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
+ v &= (1 << 25) - 1; v -= (1 << 24);
+ SetUi32(p, v)
+ p += 4;
+ goto main_loop;
+ }
+
+ main_loop:
+ if (p >= lim)
+ goto fin;
+ for (;;)
+ {
+ BR86_PREPARE_BCJ_SCAN
+ p += 4;
+ if (BR86_IS_BCJ_BYTE(0)) { goto a0; }
+ if (BR86_IS_BCJ_BYTE(1)) { goto a1; }
+ if (BR86_IS_BCJ_BYTE(2)) { goto a2; }
+ if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
+ if (p >= lim)
+ goto fin;
+ }
+
+ a0: p--;
+ a1: p--;
+ a2: p--;
+ a3:
+ if (p > lim)
+ goto fin_p;
+ // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
+ {
+ UInt32 v = GetUi32(p);
+ UInt32 c;
+ v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
+ c = BR_PC_GET;
+ BR_CONVERT_VAL(v, c)
+ // v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
+ v &= (1 << 25) - 1; v -= (1 << 24);
+ SetUi32(p, v)
+ p += 4;
+ goto main_loop;
+ }
+ }
+
+fin_p:
+ p--;
+fin:
+ // the following processing for tail is optional and can be commented
+ /*
+ lim += 4;
+ for (; p < lim; p++, mask >>= 1)
+ if ((*p & 0xfe) == 0xe8)
+ break;
+ */
+ *state = (UInt32)mask;
+ return p;
+ }
+}
+
+
+#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \
+Z7_NO_INLINE \
+Z7_ATTRIB_NO_VECTOR \
+Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \
+ { return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); }
+
+Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0)
+#ifndef Z7_EXTRACT_ONLY
+Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1)
+#endif
--- /dev/null
+/* BraIA64.c -- Converter for IA-64 code
+2023-02-20 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+// the code was moved to Bra.c
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty
+#endif
+
+#if defined(__clang__)
+#pragma GCC diagnostic ignored "-Wempty-translation-unit"
+#endif
--- /dev/null
+/* CpuArch.c -- CPU specific code
+2024-03-02 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+// #include <stdio.h>
+
+#include "CpuArch.h"
+
+#ifdef MY_CPU_X86_OR_AMD64
+
+#undef NEED_CHECK_FOR_CPUID
+#if !defined(MY_CPU_AMD64)
+#define NEED_CHECK_FOR_CPUID
+#endif
+
+/*
+ cpuid instruction supports (subFunction) parameter in ECX,
+ that is used only with some specific (function) parameter values.
+ But we always use only (subFunction==0).
+*/
+/*
+ __cpuid(): MSVC and GCC/CLANG use same function/macro name
+ but parameters are different.
+ We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function.
+*/
+
+#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \
+ || defined(__clang__) /* && (__clang_major__ >= 10) */
+
+/* there were some CLANG/GCC compilers that had issues with
+ rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined).
+ compiler's <cpuid.h> contains the macro __cpuid() that is similar to our code.
+ The history of __cpuid() changes in CLANG/GCC:
+ GCC:
+ 2007: it preserved ebx for (__PIC__ && __i386__)
+ 2013: it preserved rbx and ebx for __PIC__
+ 2014: it doesn't preserve rbx and ebx anymore
+ we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem.
+ CLANG:
+ 2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check.
+ Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)?
+ Do we need __PIC__ test for CLANG or we must care about rbx even if
+ __PIC__ is not defined?
+*/
+
+#define ASM_LN "\n"
+
+#if defined(MY_CPU_AMD64) && defined(__PIC__) \
+ && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
+
+#define x86_cpuid_MACRO(p, func) { \
+ __asm__ __volatile__ ( \
+ ASM_LN "mov %%rbx, %q1" \
+ ASM_LN "cpuid" \
+ ASM_LN "xchg %%rbx, %q1" \
+ : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
+
+ /* "=&r" selects free register. It can select even rbx, if that register is free.
+ "=&D" for (RDI) also works, but the code can be larger with "=&D"
+ "2"(0) means (subFunction = 0),
+ 2 is (zero-based) index in the output constraint list "=c" (ECX). */
+
+#elif defined(MY_CPU_X86) && defined(__PIC__) \
+ && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
+
+#define x86_cpuid_MACRO(p, func) { \
+ __asm__ __volatile__ ( \
+ ASM_LN "mov %%ebx, %k1" \
+ ASM_LN "cpuid" \
+ ASM_LN "xchg %%ebx, %k1" \
+ : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
+
+#else
+
+#define x86_cpuid_MACRO(p, func) { \
+ __asm__ __volatile__ ( \
+ ASM_LN "cpuid" \
+ : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
+
+#endif
+
+
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+ x86_cpuid_MACRO(p, func)
+}
+
+
+Z7_NO_INLINE
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+ #if defined(NEED_CHECK_FOR_CPUID)
+ #define EFALGS_CPUID_BIT 21
+ UInt32 a;
+ __asm__ __volatile__ (
+ ASM_LN "pushf"
+ ASM_LN "pushf"
+ ASM_LN "pop %0"
+ // ASM_LN "movl %0, %1"
+ // ASM_LN "xorl $0x200000, %0"
+ ASM_LN "btc %1, %0"
+ ASM_LN "push %0"
+ ASM_LN "popf"
+ ASM_LN "pushf"
+ ASM_LN "pop %0"
+ ASM_LN "xorl (%%esp), %0"
+
+ ASM_LN "popf"
+ ASM_LN
+ : "=&r" (a) // "=a"
+ : "i" (EFALGS_CPUID_BIT)
+ );
+ if ((a & (1 << EFALGS_CPUID_BIT)) == 0)
+ return 0;
+ #endif
+ {
+ UInt32 p[4];
+ x86_cpuid_MACRO(p, 0)
+ return p[0];
+ }
+}
+
+#undef ASM_LN
+
+#elif !defined(_MSC_VER)
+
+/*
+// for gcc/clang and other: we can try to use __cpuid macro:
+#include <cpuid.h>
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+ __cpuid(func, p[0], p[1], p[2], p[3]);
+}
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+ return (UInt32)__get_cpuid_max(0, NULL);
+}
+*/
+// for unsupported cpuid:
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+ UNUSED_VAR(func)
+ p[0] = p[1] = p[2] = p[3] = 0;
+}
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+ return 0;
+}
+
+#else // _MSC_VER
+
+#if !defined(MY_CPU_AMD64)
+
+UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+ #if defined(NEED_CHECK_FOR_CPUID)
+ #define EFALGS_CPUID_BIT 21
+ __asm pushfd
+ __asm pushfd
+ /*
+ __asm pop eax
+ // __asm mov edx, eax
+ __asm btc eax, EFALGS_CPUID_BIT
+ __asm push eax
+ */
+ __asm btc dword ptr [esp], EFALGS_CPUID_BIT
+ __asm popfd
+ __asm pushfd
+ __asm pop eax
+ // __asm xor eax, edx
+ __asm xor eax, [esp]
+ // __asm push edx
+ __asm popfd
+ __asm and eax, (1 shl EFALGS_CPUID_BIT)
+ __asm jz end_func
+ #endif
+ __asm push ebx
+ __asm xor eax, eax // func
+ __asm xor ecx, ecx // subFunction (optional) for (func == 0)
+ __asm cpuid
+ __asm pop ebx
+ #if defined(NEED_CHECK_FOR_CPUID)
+ end_func:
+ #endif
+ __asm ret 0
+}
+
+void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+ UNUSED_VAR(p)
+ UNUSED_VAR(func)
+ __asm push ebx
+ __asm push edi
+ __asm mov edi, ecx // p
+ __asm mov eax, edx // func
+ __asm xor ecx, ecx // subfunction (optional) for (func == 0)
+ __asm cpuid
+ __asm mov [edi ], eax
+ __asm mov [edi + 4], ebx
+ __asm mov [edi + 8], ecx
+ __asm mov [edi + 12], edx
+ __asm pop edi
+ __asm pop ebx
+ __asm ret 0
+}
+
+#else // MY_CPU_AMD64
+
+ #if _MSC_VER >= 1600
+ #include <intrin.h>
+ #define MY_cpuidex __cpuidex
+ #else
+/*
+ __cpuid (func == (0 or 7)) requires subfunction number in ECX.
+ MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
+ __cpuid() in new MSVC clears ECX.
+ __cpuid() in old MSVC (14.00) x64 doesn't clear ECX
+ We still can use __cpuid for low (func) values that don't require ECX,
+ but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
+ So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
+ where ECX value is first parameter for FASTCALL / NO_INLINE func,
+ So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
+ old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
+
+DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!!
+*/
+static
+Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int32 *CPUInfo)
+{
+ UNUSED_VAR(subFunction)
+ __cpuid(CPUInfo, func);
+}
+ #define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info)
+ #pragma message("======== MY_cpuidex_HACK WAS USED ========")
+ #endif // _MSC_VER >= 1600
+
+#if !defined(MY_CPU_AMD64)
+/* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code,
+ so we disable inlining here */
+Z7_NO_INLINE
+#endif
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+ MY_cpuidex((Int32 *)p, (Int32)func, 0);
+}
+
+Z7_NO_INLINE
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+ Int32 a[4];
+ MY_cpuidex(a, 0, 0);
+ return a[0];
+}
+
+#endif // MY_CPU_AMD64
+#endif // _MSC_VER
+
+#if defined(NEED_CHECK_FOR_CPUID)
+#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; }
+#else
+#define CHECK_CPUID_IS_SUPPORTED
+#endif
+#undef NEED_CHECK_FOR_CPUID
+
+
+static
+BoolInt x86cpuid_Func_1(UInt32 *p)
+{
+ CHECK_CPUID_IS_SUPPORTED
+ z7_x86_cpuid(p, 1);
+ return True;
+}
+
+/*
+static const UInt32 kVendors[][1] =
+{
+ { 0x756E6547 }, // , 0x49656E69, 0x6C65746E },
+ { 0x68747541 }, // , 0x69746E65, 0x444D4163 },
+ { 0x746E6543 } // , 0x48727561, 0x736C7561 }
+};
+*/
+
+/*
+typedef struct
+{
+ UInt32 maxFunc;
+ UInt32 vendor[3];
+ UInt32 ver;
+ UInt32 b;
+ UInt32 c;
+ UInt32 d;
+} Cx86cpuid;
+
+enum
+{
+ CPU_FIRM_INTEL,
+ CPU_FIRM_AMD,
+ CPU_FIRM_VIA
+};
+int x86cpuid_GetFirm(const Cx86cpuid *p);
+#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf))
+#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf))
+#define x86cpuid_ver_GetStepping(ver) (ver & 0xf)
+
+int x86cpuid_GetFirm(const Cx86cpuid *p)
+{
+ unsigned i;
+ for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++)
+ {
+ const UInt32 *v = kVendors[i];
+ if (v[0] == p->vendor[0]
+ // && v[1] == p->vendor[1]
+ // && v[2] == p->vendor[2]
+ )
+ return (int)i;
+ }
+ return -1;
+}
+
+BoolInt CPU_Is_InOrder()
+{
+ Cx86cpuid p;
+ UInt32 family, model;
+ if (!x86cpuid_CheckAndRead(&p))
+ return True;
+
+ family = x86cpuid_ver_GetFamily(p.ver);
+ model = x86cpuid_ver_GetModel(p.ver);
+
+ switch (x86cpuid_GetFirm(&p))
+ {
+ case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
+ // In-Order Atom CPU
+ model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330
+ || model == 0x26 // 45 nm, Z6xx
+ || model == 0x27 // 32 nm, Z2460
+ || model == 0x35 // 32 nm, Z2760
+ || model == 0x36 // 32 nm, N2xxx, D2xxx
+ )));
+ case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
+ case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
+ }
+ return False; // v23 : unknown processors are not In-Order
+}
+*/
+
+#ifdef _WIN32
+#include "7zWindows.h"
+#endif
+
+#if !defined(MY_CPU_AMD64) && defined(_WIN32)
+
+/* for legacy SSE ia32: there is no user-space cpu instruction to check
+ that OS supports SSE register storing/restoring on context switches.
+ So we need some OS-specific function to check that it's safe to use SSE registers.
+*/
+
+Z7_FORCE_INLINE
+static BoolInt CPU_Sys_Is_SSE_Supported(void)
+{
+#ifdef _MSC_VER
+ #pragma warning(push)
+ #pragma warning(disable : 4996) // `GetVersion': was declared deprecated
+#endif
+ /* low byte is major version of Windows
+ We suppose that any Windows version since
+ Windows2000 (major == 5) supports SSE registers */
+ return (Byte)GetVersion() >= 5;
+#if defined(_MSC_VER)
+ #pragma warning(pop)
+#endif
+}
+#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
+#else
+#define CHECK_SYS_SSE_SUPPORT
+#endif
+
+
+#if !defined(MY_CPU_AMD64)
+
+BoolInt CPU_IsSupported_CMOV(void)
+{
+ UInt32 a[4];
+ if (!x86cpuid_Func_1(&a[0]))
+ return 0;
+ return (BoolInt)(a[3] >> 15) & 1;
+}
+
+BoolInt CPU_IsSupported_SSE(void)
+{
+ UInt32 a[4];
+ CHECK_SYS_SSE_SUPPORT
+ if (!x86cpuid_Func_1(&a[0]))
+ return 0;
+ return (BoolInt)(a[3] >> 25) & 1;
+}
+
+BoolInt CPU_IsSupported_SSE2(void)
+{
+ UInt32 a[4];
+ CHECK_SYS_SSE_SUPPORT
+ if (!x86cpuid_Func_1(&a[0]))
+ return 0;
+ return (BoolInt)(a[3] >> 26) & 1;
+}
+
+#endif
+
+
+static UInt32 x86cpuid_Func_1_ECX(void)
+{
+ UInt32 a[4];
+ CHECK_SYS_SSE_SUPPORT
+ if (!x86cpuid_Func_1(&a[0]))
+ return 0;
+ return a[2];
+}
+
+BoolInt CPU_IsSupported_AES(void)
+{
+ return (BoolInt)(x86cpuid_Func_1_ECX() >> 25) & 1;
+}
+
+BoolInt CPU_IsSupported_SSSE3(void)
+{
+ return (BoolInt)(x86cpuid_Func_1_ECX() >> 9) & 1;
+}
+
+BoolInt CPU_IsSupported_SSE41(void)
+{
+ return (BoolInt)(x86cpuid_Func_1_ECX() >> 19) & 1;
+}
+
+BoolInt CPU_IsSupported_SHA(void)
+{
+ CHECK_SYS_SSE_SUPPORT
+
+ if (z7_x86_cpuid_GetMaxFunc() < 7)
+ return False;
+ {
+ UInt32 d[4];
+ z7_x86_cpuid(d, 7);
+ return (BoolInt)(d[1] >> 29) & 1;
+ }
+}
+
+/*
+MSVC: _xgetbv() intrinsic is available since VS2010SP1.
+ MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in
+ <immintrin.h> that we can use or check.
+ For any 32-bit x86 we can use asm code in MSVC,
+ but MSVC asm code is huge after compilation.
+ So _xgetbv() is better
+
+ICC: _xgetbv() intrinsic is available (in what version of ICC?)
+ ICC defines (__GNUC__) and it supports gnu assembler
+ also ICC supports MASM style code with -use-msasm switch.
+ but ICC doesn't support __attribute__((__target__))
+
+GCC/CLANG 9:
+ _xgetbv() is macro that works via __builtin_ia32_xgetbv()
+ and we need __attribute__((__target__("xsave")).
+ But with __target__("xsave") the function will be not
+ inlined to function that has no __target__("xsave") attribute.
+ If we want _xgetbv() call inlining, then we should use asm version
+ instead of calling _xgetbv().
+ Note: the intrinsic was broken before GCC 8.2:
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684
+*/
+
+#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \
+ || defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \
+ || defined(__GNUC__) && (__GNUC__ >= 9) \
+ || defined(__clang__) && (__clang_major__ >= 9)
+// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler
+#if defined(__INTEL_COMPILER)
+#define ATTRIB_XGETBV
+#elif defined(__GNUC__) || defined(__clang__)
+// we don't define ATTRIB_XGETBV here, because asm version is better for inlining.
+// #define ATTRIB_XGETBV __attribute__((__target__("xsave")))
+#else
+#define ATTRIB_XGETBV
+#endif
+#endif
+
+#if defined(ATTRIB_XGETBV)
+#include <immintrin.h>
+#endif
+
+
+// XFEATURE_ENABLED_MASK/XCR0
+#define MY_XCR_XFEATURE_ENABLED_MASK 0
+
+#if defined(ATTRIB_XGETBV)
+ATTRIB_XGETBV
+#endif
+static UInt64 x86_xgetbv_0(UInt32 num)
+{
+#if defined(ATTRIB_XGETBV)
+ {
+ return
+ #if (defined(_MSC_VER))
+ _xgetbv(num);
+ #else
+ __builtin_ia32_xgetbv(
+ #if !defined(__clang__)
+ (int)
+ #endif
+ num);
+ #endif
+ }
+
+#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC)
+
+ UInt32 a, d;
+ #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
+ __asm__
+ (
+ "xgetbv"
+ : "=a"(a), "=d"(d) : "c"(num) : "cc"
+ );
+ #else // is old gcc
+ __asm__
+ (
+ ".byte 0x0f, 0x01, 0xd0" "\n\t"
+ : "=a"(a), "=d"(d) : "c"(num) : "cc"
+ );
+ #endif
+ return ((UInt64)d << 32) | a;
+ // return a;
+
+#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64)
+
+ UInt32 a, d;
+ __asm {
+ push eax
+ push edx
+ push ecx
+ mov ecx, num;
+ // xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK
+ _emit 0x0f
+ _emit 0x01
+ _emit 0xd0
+ mov a, eax
+ mov d, edx
+ pop ecx
+ pop edx
+ pop eax
+ }
+ return ((UInt64)d << 32) | a;
+ // return a;
+
+#else // it's unknown compiler
+ // #error "Need xgetbv function"
+ UNUSED_VAR(num)
+ // for MSVC-X64 we could call external function from external file.
+ /* Actually we had checked OSXSAVE/AVX in cpuid before.
+ So it's expected that OS supports at least AVX and below. */
+ // if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0
+ return
+ // (1 << 0) | // x87
+ (1 << 1) // SSE
+ | (1 << 2); // AVX
+
+#endif
+}
+
+#ifdef _WIN32
+/*
+ Windows versions do not know about new ISA extensions that
+ can be introduced. But we still can use new extensions,
+ even if Windows doesn't report support for them.
+ But we can use new extensions, only if Windows knows about new ISA extension
+ that changes the number or size of registers: SSE, AVX/XSAVE, AVX512
+ So it's enough to check
+ MY_PF_AVX_INSTRUCTIONS_AVAILABLE
+ instead of
+ MY_PF_AVX2_INSTRUCTIONS_AVAILABLE
+*/
+#define MY_PF_XSAVE_ENABLED 17
+// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36
+// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37
+// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38
+// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39
+// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40
+// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41
+#endif
+
+// Returns nonzero only if the CPU reports AVX support (CPUID.1:ECX bit 28)
+// AND the OS has enabled XSAVE state management for both SSE and AVX
+// register state (XCR0 bits 1 and 2, read via XGETBV).
+BoolInt CPU_IsSupported_AVX(void)
+{
+  #ifdef _WIN32
+  // Without OS XSAVE support, AVX state cannot be saved across context switches.
+  if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))
+    return False;
+  /* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from
+     some latest Win10 revisions. But we need AVX in older Windows also.
+     So we don't use the following check: */
+  /*
+  if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))
+    return False;
+  */
+  #endif
+
+  /*
+    OS must use new special XSAVE/XRSTOR instructions to save
+    AVX registers when it is required for context switching.
+    At OS starting:
+    OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions.
+    Also OS sets bitmask in XCR0 register that defines what
+    registers will be processed by XSAVE instruction:
+      XCR0.SSE[bit 0] - x87 registers and state
+      XCR0.SSE[bit 1] - SSE registers and state
+      XCR0.AVX[bit 2] - AVX registers and state
+    CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27].
+    So we can read that bit in user-space.
+    XCR0 is available for reading in user-space by new XGETBV instruction.
+  */
+  {
+    const UInt32 c = x86cpuid_Func_1_ECX();
+    if (0 == (1
+        & (c >> 28)   // AVX instructions are supported by hardware
+        & (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS.
+      return False;
+  }
+
+  /* also we can check
+     CPUID.1:ECX.XSAVE [bit 26] : that shows that
+       XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware.
+     But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */
+
+  /* If OS have enabled XSAVE extension instructions (OSXSAVE == 1),
+     in most cases we expect that OS also will support storing/restoring
+     for AVX and SSE states at least.
+     But to be sure of that we call user-space instruction
+     XGETBV(0) to get XCR0 value that contains bitmask that defines
+     what exact states(registers) OS have enabled for storing/restoring.
+  */
+
+  {
+    const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
+    // printf("\n=== XGetBV=%d\n", bm);
+    return 1
+        & (BoolInt)(bm >> 1)  // SSE state is supported (set by OS) for storing/restoring
+        & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring
+  }
+  // since Win7SP1: we can use GetEnabledXStateFeatures();
+}
+
+
+// Returns nonzero if AVX is usable (CPU + OS, see CPU_IsSupported_AVX)
+// and the CPU additionally reports AVX2 (CPUID.7:EBX bit 5).
+BoolInt CPU_IsSupported_AVX2(void)
+{
+  if (!CPU_IsSupported_AVX())
+    return False;
+  // CPUID leaf 7 must exist before we can query it.
+  if (z7_x86_cpuid_GetMaxFunc() < 7)
+    return False;
+  {
+    UInt32 d[4];
+    z7_x86_cpuid(d, 7);
+    // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+    return 1
+        & (BoolInt)(d[1] >> 5); // avx2
+  }
+}
+
+/*
+// fix it:
+BoolInt CPU_IsSupported_AVX512F_AVX512VL(void)
+{
+ if (!CPU_IsSupported_AVX())
+ return False;
+ if (z7_x86_cpuid_GetMaxFunc() < 7)
+ return False;
+ {
+ UInt32 d[4];
+ z7_x86_cpuid(d, 7);
+ // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+ return 1
+ & (BoolInt)(d[1] >> 16) // avx512-f
+ & (BoolInt)(d[1] >> 31); // avx512-Vl
+ }
+}
+*/
+
+// Returns nonzero if AVX is usable and the CPU reports both AVX2
+// (CPUID.7:EBX bit 5) and VAES (CPUID.7:ECX bit 9).
+BoolInt CPU_IsSupported_VAES_AVX2(void)
+{
+  if (!CPU_IsSupported_AVX())
+    return False;
+  // CPUID leaf 7 must exist before we can query it.
+  if (z7_x86_cpuid_GetMaxFunc() < 7)
+    return False;
+  {
+    UInt32 d[4];
+    z7_x86_cpuid(d, 7);
+    // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+    return 1
+        & (BoolInt)(d[1] >> 5)  // avx2
+        // & (d[1] >> 31)       // avx512vl
+        & (BoolInt)(d[2] >> 9); // vaes // VEX-256/EVEX
+  }
+}
+
+// Returns nonzero if the CPU reports 1-GByte page support:
+// extended CPUID leaf 0x80000001, EDX bit 26 (PDPE1GB).
+BoolInt CPU_IsSupported_PageGB(void)
+{
+  CHECK_CPUID_IS_SUPPORTED
+  {
+    UInt32 d[4];
+    // Check that extended leaf 0x80000001 exists before querying it.
+    z7_x86_cpuid(d, 0x80000000);
+    if (d[0] < 0x80000001)
+      return False;
+    z7_x86_cpuid(d, 0x80000001);
+    return (BoolInt)(d[3] >> 26) & 1;
+  }
+}
+
+
+#elif defined(MY_CPU_ARM_OR_ARM64)
+
+#ifdef _WIN32
+
+#include "7zWindows.h"
+
+// Windows on ARM: query CPU features through the OS via IsProcessorFeaturePresent().
+BoolInt CPU_IsSupported_CRC32(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+
+#else
+
+#if defined(__APPLE__)
+
+/*
+#include <stdio.h>
+#include <string.h>
+static void Print_sysctlbyname(const char *name)
+{
+ size_t bufSize = 256;
+ char buf[256];
+ int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
+ {
+ int i;
+ printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
+ for (i = 0; i < 20; i++)
+ printf(" %2x", (unsigned)(Byte)buf[i]);
+
+ }
+}
+*/
+/*
+ Print_sysctlbyname("hw.pagesize");
+ Print_sysctlbyname("machdep.cpu.brand_string");
+*/
+
+// Reads the named sysctl as a UInt32; returns 1 only when the read
+// succeeds and the value is exactly 1, otherwise 0.
+static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name)
+{
+  UInt32 val = 0;
+  if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
+    return 1;
+  return 0;
+}
+
+// Apple platforms: CPU features are exposed as hw.optional.* sysctl flags.
+BoolInt CPU_IsSupported_CRC32(void)
+{
+  return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
+}
+
+BoolInt CPU_IsSupported_NEON(void)
+{
+  return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
+}
+
+#ifdef MY_CPU_ARM64
+#define APPLE_CRYPTO_SUPPORT_VAL 1
+#else
+#define APPLE_CRYPTO_SUPPORT_VAL 0
+#endif
+
+// Apple: crypto extensions are assumed present on arm64 builds and absent
+// otherwise (APPLE_CRYPTO_SUPPORT_VAL is 1 only under MY_CPU_ARM64).
+BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
+BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
+BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
+
+
+#else // __APPLE__
+
+#if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216)
+ #define Z7_GETAUXV_AVAILABLE
+#else
+// #pragma message("=== is not NEW GLIBC === ")
+ #if defined __has_include
+ #if __has_include (<sys/auxv.h>)
+// #pragma message("=== sys/auxv.h is avail=== ")
+ #define Z7_GETAUXV_AVAILABLE
+ #endif
+ #endif
+#endif
+
+#ifdef Z7_GETAUXV_AVAILABLE
+// #pragma message("=== Z7_GETAUXV_AVAILABLE === ")
+#include <sys/auxv.h>
+#define USE_HWCAP
+#endif
+
+#ifdef USE_HWCAP
+
+#if defined(__FreeBSD__)
+// FreeBSD has no getauxval(); emulate it with elf_aux_info(),
+// returning 0 on failure (matching glibc getauxval's "not found" result).
+static unsigned long MY_getauxval(int aux)
+{
+  unsigned long val;
+  if (elf_aux_info(aux, &val, sizeof(val)))
+    return 0;
+  return val;
+}
+#else
+#define MY_getauxval getauxval
+ #if defined __has_include
+ #if __has_include (<asm/hwcap.h>)
+#include <asm/hwcap.h>
+ #endif
+ #endif
+#endif
+
+ #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
+ BoolInt CPU_IsSupported_ ## name1(void) { return (MY_getauxval(AT_HWCAP) & (HWCAP_ ## name2)); }
+
+#ifdef MY_CPU_ARM64
+ #define MY_HWCAP_CHECK_FUNC(name) \
+ MY_HWCAP_CHECK_FUNC_2(name, name)
+#if 1 || defined(__ARM_NEON)
+ BoolInt CPU_IsSupported_NEON(void) { return True; }
+#else
+ MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
+#endif
+// MY_HWCAP_CHECK_FUNC (ASIMD)
+#elif defined(MY_CPU_ARM)
+ #define MY_HWCAP_CHECK_FUNC(name) \
+ BoolInt CPU_IsSupported_ ## name(void) { return (MY_getauxval(AT_HWCAP2) & (HWCAP2_ ## name)); }
+ MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
+#endif
+
+#else // USE_HWCAP
+
+ #define MY_HWCAP_CHECK_FUNC(name) \
+ BoolInt CPU_IsSupported_ ## name(void) { return 0; }
+
+#if defined(__SWITCH__) || defined(__vita__)
+ BoolInt CPU_IsSupported_NEON(void) { return 1; }
+#else
+ MY_HWCAP_CHECK_FUNC(NEON)
+#endif
+
+#endif // USE_HWCAP
+
+MY_HWCAP_CHECK_FUNC (CRC32)
+MY_HWCAP_CHECK_FUNC (SHA1)
+MY_HWCAP_CHECK_FUNC (SHA2)
+MY_HWCAP_CHECK_FUNC (AES)
+
+#endif // __APPLE__
+#endif // _WIN32
+
+#endif // MY_CPU_ARM_OR_ARM64
+
+
+
+#ifdef __APPLE__
+
+#include <sys/sysctl.h>
+
+// Thin wrapper over sysctlbyname(); returns its result code
+// (0 on success, nonzero errno-style on failure).
+int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
+{
+  return sysctlbyname(name, buf, bufSize, NULL, 0);
+}
+
+// Reads the named sysctl into a UInt32. Returns 0 on success; returns
+// EFAULT if the kernel returned a value whose size is not sizeof(UInt32),
+// otherwise the sysctlbyname() error code.
+int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
+{
+  size_t bufSize = sizeof(*val);
+  const int res = z7_sysctlbyname_Get(name, val, &bufSize);
+  if (res == 0 && bufSize != sizeof(*val))
+    return EFAULT;
+  return res;
+}
+
+#endif
-/* Delta.c -- Delta converter\r
-2021-02-09 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-#include "Delta.h"\r
-\r
-void Delta_Init(Byte *state)\r
-{\r
- unsigned i;\r
- for (i = 0; i < DELTA_STATE_SIZE; i++)\r
- state[i] = 0;\r
-}\r
-\r
-\r
-void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size)\r
-{\r
- Byte temp[DELTA_STATE_SIZE];\r
-\r
- if (size == 0)\r
- return;\r
-\r
- {\r
- unsigned i = 0;\r
- do\r
- temp[i] = state[i];\r
- while (++i != delta);\r
- }\r
-\r
- if (size <= delta)\r
- {\r
- unsigned i = 0, k;\r
- do\r
- {\r
- Byte b = *data;\r
- *data++ = (Byte)(b - temp[i]);\r
- temp[i] = b;\r
- }\r
- while (++i != size);\r
- \r
- k = 0;\r
- \r
- do\r
- {\r
- if (i == delta)\r
- i = 0;\r
- state[k] = temp[i++];\r
- }\r
- while (++k != delta);\r
- \r
- return;\r
- }\r
- \r
- {\r
- Byte *p = data + size - delta;\r
- {\r
- unsigned i = 0;\r
- do\r
- state[i] = *p++;\r
- while (++i != delta);\r
- }\r
- {\r
- const Byte *lim = data + delta;\r
- ptrdiff_t dif = -(ptrdiff_t)delta;\r
- \r
- if (((ptrdiff_t)size + dif) & 1)\r
- {\r
- --p; *p = (Byte)(*p - p[dif]);\r
- }\r
-\r
- while (p != lim)\r
- {\r
- --p; *p = (Byte)(*p - p[dif]);\r
- --p; *p = (Byte)(*p - p[dif]);\r
- }\r
- \r
- dif = -dif;\r
- \r
- do\r
- {\r
- --p; *p = (Byte)(*p - temp[--dif]);\r
- }\r
- while (dif != 0);\r
- }\r
- }\r
-}\r
-\r
-\r
-void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size)\r
-{\r
- unsigned i;\r
- const Byte *lim;\r
-\r
- if (size == 0)\r
- return;\r
- \r
- i = 0;\r
- lim = data + size;\r
- \r
- if (size <= delta)\r
- {\r
- do\r
- *data = (Byte)(*data + state[i++]);\r
- while (++data != lim);\r
-\r
- for (; delta != i; state++, delta--)\r
- *state = state[i];\r
- data -= i;\r
- }\r
- else\r
- {\r
- /*\r
- #define B(n) b ## n\r
- #define I(n) Byte B(n) = state[n];\r
- #define U(n) { B(n) = (Byte)((B(n)) + *data++); data[-1] = (B(n)); }\r
- #define F(n) if (data != lim) { U(n) }\r
-\r
- if (delta == 1)\r
- {\r
- I(0)\r
- if ((lim - data) & 1) { U(0) }\r
- while (data != lim) { U(0) U(0) }\r
- data -= 1;\r
- }\r
- else if (delta == 2)\r
- {\r
- I(0) I(1)\r
- lim -= 1; while (data < lim) { U(0) U(1) }\r
- lim += 1; F(0)\r
- data -= 2;\r
- }\r
- else if (delta == 3)\r
- {\r
- I(0) I(1) I(2)\r
- lim -= 2; while (data < lim) { U(0) U(1) U(2) }\r
- lim += 2; F(0) F(1)\r
- data -= 3;\r
- }\r
- else if (delta == 4)\r
- {\r
- I(0) I(1) I(2) I(3)\r
- lim -= 3; while (data < lim) { U(0) U(1) U(2) U(3) }\r
- lim += 3; F(0) F(1) F(2)\r
- data -= 4;\r
- }\r
- else\r
- */\r
- {\r
- do\r
- {\r
- *data = (Byte)(*data + state[i++]);\r
- data++;\r
- }\r
- while (i != delta);\r
- \r
- {\r
- ptrdiff_t dif = -(ptrdiff_t)delta;\r
- do\r
- *data = (Byte)(*data + data[dif]);\r
- while (++data != lim);\r
- data += dif;\r
- }\r
- }\r
- }\r
-\r
- do\r
- *state++ = *data;\r
- while (++data != lim);\r
-}\r
+/* Delta.c -- Delta converter
+2021-02-09 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Delta.h"
+
+// Resets the delta filter: zeroes all DELTA_STATE_SIZE state bytes.
+void Delta_Init(Byte *state)
+{
+  unsigned i;
+  for (i = 0; i < DELTA_STATE_SIZE; i++)
+    state[i] = 0;
+}
+
+
+// Delta-encodes (data, size) in place: each output byte becomes the
+// difference between the input byte and the byte (delta) positions earlier,
+// with (state) supplying the bytes that precede the buffer. On return,
+// (state) holds the last (delta) input bytes so encoding can be resumed.
+// Requires 1 <= delta <= DELTA_STATE_SIZE.
+void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size)
+{
+  Byte temp[DELTA_STATE_SIZE];
+
+  if (size == 0)
+    return;
+
+  // Copy the carried-over history into a local ring.
+  {
+    unsigned i = 0;
+    do
+      temp[i] = state[i];
+    while (++i != delta);
+  }
+
+  if (size <= delta)
+  {
+    // Short-buffer case: every output byte is diffed against saved state only.
+    unsigned i = 0, k;
+    do
+    {
+      Byte b = *data;
+      *data++ = (Byte)(b - temp[i]);
+      temp[i] = b;
+    }
+    while (++i != size);
+
+    k = 0;
+
+    // Rotate temp so state[] again holds the most recent (delta) bytes in order.
+    do
+    {
+      if (i == delta)
+        i = 0;
+      state[k] = temp[i++];
+    }
+    while (++k != delta);
+
+    return;
+  }
+
+  {
+    // General case: save the last (delta) input bytes as the new state first,
+    // then encode the buffer back-to-front so inputs are read before overwrite.
+    Byte *p = data + size - delta;
+    {
+      unsigned i = 0;
+      do
+        state[i] = *p++;
+      while (++i != delta);
+    }
+    {
+      const Byte *lim = data + delta;
+      ptrdiff_t dif = -(ptrdiff_t)delta;
+
+      // Peel one iteration if the main region has odd length,
+      // so the loop below can be 2x unrolled.
+      if (((ptrdiff_t)size + dif) & 1)
+      {
+        --p; *p = (Byte)(*p - p[dif]);
+      }
+
+      while (p != lim)
+      {
+        --p; *p = (Byte)(*p - p[dif]);
+        --p; *p = (Byte)(*p - p[dif]);
+      }
+
+      dif = -dif;
+
+      // First (delta) bytes are diffed against the previously saved state.
+      do
+      {
+        --p; *p = (Byte)(*p - temp[--dif]);
+      }
+      while (dif != 0);
+    }
+  }
+}
+
+
+// Delta-decodes (data, size) in place (inverse of Delta_Encode): each byte
+// becomes the running sum with the byte (delta) positions earlier, with
+// (state) supplying the bytes that precede the buffer. On return, (state)
+// holds the last (delta) decoded bytes so decoding can be resumed.
+// Requires 1 <= delta <= DELTA_STATE_SIZE.
+void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size)
+{
+  unsigned i;
+  const Byte *lim;
+
+  if (size == 0)
+    return;
+
+  i = 0;
+  lim = data + size;
+
+  if (size <= delta)
+  {
+    // Short-buffer case: every byte sums with saved state only.
+    do
+      *data = (Byte)(*data + state[i++]);
+    while (++data != lim);
+
+    // Shift the unused tail of state down; the decoded bytes are
+    // appended by the common loop at the end of the function.
+    for (; delta != i; state++, delta--)
+      *state = state[i];
+    data -= i;
+  }
+  else
+  {
+    /*
+    #define B(n) b ## n
+    #define I(n) Byte B(n) = state[n];
+    #define U(n) { B(n) = (Byte)((B(n)) + *data++); data[-1] = (B(n)); }
+    #define F(n) if (data != lim) { U(n) }
+
+    if (delta == 1)
+    {
+      I(0)
+      if ((lim - data) & 1) { U(0) }
+      while (data != lim) { U(0) U(0) }
+      data -= 1;
+    }
+    else if (delta == 2)
+    {
+      I(0) I(1)
+      lim -= 1; while (data < lim) { U(0) U(1) }
+      lim += 1; F(0)
+      data -= 2;
+    }
+    else if (delta == 3)
+    {
+      I(0) I(1) I(2)
+      lim -= 2; while (data < lim) { U(0) U(1) U(2) }
+      lim += 2; F(0) F(1)
+      data -= 3;
+    }
+    else if (delta == 4)
+    {
+      I(0) I(1) I(2) I(3)
+      lim -= 3; while (data < lim) { U(0) U(1) U(2) U(3) }
+      lim += 3; F(0) F(1) F(2)
+      data -= 4;
+    }
+    else
+    */
+    {
+      // First (delta) bytes sum with the saved state...
+      do
+      {
+        *data = (Byte)(*data + state[i++]);
+        data++;
+      }
+      while (i != delta);
+
+      // ...the rest sum with the already-decoded byte (delta) positions back.
+      {
+        ptrdiff_t dif = -(ptrdiff_t)delta;
+        do
+          *data = (Byte)(*data + data[dif]);
+        while (++data != lim);
+        data += dif;
+      }
+    }
+  }
+
+  // Save the last (delta) decoded bytes as the new state.
+  do
+    *state++ = *data;
+  while (++data != lim);
+}
-/* LzFind.c -- Match finder for LZ algorithms\r
-2021-11-29 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-#include <string.h>\r
-// #include <stdio.h>\r
-\r
-#include "CpuArch.h"\r
-#include "LzFind.h"\r
-#include "LzHash.h"\r
-\r
-#define kBlockMoveAlign (1 << 7) // alignment for memmove()\r
-#define kBlockSizeAlign (1 << 16) // alignment for block allocation\r
-#define kBlockSizeReserveMin (1 << 24) // it's 1/256 from 4 GB dictinary\r
-\r
-#define kEmptyHashValue 0\r
-\r
-#define kMaxValForNormalize ((UInt32)0)\r
-// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug\r
-\r
-// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses\r
-\r
-#define GET_AVAIL_BYTES(p) \\r
- Inline_MatchFinder_GetNumAvailableBytes(p)\r
-\r
-\r
-// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)\r
-#define kFix5HashSize kFix4HashSize\r
-\r
-/*\r
- HASH2_CALC:\r
- if (hv) match, then cur[0] and cur[1] also match\r
-*/\r
-#define HASH2_CALC hv = GetUi16(cur);\r
-\r
-// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255]\r
-\r
-/*\r
- HASH3_CALC:\r
- if (cur[0]) and (h2) match, then cur[1] also match\r
- if (cur[0]) and (hv) match, then cur[1] and cur[2] also match\r
-*/\r
-#define HASH3_CALC { \\r
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \\r
- h2 = temp & (kHash2Size - 1); \\r
- hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }\r
-\r
-#define HASH4_CALC { \\r
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \\r
- h2 = temp & (kHash2Size - 1); \\r
- temp ^= ((UInt32)cur[2] << 8); \\r
- h3 = temp & (kHash3Size - 1); \\r
- hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; }\r
-\r
-#define HASH5_CALC { \\r
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \\r
- h2 = temp & (kHash2Size - 1); \\r
- temp ^= ((UInt32)cur[2] << 8); \\r
- h3 = temp & (kHash3Size - 1); \\r
- temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \\r
- /* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \\r
- hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; }\r
-\r
-#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;\r
-\r
-\r
-static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)\r
-{\r
- if (!p->directInput)\r
- {\r
- ISzAlloc_Free(alloc, p->bufferBase);\r
- p->bufferBase = NULL;\r
- }\r
-}\r
-\r
-\r
-static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc)\r
-{\r
- if (blockSize == 0)\r
- return 0;\r
- if (!p->bufferBase || p->blockSize != blockSize)\r
- {\r
- // size_t blockSizeT;\r
- LzInWindow_Free(p, alloc);\r
- p->blockSize = blockSize;\r
- // blockSizeT = blockSize;\r
- \r
- // printf("\nblockSize = 0x%x\n", blockSize);\r
- /*\r
- #if defined _WIN64\r
- // we can allocate 4GiB, but still use UInt32 for (p->blockSize)\r
- // we use UInt32 type for (p->blockSize), because\r
- // we don't want to wrap over 4 GiB,\r
- // when we use (p->streamPos - p->pos) that is UInt32.\r
- if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign)\r
- {\r
- blockSizeT = ((size_t)1 << 32);\r
- printf("\nchanged to blockSizeT = 4GiB\n");\r
- }\r
- #endif\r
- */\r
- \r
- p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);\r
- // printf("\nbufferBase = %p\n", p->bufferBase);\r
- // return 0; // for debug\r
- }\r
- return (p->bufferBase != NULL);\r
-}\r
-\r
-static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }\r
-\r
-static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); }\r
-\r
-\r
-MY_NO_INLINE\r
-static void MatchFinder_ReadBlock(CMatchFinder *p)\r
-{\r
- if (p->streamEndWasReached || p->result != SZ_OK)\r
- return;\r
-\r
- /* We use (p->streamPos - p->pos) value.\r
- (p->streamPos < p->pos) is allowed. */\r
-\r
- if (p->directInput)\r
- {\r
- UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);\r
- if (curSize > p->directInputRem)\r
- curSize = (UInt32)p->directInputRem;\r
- p->directInputRem -= curSize;\r
- p->streamPos += curSize;\r
- if (p->directInputRem == 0)\r
- p->streamEndWasReached = 1;\r
- return;\r
- }\r
- \r
- for (;;)\r
- {\r
- Byte *dest = p->buffer + GET_AVAIL_BYTES(p);\r
- size_t size = (size_t)(p->bufferBase + p->blockSize - dest);\r
- if (size == 0)\r
- {\r
- /* we call ReadBlock() after NeedMove() and MoveBlock().\r
- NeedMove() and MoveBlock() povide more than (keepSizeAfter)\r
- to the end of (blockSize).\r
- So we don't execute this branch in normal code flow.\r
- We can go here, if we will call ReadBlock() before NeedMove(), MoveBlock().\r
- */\r
- // p->result = SZ_ERROR_FAIL; // we can show error here\r
- return;\r
- }\r
-\r
- // #define kRead 3\r
- // if (size > kRead) size = kRead; // for debug\r
-\r
- p->result = ISeqInStream_Read(p->stream, dest, &size);\r
- if (p->result != SZ_OK)\r
- return;\r
- if (size == 0)\r
- {\r
- p->streamEndWasReached = 1;\r
- return;\r
- }\r
- p->streamPos += (UInt32)size;\r
- if (GET_AVAIL_BYTES(p) > p->keepSizeAfter)\r
- return;\r
- /* here and in another (p->keepSizeAfter) checks we keep on 1 byte more than was requested by Create() function\r
- (GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */\r
- }\r
-\r
- // on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter)\r
-}\r
-\r
-\r
-\r
-MY_NO_INLINE\r
-void MatchFinder_MoveBlock(CMatchFinder *p)\r
-{\r
- const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore;\r
- const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;\r
- p->buffer = p->bufferBase + keepBefore;\r
- memmove(p->bufferBase,\r
- p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)),\r
- keepBefore + (size_t)GET_AVAIL_BYTES(p));\r
-}\r
-\r
-/* We call MoveBlock() before ReadBlock().\r
- So MoveBlock() can be wasteful operation, if the whole input data\r
- can fit in current block even without calling MoveBlock().\r
- in important case where (dataSize <= historySize)\r
- condition (p->blockSize > dataSize + p->keepSizeAfter) is met\r
- So there is no MoveBlock() in that case case.\r
-*/\r
-\r
-int MatchFinder_NeedMove(CMatchFinder *p)\r
-{\r
- if (p->directInput)\r
- return 0;\r
- if (p->streamEndWasReached || p->result != SZ_OK)\r
- return 0;\r
- return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);\r
-}\r
-\r
-void MatchFinder_ReadIfRequired(CMatchFinder *p)\r
-{\r
- if (p->keepSizeAfter >= GET_AVAIL_BYTES(p))\r
- MatchFinder_ReadBlock(p);\r
-}\r
-\r
-\r
-\r
-static void MatchFinder_SetDefaultSettings(CMatchFinder *p)\r
-{\r
- p->cutValue = 32;\r
- p->btMode = 1;\r
- p->numHashBytes = 4;\r
- p->bigHash = 0;\r
-}\r
-\r
-#define kCrcPoly 0xEDB88320\r
-\r
-void MatchFinder_Construct(CMatchFinder *p)\r
-{\r
- unsigned i;\r
- p->bufferBase = NULL;\r
- p->directInput = 0;\r
- p->hash = NULL;\r
- p->expectedDataSize = (UInt64)(Int64)-1;\r
- MatchFinder_SetDefaultSettings(p);\r
-\r
- for (i = 0; i < 256; i++)\r
- {\r
- UInt32 r = (UInt32)i;\r
- unsigned j;\r
- for (j = 0; j < 8; j++)\r
- r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));\r
- p->crc[i] = r;\r
- }\r
-}\r
-\r
-static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)\r
-{\r
- ISzAlloc_Free(alloc, p->hash);\r
- p->hash = NULL;\r
-}\r
-\r
-void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)\r
-{\r
- MatchFinder_FreeThisClassMemory(p, alloc);\r
- LzInWindow_Free(p, alloc);\r
-}\r
-\r
-static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)\r
-{\r
- size_t sizeInBytes = (size_t)num * sizeof(CLzRef);\r
- if (sizeInBytes / sizeof(CLzRef) != num)\r
- return NULL;\r
- return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);\r
-}\r
-\r
-#if (kBlockSizeReserveMin < kBlockSizeAlign * 2)\r
- #error Stop_Compiling_Bad_Reserve\r
-#endif\r
-\r
-\r
-\r
-static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)\r
-{\r
- UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter);\r
- /*\r
- if (historySize > kMaxHistorySize)\r
- return 0;\r
- */\r
- // printf("\nhistorySize == 0x%x\n", historySize);\r
- \r
- if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow\r
- return 0;\r
- \r
- {\r
- const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign;\r
- const UInt32 rem = kBlockSizeMax - blockSize;\r
- const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2))\r
- + (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here\r
- if (blockSize >= kBlockSizeMax\r
- || rem < kBlockSizeReserveMin) // we reject settings that will be slow\r
- return 0;\r
- if (reserve >= rem)\r
- blockSize = kBlockSizeMax;\r
- else\r
- {\r
- blockSize += reserve;\r
- blockSize &= ~(UInt32)(kBlockSizeAlign - 1);\r
- }\r
- }\r
- // printf("\n LzFind_blockSize = %x\n", blockSize);\r
- // printf("\n LzFind_blockSize = %d\n", blockSize >> 20);\r
- return blockSize;\r
-}\r
-\r
-\r
-int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,\r
- UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,\r
- ISzAllocPtr alloc)\r
-{\r
- /* we need one additional byte in (p->keepSizeBefore),\r
- since we use MoveBlock() after (p->pos++) and before dictionary using */\r
- // keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug\r
- p->keepSizeBefore = historySize + keepAddBufferBefore + 1;\r
-\r
- keepAddBufferAfter += matchMaxLen;\r
- /* we need (p->keepSizeAfter >= p->numHashBytes) */\r
- if (keepAddBufferAfter < p->numHashBytes)\r
- keepAddBufferAfter = p->numHashBytes;\r
- // keepAddBufferAfter -= 2; // for debug\r
- p->keepSizeAfter = keepAddBufferAfter;\r
-\r
- if (p->directInput)\r
- p->blockSize = 0;\r
- if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))\r
- {\r
- const UInt32 newCyclicBufferSize = historySize + 1; // do not change it\r
- UInt32 hs;\r
- p->matchMaxLen = matchMaxLen;\r
- {\r
- // UInt32 hs4;\r
- p->fixedHashSize = 0;\r
- hs = (1 << 16) - 1;\r
- if (p->numHashBytes != 2)\r
- {\r
- hs = historySize;\r
- if (hs > p->expectedDataSize)\r
- hs = (UInt32)p->expectedDataSize;\r
- if (hs != 0)\r
- hs--;\r
- hs |= (hs >> 1);\r
- hs |= (hs >> 2);\r
- hs |= (hs >> 4);\r
- hs |= (hs >> 8);\r
- // we propagated 16 bits in (hs). Low 16 bits must be set later\r
- hs >>= 1;\r
- if (hs >= (1 << 24))\r
- {\r
- if (p->numHashBytes == 3)\r
- hs = (1 << 24) - 1;\r
- else\r
- hs >>= 1;\r
- /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */\r
- }\r
- \r
- // hs = ((UInt32)1 << 25) - 1; // for test\r
- \r
- // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)\r
- hs |= (1 << 16) - 1; /* don't change it! */\r
- \r
- // bt5: we adjust the size with recommended minimum size\r
- if (p->numHashBytes >= 5)\r
- hs |= (256 << kLzHash_CrcShift_2) - 1;\r
- }\r
- p->hashMask = hs;\r
- hs++;\r
-\r
- /*\r
- hs4 = (1 << 20);\r
- if (hs4 > hs)\r
- hs4 = hs;\r
- // hs4 = (1 << 16); // for test\r
- p->hash4Mask = hs4 - 1;\r
- */\r
-\r
- if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;\r
- if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;\r
- // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;\r
- hs += p->fixedHashSize;\r
- }\r
-\r
- {\r
- size_t newSize;\r
- size_t numSons;\r
- p->historySize = historySize;\r
- p->hashSizeSum = hs;\r
- p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)\r
- \r
- numSons = newCyclicBufferSize;\r
- if (p->btMode)\r
- numSons <<= 1;\r
- newSize = hs + numSons;\r
-\r
- // aligned size is not required here, but it can be better for some loops\r
- #define NUM_REFS_ALIGN_MASK 0xF\r
- newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;\r
-\r
- if (p->hash && p->numRefs == newSize)\r
- return 1;\r
- \r
- MatchFinder_FreeThisClassMemory(p, alloc);\r
- p->numRefs = newSize;\r
- p->hash = AllocRefs(newSize, alloc);\r
- \r
- if (p->hash)\r
- {\r
- p->son = p->hash + p->hashSizeSum;\r
- return 1;\r
- }\r
- }\r
- }\r
-\r
- MatchFinder_Free(p, alloc);\r
- return 0;\r
-}\r
-\r
-\r
-static void MatchFinder_SetLimits(CMatchFinder *p)\r
-{\r
- UInt32 k;\r
- UInt32 n = kMaxValForNormalize - p->pos;\r
- if (n == 0)\r
- n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0)\r
- \r
- k = p->cyclicBufferSize - p->cyclicBufferPos;\r
- if (k < n)\r
- n = k;\r
-\r
- k = GET_AVAIL_BYTES(p);\r
- {\r
- const UInt32 ksa = p->keepSizeAfter;\r
- UInt32 mm = p->matchMaxLen;\r
- if (k > ksa)\r
- k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock\r
- else if (k >= mm)\r
- {\r
- // the limitation for (p->lenLimit) update\r
- k -= mm; // optimization : to reduce the number of checks\r
- k++;\r
- // k = 1; // non-optimized version : for debug\r
- }\r
- else\r
- {\r
- mm = k;\r
- if (k != 0)\r
- k = 1;\r
- }\r
- p->lenLimit = mm;\r
- }\r
- if (k < n)\r
- n = k;\r
- \r
- p->posLimit = p->pos + n;\r
-}\r
-\r
-\r
-void MatchFinder_Init_LowHash(CMatchFinder *p)\r
-{\r
- size_t i;\r
- CLzRef *items = p->hash;\r
- const size_t numItems = p->fixedHashSize;\r
- for (i = 0; i < numItems; i++)\r
- items[i] = kEmptyHashValue;\r
-}\r
-\r
-\r
-void MatchFinder_Init_HighHash(CMatchFinder *p)\r
-{\r
- size_t i;\r
- CLzRef *items = p->hash + p->fixedHashSize;\r
- const size_t numItems = (size_t)p->hashMask + 1;\r
- for (i = 0; i < numItems; i++)\r
- items[i] = kEmptyHashValue;\r
-}\r
-\r
-\r
-void MatchFinder_Init_4(CMatchFinder *p)\r
-{\r
- p->buffer = p->bufferBase;\r
- {\r
- /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.\r
- the code in CMatchFinderMt expects (pos = 1) */\r
- p->pos =\r
- p->streamPos =\r
- 1; // it's smallest optimal value. do not change it\r
- // 0; // for debug\r
- }\r
- p->result = SZ_OK;\r
- p->streamEndWasReached = 0;\r
-}\r
-\r
-\r
-// (CYC_TO_POS_OFFSET == 0) is expected by some optimized code\r
-#define CYC_TO_POS_OFFSET 0\r
-// #define CYC_TO_POS_OFFSET 1 // for debug\r
-\r
-void MatchFinder_Init(CMatchFinder *p)\r
-{\r
- MatchFinder_Init_HighHash(p);\r
- MatchFinder_Init_LowHash(p);\r
- MatchFinder_Init_4(p);\r
- // if (readData)\r
- MatchFinder_ReadBlock(p);\r
-\r
- /* if we init (cyclicBufferPos = pos), then we can use one variable\r
- instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */\r
- p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos)\r
- // p->cyclicBufferPos = 0; // smallest value\r
- // p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses.\r
- MatchFinder_SetLimits(p);\r
-}\r
-\r
-\r
-\r
-#ifdef MY_CPU_X86_OR_AMD64\r
- #if defined(__clang__) && (__clang_major__ >= 8) \\r
- || defined(__GNUC__) && (__GNUC__ >= 8) \\r
- || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)\r
- #define USE_SATUR_SUB_128\r
- #define USE_AVX2\r
- #define ATTRIB_SSE41 __attribute__((__target__("sse4.1")))\r
- #define ATTRIB_AVX2 __attribute__((__target__("avx2")))\r
- #elif defined(_MSC_VER)\r
- #if (_MSC_VER >= 1600)\r
- #define USE_SATUR_SUB_128\r
- #if (_MSC_VER >= 1900)\r
- #define USE_AVX2\r
- #include <immintrin.h> // avx\r
- #endif\r
- #endif\r
- #endif\r
-\r
-// #elif defined(MY_CPU_ARM_OR_ARM64)\r
-#elif defined(MY_CPU_ARM64)\r
-\r
- #if defined(__clang__) && (__clang_major__ >= 8) \\r
- || defined(__GNUC__) && (__GNUC__ >= 8)\r
- #define USE_SATUR_SUB_128\r
- #ifdef MY_CPU_ARM64\r
- // #define ATTRIB_SSE41 __attribute__((__target__("")))\r
- #else\r
- // #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))\r
- #endif\r
-\r
- #elif defined(_MSC_VER)\r
- #if (_MSC_VER >= 1910)\r
- #define USE_SATUR_SUB_128\r
- #endif\r
- #endif\r
-\r
- #if defined(_MSC_VER) && defined(MY_CPU_ARM64)\r
- #include <arm64_neon.h>\r
- #else\r
- #include <arm_neon.h>\r
- #endif\r
-\r
-#endif\r
-\r
-/*\r
-#ifndef ATTRIB_SSE41\r
- #define ATTRIB_SSE41\r
-#endif\r
-#ifndef ATTRIB_AVX2\r
- #define ATTRIB_AVX2\r
-#endif\r
-*/\r
-\r
-#ifdef USE_SATUR_SUB_128\r
-\r
-// #define _SHOW_HW_STATUS\r
-\r
-#ifdef _SHOW_HW_STATUS\r
-#include <stdio.h>\r
-#define _PRF(x) x\r
-_PRF(;)\r
-#else\r
-#define _PRF(x)\r
-#endif\r
-\r
-#ifdef MY_CPU_ARM_OR_ARM64\r
-\r
-#ifdef MY_CPU_ARM64\r
-// #define FORCE_SATUR_SUB_128\r
-#endif\r
-\r
-typedef uint32x4_t v128;\r
-#define SASUB_128(i) \\r
- *(v128 *)(void *)(items + (i) * 4) = \\r
- vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2);\r
-\r
-#else\r
-\r
-#include <smmintrin.h> // sse4.1\r
-\r
-typedef __m128i v128;\r
-#define SASUB_128(i) \\r
- *(v128 *)(void *)(items + (i) * 4) = \\r
- _mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1\r
-\r
-#endif\r
-\r
-\r
-\r
-MY_NO_INLINE\r
-static\r
-#ifdef ATTRIB_SSE41\r
-ATTRIB_SSE41\r
-#endif\r
-void\r
-MY_FAST_CALL\r
-LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)\r
-{\r
- v128 sub2 =\r
- #ifdef MY_CPU_ARM_OR_ARM64\r
- vdupq_n_u32(subValue);\r
- #else\r
- _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);\r
- #endif\r
- do\r
- {\r
- SASUB_128(0)\r
- SASUB_128(1)\r
- SASUB_128(2)\r
- SASUB_128(3)\r
- items += 4 * 4;\r
- }\r
- while (items != lim);\r
-}\r
-\r
-\r
-\r
-#ifdef USE_AVX2\r
-\r
-#include <immintrin.h> // avx\r
-\r
-#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2\r
-\r
-MY_NO_INLINE\r
-static\r
-#ifdef ATTRIB_AVX2\r
-ATTRIB_AVX2\r
-#endif\r
-void\r
-MY_FAST_CALL\r
-LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)\r
-{\r
- __m256i sub2 = _mm256_set_epi32(\r
- (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,\r
- (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);\r
- do\r
- {\r
- SASUB_256(0)\r
- SASUB_256(1)\r
- items += 2 * 8;\r
- }\r
- while (items != lim);\r
-}\r
-#endif // USE_AVX2\r
-\r
-#ifndef FORCE_SATUR_SUB_128\r
-typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)(\r
- UInt32 subValue, CLzRef *items, const CLzRef *lim);\r
-static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;\r
-#endif // FORCE_SATUR_SUB_128\r
-\r
-#endif // USE_SATUR_SUB_128\r
-\r
-\r
-// kEmptyHashValue must be zero\r
-// #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m;\r
-#define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue;\r
-\r
-#ifdef FORCE_SATUR_SUB_128\r
-\r
-#define DEFAULT_SaturSub LzFind_SaturSub_128\r
-\r
-#else\r
-\r
-#define DEFAULT_SaturSub LzFind_SaturSub_32\r
-\r
-MY_NO_INLINE\r
-static\r
-void\r
-MY_FAST_CALL\r
-LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)\r
-{\r
- do\r
- {\r
- UInt32 v;\r
- SASUB_32(0)\r
- SASUB_32(1)\r
- SASUB_32(2)\r
- SASUB_32(3)\r
- SASUB_32(4)\r
- SASUB_32(5)\r
- SASUB_32(6)\r
- SASUB_32(7)\r
- items += 8;\r
- }\r
- while (items != lim);\r
-}\r
-\r
-#endif\r
-\r
-\r
-MY_NO_INLINE\r
-void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)\r
-{\r
- #define K_NORM_ALIGN_BLOCK_SIZE (1 << 6)\r
- \r
- CLzRef *lim;\r
-\r
- for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)\r
- {\r
- UInt32 v;\r
- SASUB_32(0);\r
- items++;\r
- }\r
-\r
- {\r
- #define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1)\r
- lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK);\r
- numItems &= K_NORM_ALIGN_MASK;\r
- if (items != lim)\r
- {\r
- #if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128)\r
- if (g_LzFind_SaturSub)\r
- g_LzFind_SaturSub(subValue, items, lim);\r
- else\r
- #endif\r
- DEFAULT_SaturSub(subValue, items, lim);\r
- }\r
- items = lim;\r
- }\r
-\r
-\r
- for (; numItems != 0; numItems--)\r
- {\r
- UInt32 v;\r
- SASUB_32(0);\r
- items++;\r
- }\r
-}\r
-\r
-\r
-\r
-// call MatchFinder_CheckLimits() only after (p->pos++) update\r
-\r
-MY_NO_INLINE\r
-static void MatchFinder_CheckLimits(CMatchFinder *p)\r
-{\r
- if (// !p->streamEndWasReached && p->result == SZ_OK &&\r
- p->keepSizeAfter == GET_AVAIL_BYTES(p))\r
- {\r
- // we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p))\r
- if (MatchFinder_NeedMove(p))\r
- MatchFinder_MoveBlock(p);\r
- MatchFinder_ReadBlock(p);\r
- }\r
-\r
- if (p->pos == kMaxValForNormalize)\r
- if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data.\r
- /*\r
- if we disable normalization for last bytes of data, and\r
- if (data_size == 4 GiB), we don't call wastfull normalization,\r
- but (pos) will be wrapped over Zero (0) in that case.\r
- And we cannot resume later to normal operation\r
- */\r
- {\r
- // MatchFinder_Normalize(p);\r
- /* after normalization we need (p->pos >= p->historySize + 1); */\r
- /* we can reduce subValue to aligned value, if want to keep alignment\r
- of (p->pos) and (p->buffer) for speculated accesses. */\r
- const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;\r
- // const UInt32 subValue = (1 << 15); // for debug\r
- // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);\r
- size_t numSonRefs = p->cyclicBufferSize;\r
- if (p->btMode)\r
- numSonRefs <<= 1;\r
- Inline_MatchFinder_ReduceOffsets(p, subValue);\r
- MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs);\r
- }\r
-\r
- if (p->cyclicBufferPos == p->cyclicBufferSize)\r
- p->cyclicBufferPos = 0;\r
- \r
- MatchFinder_SetLimits(p);\r
-}\r
-\r
-\r
-/*\r
- (lenLimit > maxLen)\r
-*/\r
-MY_FORCE_INLINE\r
-static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,\r
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,\r
- UInt32 *d, unsigned maxLen)\r
-{\r
- /*\r
- son[_cyclicBufferPos] = curMatch;\r
- for (;;)\r
- {\r
- UInt32 delta = pos - curMatch;\r
- if (cutValue-- == 0 || delta >= _cyclicBufferSize)\r
- return d;\r
- {\r
- const Byte *pb = cur - delta;\r
- curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];\r
- if (pb[maxLen] == cur[maxLen] && *pb == *cur)\r
- {\r
- UInt32 len = 0;\r
- while (++len != lenLimit)\r
- if (pb[len] != cur[len])\r
- break;\r
- if (maxLen < len)\r
- {\r
- maxLen = len;\r
- *d++ = len;\r
- *d++ = delta - 1;\r
- if (len == lenLimit)\r
- return d;\r
- }\r
- }\r
- }\r
- }\r
- */\r
-\r
- const Byte *lim = cur + lenLimit;\r
- son[_cyclicBufferPos] = curMatch;\r
-\r
- do\r
- {\r
- UInt32 delta;\r
-\r
- if (curMatch == 0)\r
- break;\r
- // if (curMatch2 >= curMatch) return NULL;\r
- delta = pos - curMatch;\r
- if (delta >= _cyclicBufferSize)\r
- break;\r
- {\r
- ptrdiff_t diff;\r
- curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];\r
- diff = (ptrdiff_t)0 - (ptrdiff_t)delta;\r
- if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff])\r
- {\r
- const Byte *c = cur;\r
- while (*c == c[diff])\r
- {\r
- if (++c == lim)\r
- {\r
- d[0] = (UInt32)(lim - cur);\r
- d[1] = delta - 1;\r
- return d + 2;\r
- }\r
- }\r
- {\r
- const unsigned len = (unsigned)(c - cur);\r
- if (maxLen < len)\r
- {\r
- maxLen = len;\r
- d[0] = (UInt32)len;\r
- d[1] = delta - 1;\r
- d += 2;\r
- }\r
- }\r
- }\r
- }\r
- }\r
- while (--cutValue);\r
- \r
- return d;\r
-}\r
-\r
-\r
-MY_FORCE_INLINE\r
-UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,\r
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,\r
- UInt32 *d, UInt32 maxLen)\r
-{\r
- CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;\r
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);\r
- unsigned len0 = 0, len1 = 0;\r
-\r
- UInt32 cmCheck;\r
-\r
- // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }\r
-\r
- cmCheck = (UInt32)(pos - _cyclicBufferSize);\r
- if ((UInt32)pos <= _cyclicBufferSize)\r
- cmCheck = 0;\r
-\r
- if (cmCheck < curMatch)\r
- do\r
- {\r
- const UInt32 delta = pos - curMatch;\r
- {\r
- CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);\r
- const Byte *pb = cur - delta;\r
- unsigned len = (len0 < len1 ? len0 : len1);\r
- const UInt32 pair0 = pair[0];\r
- if (pb[len] == cur[len])\r
- {\r
- if (++len != lenLimit && pb[len] == cur[len])\r
- while (++len != lenLimit)\r
- if (pb[len] != cur[len])\r
- break;\r
- if (maxLen < len)\r
- {\r
- maxLen = (UInt32)len;\r
- *d++ = (UInt32)len;\r
- *d++ = delta - 1;\r
- if (len == lenLimit)\r
- {\r
- *ptr1 = pair0;\r
- *ptr0 = pair[1];\r
- return d;\r
- }\r
- }\r
- }\r
- if (pb[len] < cur[len])\r
- {\r
- *ptr1 = curMatch;\r
- // const UInt32 curMatch2 = pair[1];\r
- // if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }\r
- // curMatch = curMatch2;\r
- curMatch = pair[1];\r
- ptr1 = pair + 1;\r
- len1 = len;\r
- }\r
- else\r
- {\r
- *ptr0 = curMatch;\r
- curMatch = pair[0];\r
- ptr0 = pair;\r
- len0 = len;\r
- }\r
- }\r
- }\r
- while(--cutValue && cmCheck < curMatch);\r
-\r
- *ptr0 = *ptr1 = kEmptyHashValue;\r
- return d;\r
-}\r
-\r
-\r
-static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,\r
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)\r
-{\r
- CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;\r
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);\r
- unsigned len0 = 0, len1 = 0;\r
-\r
- UInt32 cmCheck;\r
-\r
- cmCheck = (UInt32)(pos - _cyclicBufferSize);\r
- if ((UInt32)pos <= _cyclicBufferSize)\r
- cmCheck = 0;\r
-\r
- if (// curMatch >= pos || // failure\r
- cmCheck < curMatch)\r
- do\r
- {\r
- const UInt32 delta = pos - curMatch;\r
- {\r
- CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);\r
- const Byte *pb = cur - delta;\r
- unsigned len = (len0 < len1 ? len0 : len1);\r
- if (pb[len] == cur[len])\r
- {\r
- while (++len != lenLimit)\r
- if (pb[len] != cur[len])\r
- break;\r
- {\r
- if (len == lenLimit)\r
- {\r
- *ptr1 = pair[0];\r
- *ptr0 = pair[1];\r
- return;\r
- }\r
- }\r
- }\r
- if (pb[len] < cur[len])\r
- {\r
- *ptr1 = curMatch;\r
- curMatch = pair[1];\r
- ptr1 = pair + 1;\r
- len1 = len;\r
- }\r
- else\r
- {\r
- *ptr0 = curMatch;\r
- curMatch = pair[0];\r
- ptr0 = pair;\r
- len0 = len;\r
- }\r
- }\r
- }\r
- while(--cutValue && cmCheck < curMatch);\r
- \r
- *ptr0 = *ptr1 = kEmptyHashValue;\r
- return;\r
-}\r
-\r
-\r
-#define MOVE_POS \\r
- ++p->cyclicBufferPos; \\r
- p->buffer++; \\r
- { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }\r
-\r
-#define MOVE_POS_RET MOVE_POS return distances;\r
-\r
-MY_NO_INLINE\r
-static void MatchFinder_MovePos(CMatchFinder *p)\r
-{\r
- /* we go here at the end of stream data, when (avail < num_hash_bytes)\r
- We don't update sons[cyclicBufferPos << btMode].\r
- So (sons) record will contain junk. And we cannot resume match searching\r
- to normal operation, even if we will provide more input data in buffer.\r
- p->sons[p->cyclicBufferPos << p->btMode] = 0; // kEmptyHashValue\r
- if (p->btMode)\r
- p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue\r
- */\r
- MOVE_POS;\r
-}\r
-\r
-#define GET_MATCHES_HEADER2(minLen, ret_op) \\r
- unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \\r
- lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \\r
- cur = p->buffer;\r
-\r
-#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances)\r
-#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue)\r
-\r
-#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue\r
-\r
-#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num);\r
-\r
-#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \\r
- distances = func(MF_PARAMS(p), \\r
- distances, (UInt32)_maxLen_); MOVE_POS_RET;\r
-\r
-#define GET_MATCHES_FOOTER_BT(_maxLen_) \\r
- GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)\r
-\r
-#define GET_MATCHES_FOOTER_HC(_maxLen_) \\r
- GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec)\r
-\r
-\r
-\r
-#define UPDATE_maxLen { \\r
- const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \\r
- const Byte *c = cur + maxLen; \\r
- const Byte *lim = cur + lenLimit; \\r
- for (; c != lim; c++) if (*(c + diff) != *c) break; \\r
- maxLen = (unsigned)(c - cur); }\r
-\r
-static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)\r
-{\r
- GET_MATCHES_HEADER(2)\r
- HASH2_CALC;\r
- curMatch = p->hash[hv];\r
- p->hash[hv] = p->pos;\r
- GET_MATCHES_FOOTER_BT(1)\r
-}\r
-\r
-UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)\r
-{\r
- GET_MATCHES_HEADER(3)\r
- HASH_ZIP_CALC;\r
- curMatch = p->hash[hv];\r
- p->hash[hv] = p->pos;\r
- GET_MATCHES_FOOTER_BT(2)\r
-}\r
-\r
-\r
-#define SET_mmm \\r
- mmm = p->cyclicBufferSize; \\r
- if (pos < mmm) \\r
- mmm = pos;\r
-\r
-\r
-static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)\r
-{\r
- UInt32 mmm;\r
- UInt32 h2, d2, pos;\r
- unsigned maxLen;\r
- UInt32 *hash;\r
- GET_MATCHES_HEADER(3)\r
-\r
- HASH3_CALC;\r
-\r
- hash = p->hash;\r
- pos = p->pos;\r
-\r
- d2 = pos - hash[h2];\r
-\r
- curMatch = (hash + kFix3HashSize)[hv];\r
- \r
- hash[h2] = pos;\r
- (hash + kFix3HashSize)[hv] = pos;\r
-\r
- SET_mmm\r
-\r
- maxLen = 2;\r
-\r
- if (d2 < mmm && *(cur - d2) == *cur)\r
- {\r
- UPDATE_maxLen\r
- distances[0] = (UInt32)maxLen;\r
- distances[1] = d2 - 1;\r
- distances += 2;\r
- if (maxLen == lenLimit)\r
- {\r
- SkipMatchesSpec(MF_PARAMS(p));\r
- MOVE_POS_RET;\r
- }\r
- }\r
- \r
- GET_MATCHES_FOOTER_BT(maxLen)\r
-}\r
-\r
-\r
-static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)\r
-{\r
- UInt32 mmm;\r
- UInt32 h2, h3, d2, d3, pos;\r
- unsigned maxLen;\r
- UInt32 *hash;\r
- GET_MATCHES_HEADER(4)\r
-\r
- HASH4_CALC;\r
-\r
- hash = p->hash;\r
- pos = p->pos;\r
-\r
- d2 = pos - hash [h2];\r
- d3 = pos - (hash + kFix3HashSize)[h3];\r
- curMatch = (hash + kFix4HashSize)[hv];\r
-\r
- hash [h2] = pos;\r
- (hash + kFix3HashSize)[h3] = pos;\r
- (hash + kFix4HashSize)[hv] = pos;\r
-\r
- SET_mmm\r
-\r
- maxLen = 3;\r
- \r
- for (;;)\r
- {\r
- if (d2 < mmm && *(cur - d2) == *cur)\r
- {\r
- distances[0] = 2;\r
- distances[1] = d2 - 1;\r
- distances += 2;\r
- if (*(cur - d2 + 2) == cur[2])\r
- {\r
- // distances[-2] = 3;\r
- }\r
- else if (d3 < mmm && *(cur - d3) == *cur)\r
- {\r
- d2 = d3;\r
- distances[1] = d3 - 1;\r
- distances += 2;\r
- }\r
- else\r
- break;\r
- }\r
- else if (d3 < mmm && *(cur - d3) == *cur)\r
- {\r
- d2 = d3;\r
- distances[1] = d3 - 1;\r
- distances += 2;\r
- }\r
- else\r
- break;\r
- \r
- UPDATE_maxLen\r
- distances[-2] = (UInt32)maxLen;\r
- if (maxLen == lenLimit)\r
- {\r
- SkipMatchesSpec(MF_PARAMS(p));\r
- MOVE_POS_RET\r
- }\r
- break;\r
- }\r
- \r
- GET_MATCHES_FOOTER_BT(maxLen)\r
-}\r
-\r
-\r
-static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)\r
-{\r
- UInt32 mmm;\r
- UInt32 h2, h3, d2, d3, maxLen, pos;\r
- UInt32 *hash;\r
- GET_MATCHES_HEADER(5)\r
-\r
- HASH5_CALC;\r
-\r
- hash = p->hash;\r
- pos = p->pos;\r
-\r
- d2 = pos - hash [h2];\r
- d3 = pos - (hash + kFix3HashSize)[h3];\r
- // d4 = pos - (hash + kFix4HashSize)[h4];\r
-\r
- curMatch = (hash + kFix5HashSize)[hv];\r
-\r
- hash [h2] = pos;\r
- (hash + kFix3HashSize)[h3] = pos;\r
- // (hash + kFix4HashSize)[h4] = pos;\r
- (hash + kFix5HashSize)[hv] = pos;\r
-\r
- SET_mmm\r
-\r
- maxLen = 4;\r
-\r
- for (;;)\r
- {\r
- if (d2 < mmm && *(cur - d2) == *cur)\r
- {\r
- distances[0] = 2;\r
- distances[1] = d2 - 1;\r
- distances += 2;\r
- if (*(cur - d2 + 2) == cur[2])\r
- {\r
- }\r
- else if (d3 < mmm && *(cur - d3) == *cur)\r
- {\r
- distances[1] = d3 - 1;\r
- distances += 2;\r
- d2 = d3;\r
- }\r
- else\r
- break;\r
- }\r
- else if (d3 < mmm && *(cur - d3) == *cur)\r
- {\r
- distances[1] = d3 - 1;\r
- distances += 2;\r
- d2 = d3;\r
- }\r
- else\r
- break;\r
-\r
- distances[-2] = 3;\r
- if (*(cur - d2 + 3) != cur[3])\r
- break;\r
- UPDATE_maxLen\r
- distances[-2] = (UInt32)maxLen;\r
- if (maxLen == lenLimit)\r
- {\r
- SkipMatchesSpec(MF_PARAMS(p));\r
- MOVE_POS_RET;\r
- }\r
- break;\r
- }\r
- \r
- GET_MATCHES_FOOTER_BT(maxLen)\r
-}\r
-\r
-\r
-static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)\r
-{\r
- UInt32 mmm;\r
- UInt32 h2, h3, d2, d3, pos;\r
- unsigned maxLen;\r
- UInt32 *hash;\r
- GET_MATCHES_HEADER(4)\r
-\r
- HASH4_CALC;\r
-\r
- hash = p->hash;\r
- pos = p->pos;\r
- \r
- d2 = pos - hash [h2];\r
- d3 = pos - (hash + kFix3HashSize)[h3];\r
- curMatch = (hash + kFix4HashSize)[hv];\r
-\r
- hash [h2] = pos;\r
- (hash + kFix3HashSize)[h3] = pos;\r
- (hash + kFix4HashSize)[hv] = pos;\r
-\r
- SET_mmm\r
-\r
- maxLen = 3;\r
-\r
- for (;;)\r
- {\r
- if (d2 < mmm && *(cur - d2) == *cur)\r
- {\r
- distances[0] = 2;\r
- distances[1] = d2 - 1;\r
- distances += 2;\r
- if (*(cur - d2 + 2) == cur[2])\r
- {\r
- // distances[-2] = 3;\r
- }\r
- else if (d3 < mmm && *(cur - d3) == *cur)\r
- {\r
- d2 = d3;\r
- distances[1] = d3 - 1;\r
- distances += 2;\r
- }\r
- else\r
- break;\r
- }\r
- else if (d3 < mmm && *(cur - d3) == *cur)\r
- {\r
- d2 = d3;\r
- distances[1] = d3 - 1;\r
- distances += 2;\r
- }\r
- else\r
- break;\r
-\r
- UPDATE_maxLen\r
- distances[-2] = (UInt32)maxLen;\r
- if (maxLen == lenLimit)\r
- {\r
- p->son[p->cyclicBufferPos] = curMatch;\r
- MOVE_POS_RET;\r
- }\r
- break;\r
- }\r
- \r
- GET_MATCHES_FOOTER_HC(maxLen);\r
-}\r
-\r
-\r
-static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)\r
-{\r
- UInt32 mmm;\r
- UInt32 h2, h3, d2, d3, maxLen, pos;\r
- UInt32 *hash;\r
- GET_MATCHES_HEADER(5)\r
-\r
- HASH5_CALC;\r
-\r
- hash = p->hash;\r
- pos = p->pos;\r
-\r
- d2 = pos - hash [h2];\r
- d3 = pos - (hash + kFix3HashSize)[h3];\r
- // d4 = pos - (hash + kFix4HashSize)[h4];\r
-\r
- curMatch = (hash + kFix5HashSize)[hv];\r
-\r
- hash [h2] = pos;\r
- (hash + kFix3HashSize)[h3] = pos;\r
- // (hash + kFix4HashSize)[h4] = pos;\r
- (hash + kFix5HashSize)[hv] = pos;\r
-\r
- SET_mmm\r
- \r
- maxLen = 4;\r
-\r
- for (;;)\r
- {\r
- if (d2 < mmm && *(cur - d2) == *cur)\r
- {\r
- distances[0] = 2;\r
- distances[1] = d2 - 1;\r
- distances += 2;\r
- if (*(cur - d2 + 2) == cur[2])\r
- {\r
- }\r
- else if (d3 < mmm && *(cur - d3) == *cur)\r
- {\r
- distances[1] = d3 - 1;\r
- distances += 2;\r
- d2 = d3;\r
- }\r
- else\r
- break;\r
- }\r
- else if (d3 < mmm && *(cur - d3) == *cur)\r
- {\r
- distances[1] = d3 - 1;\r
- distances += 2;\r
- d2 = d3;\r
- }\r
- else\r
- break;\r
-\r
- distances[-2] = 3;\r
- if (*(cur - d2 + 3) != cur[3])\r
- break;\r
- UPDATE_maxLen\r
- distances[-2] = maxLen;\r
- if (maxLen == lenLimit)\r
- {\r
- p->son[p->cyclicBufferPos] = curMatch;\r
- MOVE_POS_RET;\r
- }\r
- break;\r
- }\r
- \r
- GET_MATCHES_FOOTER_HC(maxLen);\r
-}\r
-\r
-\r
-UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)\r
-{\r
- GET_MATCHES_HEADER(3)\r
- HASH_ZIP_CALC;\r
- curMatch = p->hash[hv];\r
- p->hash[hv] = p->pos;\r
- GET_MATCHES_FOOTER_HC(2)\r
-}\r
-\r
-\r
-static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)\r
-{\r
- SKIP_HEADER(2)\r
- {\r
- HASH2_CALC;\r
- curMatch = p->hash[hv];\r
- p->hash[hv] = p->pos;\r
- }\r
- SKIP_FOOTER\r
-}\r
-\r
-void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)\r
-{\r
- SKIP_HEADER(3)\r
- {\r
- HASH_ZIP_CALC;\r
- curMatch = p->hash[hv];\r
- p->hash[hv] = p->pos;\r
- }\r
- SKIP_FOOTER\r
-}\r
-\r
-static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)\r
-{\r
- SKIP_HEADER(3)\r
- {\r
- UInt32 h2;\r
- UInt32 *hash;\r
- HASH3_CALC;\r
- hash = p->hash;\r
- curMatch = (hash + kFix3HashSize)[hv];\r
- hash[h2] =\r
- (hash + kFix3HashSize)[hv] = p->pos;\r
- }\r
- SKIP_FOOTER\r
-}\r
-\r
-static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)\r
-{\r
- SKIP_HEADER(4)\r
- {\r
- UInt32 h2, h3;\r
- UInt32 *hash;\r
- HASH4_CALC;\r
- hash = p->hash;\r
- curMatch = (hash + kFix4HashSize)[hv];\r
- hash [h2] =\r
- (hash + kFix3HashSize)[h3] =\r
- (hash + kFix4HashSize)[hv] = p->pos;\r
- }\r
- SKIP_FOOTER\r
-}\r
-\r
-static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)\r
-{\r
- SKIP_HEADER(5)\r
- {\r
- UInt32 h2, h3;\r
- UInt32 *hash;\r
- HASH5_CALC;\r
- hash = p->hash;\r
- curMatch = (hash + kFix5HashSize)[hv];\r
- hash [h2] =\r
- (hash + kFix3HashSize)[h3] =\r
- // (hash + kFix4HashSize)[h4] =\r
- (hash + kFix5HashSize)[hv] = p->pos;\r
- }\r
- SKIP_FOOTER\r
-}\r
-\r
-\r
-#define HC_SKIP_HEADER(minLen) \\r
- do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \\r
- Byte *cur; \\r
- UInt32 *hash; \\r
- UInt32 *son; \\r
- UInt32 pos = p->pos; \\r
- UInt32 num2 = num; \\r
- /* (p->pos == p->posLimit) is not allowed here !!! */ \\r
- { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \\r
- num -= num2; \\r
- { const UInt32 cycPos = p->cyclicBufferPos; \\r
- son = p->son + cycPos; \\r
- p->cyclicBufferPos = cycPos + num2; } \\r
- cur = p->buffer; \\r
- hash = p->hash; \\r
- do { \\r
- UInt32 curMatch; \\r
- UInt32 hv;\r
-\r
-\r
-#define HC_SKIP_FOOTER \\r
- cur++; pos++; *son++ = curMatch; \\r
- } while (--num2); \\r
- p->buffer = cur; \\r
- p->pos = pos; \\r
- if (pos == p->posLimit) MatchFinder_CheckLimits(p); \\r
- }} while(num); \\r
-\r
-\r
-static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)\r
-{\r
- HC_SKIP_HEADER(4)\r
-\r
- UInt32 h2, h3;\r
- HASH4_CALC;\r
- curMatch = (hash + kFix4HashSize)[hv];\r
- hash [h2] =\r
- (hash + kFix3HashSize)[h3] =\r
- (hash + kFix4HashSize)[hv] = pos;\r
- \r
- HC_SKIP_FOOTER\r
-}\r
-\r
-\r
-static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)\r
-{\r
- HC_SKIP_HEADER(5)\r
- \r
- UInt32 h2, h3;\r
- HASH5_CALC\r
- curMatch = (hash + kFix5HashSize)[hv];\r
- hash [h2] =\r
- (hash + kFix3HashSize)[h3] =\r
- // (hash + kFix4HashSize)[h4] =\r
- (hash + kFix5HashSize)[hv] = pos;\r
- \r
- HC_SKIP_FOOTER\r
-}\r
-\r
-\r
-void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)\r
-{\r
- HC_SKIP_HEADER(3)\r
-\r
- HASH_ZIP_CALC;\r
- curMatch = hash[hv];\r
- hash[hv] = pos;\r
-\r
- HC_SKIP_FOOTER\r
-}\r
-\r
-\r
-void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)\r
-{\r
- vTable->Init = (Mf_Init_Func)MatchFinder_Init;\r
- vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;\r
- vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;\r
- if (!p->btMode)\r
- {\r
- if (p->numHashBytes <= 4)\r
- {\r
- vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;\r
- vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;\r
- }\r
- else\r
- {\r
- vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;\r
- vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;\r
- }\r
- }\r
- else if (p->numHashBytes == 2)\r
- {\r
- vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;\r
- vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;\r
- }\r
- else if (p->numHashBytes == 3)\r
- {\r
- vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;\r
- vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;\r
- }\r
- else if (p->numHashBytes == 4)\r
- {\r
- vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;\r
- vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;\r
- }\r
- else\r
- {\r
- vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;\r
- vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;\r
- }\r
-}\r
-\r
-\r
-\r
-void LzFindPrepare()\r
-{\r
- #ifndef FORCE_SATUR_SUB_128\r
- #ifdef USE_SATUR_SUB_128\r
- LZFIND_SATUR_SUB_CODE_FUNC f = NULL;\r
- #ifdef MY_CPU_ARM_OR_ARM64\r
- {\r
- if (CPU_IsSupported_NEON())\r
- {\r
- // #pragma message ("=== LzFind NEON")\r
- _PRF(printf("\n=== LzFind NEON\n"));\r
- f = LzFind_SaturSub_128;\r
- }\r
- // f = 0; // for debug\r
- }\r
- #else // MY_CPU_ARM_OR_ARM64\r
- if (CPU_IsSupported_SSE41())\r
- {\r
- // #pragma message ("=== LzFind SSE41")\r
- _PRF(printf("\n=== LzFind SSE41\n"));\r
- f = LzFind_SaturSub_128;\r
-\r
- #ifdef USE_AVX2\r
- if (CPU_IsSupported_AVX2())\r
- {\r
- // #pragma message ("=== LzFind AVX2")\r
- _PRF(printf("\n=== LzFind AVX2\n"));\r
- f = LzFind_SaturSub_256;\r
- }\r
- #endif\r
- }\r
- #endif // MY_CPU_ARM_OR_ARM64\r
- g_LzFind_SaturSub = f;\r
- #endif // USE_SATUR_SUB_128\r
- #endif // FORCE_SATUR_SUB_128\r
-}\r
+/* LzFind.c -- Match finder for LZ algorithms
+2024-03-01 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+// #include <stdio.h>
+
+#include "CpuArch.h"
+#include "LzFind.h"
+#include "LzHash.h"
+
+#define kBlockMoveAlign (1 << 7) // alignment for memmove()
+#define kBlockSizeAlign (1 << 16) // alignment for block allocation
+#define kBlockSizeReserveMin (1 << 24) // it's 1/256 from 4 GB dictinary
+
+#define kEmptyHashValue 0
+
+#define kMaxValForNormalize ((UInt32)0)
+// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug
+
+// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
+
+#define GET_AVAIL_BYTES(p) \
+ Inline_MatchFinder_GetNumAvailableBytes(p)
+
+
+// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+#define kFix5HashSize kFix4HashSize
+
+/*
+ HASH2_CALC:
+ if (hv) match, then cur[0] and cur[1] also match
+*/
+#define HASH2_CALC hv = GetUi16(cur);
+
+// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255]
+
+/*
+ HASH3_CALC:
+ if (cur[0]) and (h2) match, then cur[1] also match
+ if (cur[0]) and (hv) match, then cur[1] and cur[2] also match
+*/
+#define HASH3_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
+
+#define HASH4_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; }
+
+#define HASH5_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \
+ /* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \
+ hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; }
+
+#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
+
+
+static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{
+ // if (!p->directInput)
+ {
+ ISzAlloc_Free(alloc, p->bufBase);
+ p->bufBase = NULL;
+ }
+}
+
+
+static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc)
+{
+ if (blockSize == 0)
+ return 0;
+ if (!p->bufBase || p->blockSize != blockSize)
+ {
+ // size_t blockSizeT;
+ LzInWindow_Free(p, alloc);
+ p->blockSize = blockSize;
+ // blockSizeT = blockSize;
+
+ // printf("\nblockSize = 0x%x\n", blockSize);
+ /*
+ #if defined _WIN64
+ // we can allocate 4GiB, but still use UInt32 for (p->blockSize)
+ // we use UInt32 type for (p->blockSize), because
+ // we don't want to wrap over 4 GiB,
+ // when we use (p->streamPos - p->pos) that is UInt32.
+ if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign)
+ {
+ blockSizeT = ((size_t)1 << 32);
+ printf("\nchanged to blockSizeT = 4GiB\n");
+ }
+ #endif
+ */
+
+ p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
+ // printf("\nbufferBase = %p\n", p->bufBase);
+ // return 0; // for debug
+ }
+ return (p->bufBase != NULL);
+}
+
+static const Byte *MatchFinder_GetPointerToCurrentPos(void *p)
+{
+ return ((CMatchFinder *)p)->buffer;
+}
+
+static UInt32 MatchFinder_GetNumAvailableBytes(void *p)
+{
+ return GET_AVAIL_BYTES((CMatchFinder *)p);
+}
+
+
+Z7_NO_INLINE
+static void MatchFinder_ReadBlock(CMatchFinder *p)
+{
+ if (p->streamEndWasReached || p->result != SZ_OK)
+ return;
+
+ /* We use (p->streamPos - p->pos) value.
+ (p->streamPos < p->pos) is allowed. */
+
+ if (p->directInput)
+ {
+ UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
+ if (curSize > p->directInputRem)
+ curSize = (UInt32)p->directInputRem;
+ p->streamPos += curSize;
+ p->directInputRem -= curSize;
+ if (p->directInputRem == 0)
+ p->streamEndWasReached = 1;
+ return;
+ }
+
+ for (;;)
+ {
+ const Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
+ size_t size = (size_t)(p->bufBase + p->blockSize - dest);
+ if (size == 0)
+ {
+ /* we call ReadBlock() after NeedMove() and MoveBlock().
+ NeedMove() and MoveBlock() povide more than (keepSizeAfter)
+ to the end of (blockSize).
+ So we don't execute this branch in normal code flow.
+ We can go here, if we will call ReadBlock() before NeedMove(), MoveBlock().
+ */
+ // p->result = SZ_ERROR_FAIL; // we can show error here
+ return;
+ }
+
+ // #define kRead 3
+ // if (size > kRead) size = kRead; // for debug
+
+ /*
+ // we need cast (Byte *)dest.
+ #ifdef __clang__
+ #pragma GCC diagnostic ignored "-Wcast-qual"
+ #endif
+ */
+ p->result = ISeqInStream_Read(p->stream,
+ p->bufBase + (dest - p->bufBase), &size);
+ if (p->result != SZ_OK)
+ return;
+ if (size == 0)
+ {
+ p->streamEndWasReached = 1;
+ return;
+ }
+ p->streamPos += (UInt32)size;
+ if (GET_AVAIL_BYTES(p) > p->keepSizeAfter)
+ return;
+ /* here and in another (p->keepSizeAfter) checks we keep on 1 byte more than was requested by Create() function
+ (GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */
+ }
+
+ // on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter)
+}
+
+
+
+Z7_NO_INLINE
+void MatchFinder_MoveBlock(CMatchFinder *p)
+{
+ const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore;
+ const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
+ p->buffer = p->bufBase + keepBefore;
+ memmove(p->bufBase,
+ p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
+ keepBefore + (size_t)GET_AVAIL_BYTES(p));
+}
+
+/* We call MoveBlock() before ReadBlock().
+ So MoveBlock() can be wasteful operation, if the whole input data
+ can fit in current block even without calling MoveBlock().
+ in important case where (dataSize <= historySize)
+ condition (p->blockSize > dataSize + p->keepSizeAfter) is met
+ So there is no MoveBlock() in that case case.
+*/
+
+int MatchFinder_NeedMove(CMatchFinder *p)
+{
+ if (p->directInput)
+ return 0;
+ if (p->streamEndWasReached || p->result != SZ_OK)
+ return 0;
+ return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
+}
+
+void MatchFinder_ReadIfRequired(CMatchFinder *p)
+{
+ if (p->keepSizeAfter >= GET_AVAIL_BYTES(p))
+ MatchFinder_ReadBlock(p);
+}
+
+
+
+static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
+{
+ p->cutValue = 32;
+ p->btMode = 1;
+ p->numHashBytes = 4;
+ p->numHashBytes_Min = 2;
+ p->numHashOutBits = 0;
+ p->bigHash = 0;
+}
+
+#define kCrcPoly 0xEDB88320
+
+void MatchFinder_Construct(CMatchFinder *p)
+{
+ unsigned i;
+ p->buffer = NULL;
+ p->bufBase = NULL;
+ p->directInput = 0;
+ p->stream = NULL;
+ p->hash = NULL;
+ p->expectedDataSize = (UInt64)(Int64)-1;
+ MatchFinder_SetDefaultSettings(p);
+
+ for (i = 0; i < 256; i++)
+ {
+ UInt32 r = (UInt32)i;
+ unsigned j;
+ for (j = 0; j < 8; j++)
+ r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
+ p->crc[i] = r;
+ }
+}
+
+#undef kCrcPoly
+
+static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->hash);
+ p->hash = NULL;
+}
+
+void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{
+ MatchFinder_FreeThisClassMemory(p, alloc);
+ LzInWindow_Free(p, alloc);
+}
+
+static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
+{
+ const size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
+ if (sizeInBytes / sizeof(CLzRef) != num)
+ return NULL;
+ return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
+}
+
+#if (kBlockSizeReserveMin < kBlockSizeAlign * 2)
+ #error Stop_Compiling_Bad_Reserve
+#endif
+
+
+
+static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
+{
+ UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter);
+ /*
+ if (historySize > kMaxHistorySize)
+ return 0;
+ */
+ // printf("\nhistorySize == 0x%x\n", historySize);
+
+ if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow
+ return 0;
+
+ {
+ const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign;
+ const UInt32 rem = kBlockSizeMax - blockSize;
+ const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2))
+ + (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here
+ if (blockSize >= kBlockSizeMax
+ || rem < kBlockSizeReserveMin) // we reject settings that will be slow
+ return 0;
+ if (reserve >= rem)
+ blockSize = kBlockSizeMax;
+ else
+ {
+ blockSize += reserve;
+ blockSize &= ~(UInt32)(kBlockSizeAlign - 1);
+ }
+ }
+ // printf("\n LzFind_blockSize = %x\n", blockSize);
+ // printf("\n LzFind_blockSize = %d\n", blockSize >> 20);
+ return blockSize;
+}
+
+
+// input is historySize
+static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs)
+{
+ if (p->numHashBytes == 2)
+ return (1 << 16) - 1;
+ if (hs != 0)
+ hs--;
+ hs |= (hs >> 1);
+ hs |= (hs >> 2);
+ hs |= (hs >> 4);
+ hs |= (hs >> 8);
+ // we propagated 16 bits in (hs). Low 16 bits must be set later
+ if (hs >= (1 << 24))
+ {
+ if (p->numHashBytes == 3)
+ hs = (1 << 24) - 1;
+ /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
+ }
+ // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
+ hs |= (1 << 16) - 1; /* don't change it! */
+ // bt5: we adjust the size with recommended minimum size
+ if (p->numHashBytes >= 5)
+ hs |= (256 << kLzHash_CrcShift_2) - 1;
+ return hs;
+}
+
+// input is historySize
+static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs)
+{
+ if (p->numHashBytes == 2)
+ return (1 << 16) - 1;
+ if (hs != 0)
+ hs--;
+ hs |= (hs >> 1);
+ hs |= (hs >> 2);
+ hs |= (hs >> 4);
+ hs |= (hs >> 8);
+ // we propagated 16 bits in (hs). Low 16 bits must be set later
+ hs >>= 1;
+ if (hs >= (1 << 24))
+ {
+ if (p->numHashBytes == 3)
+ hs = (1 << 24) - 1;
+ else
+ hs >>= 1;
+ /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
+ }
+ // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
+ hs |= (1 << 16) - 1; /* don't change it! */
+ // bt5: we adjust the size with recommended minimum size
+ if (p->numHashBytes >= 5)
+ hs |= (256 << kLzHash_CrcShift_2) - 1;
+ return hs;
+}
+
+
+// Allocates and sizes the match finder: the sliding-window buffer, the
+// hash tables (fixed 2/3-byte tables plus the main table selected by
+// hashMask) and the "son" array (doubled in binary-tree mode).
+// Returns 1 on success, 0 on allocation failure or size-arithmetic overflow.
+// NOTE(review): keepAddBufferBefore/After are extra slack the caller needs
+// around the dictionary window — confirm against callers in LzmaEnc.c.
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+ UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+ ISzAllocPtr alloc)
+{
+ /* we need one additional byte in (p->keepSizeBefore),
+ since we use MoveBlock() after (p->pos++) and before dictionary using */
+ // keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug
+ p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
+
+ keepAddBufferAfter += matchMaxLen;
+ /* we need (p->keepSizeAfter >= p->numHashBytes) */
+ if (keepAddBufferAfter < p->numHashBytes)
+ keepAddBufferAfter = p->numHashBytes;
+ // keepAddBufferAfter -= 2; // for debug
+ p->keepSizeAfter = keepAddBufferAfter;
+
+ if (p->directInput)
+ p->blockSize = 0;
+ if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
+ {
+ size_t hashSizeSum;
+ {
+ UInt32 hs;
+ UInt32 hsCur;
+
+ // numHashOutBits != 0 : caller requested an explicit hash width;
+ // clamp it to what numHashBytes can produce, then never exceed the
+ // history-derived mask.
+ if (p->numHashOutBits != 0)
+ {
+ unsigned numBits = p->numHashOutBits;
+ const unsigned nbMax =
+ (p->numHashBytes == 2 ? 16 :
+ (p->numHashBytes == 3 ? 24 : 32));
+ if (numBits > nbMax)
+ numBits = nbMax;
+ if (numBits >= 32)
+ hs = (UInt32)0 - 1;
+ else
+ hs = ((UInt32)1 << numBits) - 1;
+ // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
+ hs |= (1 << 16) - 1; /* don't change it! */
+ if (p->numHashBytes >= 5)
+ hs |= (256 << kLzHash_CrcShift_2) - 1;
+ {
+ const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize);
+ if (hs > hs2)
+ hs = hs2;
+ }
+ hsCur = hs;
+ // If the stream is known to be shorter than the window, a smaller
+ // effective mask (hsCur) is used while allocation stays at (hs).
+ if (p->expectedDataSize < historySize)
+ {
+ const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize);
+ if (hsCur > hs2)
+ hsCur = hs2;
+ }
+ }
+ else
+ {
+ hs = MatchFinder_GetHashMask(p, historySize);
+ hsCur = hs;
+ if (p->expectedDataSize < historySize)
+ {
+ hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize);
+ if (hsCur > hs) // is it possible?
+ hsCur = hs;
+ }
+ }
+
+ p->hashMask = hsCur;
+
+ hashSizeSum = hs;
+ hashSizeSum++;
+ // overflow check for (hs + 1) in size_t
+ if (hashSizeSum < hs)
+ return 0;
+ {
+ UInt32 fixedHashSize = 0;
+ if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size;
+ if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size;
+ // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
+ hashSizeSum += fixedHashSize;
+ p->fixedHashSize = fixedHashSize;
+ }
+ }
+
+ p->matchMaxLen = matchMaxLen;
+
+ {
+ size_t newSize;
+ size_t numSons;
+ const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
+ p->historySize = historySize;
+ p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)
+
+ numSons = newCyclicBufferSize;
+ if (p->btMode)
+ numSons <<= 1;
+ newSize = hashSizeSum + numSons;
+
+ // overflow checks for the (hash + son) allocation size
+ if (numSons < newCyclicBufferSize || newSize < numSons)
+ return 0;
+
+ // aligned size is not required here, but it can be better for some loops
+ #define NUM_REFS_ALIGN_MASK 0xF
+ newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
+
+ // 22.02: we don't reallocate buffer, if old size is enough
+ if (p->hash && p->numRefs >= newSize)
+ return 1;
+
+ MatchFinder_FreeThisClassMemory(p, alloc);
+ p->numRefs = newSize;
+ p->hash = AllocRefs(newSize, alloc);
+
+ if (p->hash)
+ {
+ // son array lives in the same allocation, right after the hash tables
+ p->son = p->hash + hashSizeSum;
+ return 1;
+ }
+ }
+ }
+
+ MatchFinder_Free(p, alloc);
+ return 0;
+}
+
+
+// Recomputes (p->posLimit) and (p->lenLimit): how many positions can be
+// processed before MatchFinder_CheckLimits() must run again. The budget (n)
+// is the minimum of: distance to the normalization bound, distance to the
+// cyclic-buffer wrap, and the available input (adjusted below).
+static void MatchFinder_SetLimits(CMatchFinder *p)
+{
+ UInt32 k;
+ UInt32 n = kMaxValForNormalize - p->pos;
+ if (n == 0)
+ n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0)
+
+ k = p->cyclicBufferSize - p->cyclicBufferPos;
+ if (k < n)
+ n = k;
+
+ k = GET_AVAIL_BYTES(p);
+ {
+ const UInt32 ksa = p->keepSizeAfter;
+ UInt32 mm = p->matchMaxLen;
+ if (k > ksa)
+ k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock
+ else if (k >= mm)
+ {
+ // the limitation for (p->lenLimit) update
+ k -= mm; // optimization : to reduce the number of checks
+ k++;
+ // k = 1; // non-optimized version : for debug
+ }
+ else
+ {
+ // near end of data: matches may not exceed the remaining bytes
+ mm = k;
+ if (k != 0)
+ k = 1;
+ }
+ p->lenLimit = mm;
+ }
+ if (k < n)
+ n = k;
+
+ p->posLimit = p->pos + n;
+}
+
+
+// Clears the fixed-size 2/3-byte hash tables (first fixedHashSize entries)
+// to kEmptyHashValue.
+void MatchFinder_Init_LowHash(CMatchFinder *p)
+{
+ size_t i;
+ CLzRef *items = p->hash;
+ const size_t numItems = p->fixedHashSize;
+ for (i = 0; i < numItems; i++)
+ items[i] = kEmptyHashValue;
+}
+
+
+// Clears the main hash table (hashMask + 1 entries, located after the
+// fixed tables) to kEmptyHashValue.
+void MatchFinder_Init_HighHash(CMatchFinder *p)
+{
+ size_t i;
+ CLzRef *items = p->hash + p->fixedHashSize;
+ const size_t numItems = (size_t)p->hashMask + 1;
+ for (i = 0; i < numItems; i++)
+ items[i] = kEmptyHashValue;
+}
+
+
+// Resets the stream state: rewinds the window pointer (unless the caller
+// supplies the buffer directly) and sets pos/streamPos to 1.
+void MatchFinder_Init_4(CMatchFinder *p)
+{
+ if (!p->directInput)
+ p->buffer = p->bufBase;
+ {
+ /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.
+ the code in CMatchFinderMt expects (pos = 1) */
+ p->pos =
+ p->streamPos =
+ 1; // it's smallest optimal value. do not change it
+ // 0; // for debug
+ }
+ p->result = SZ_OK;
+ p->streamEndWasReached = 0;
+}
+
+
+// (CYC_TO_POS_OFFSET == 0) is expected by some optimized code
+#define CYC_TO_POS_OFFSET 0
+// #define CYC_TO_POS_OFFSET 1 // for debug
+
+// Full (re)initialization: clears both hash regions, resets stream state,
+// reads the first input block and computes the initial processing limits.
+// (_p) is void* to match the IMatchFinder2 vtable signature.
+void MatchFinder_Init(void *_p)
+{
+ CMatchFinder *p = (CMatchFinder *)_p;
+ MatchFinder_Init_HighHash(p);
+ MatchFinder_Init_LowHash(p);
+ MatchFinder_Init_4(p);
+ // if (readData)
+ MatchFinder_ReadBlock(p);
+
+ /* if we init (cyclicBufferPos = pos), then we can use one variable
+ instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */
+ p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos)
+ // p->cyclicBufferPos = 0; // smallest value
+ // p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses.
+ MatchFinder_SetLimits(p);
+}
+
+
+
+// Compile-time detection of SIMD support for the saturating-subtract
+// normalization kernels: SSE4.1/AVX2 intrinsics on x86/x64, NEON on ARM.
+// Defines USE_LZFIND_SATUR_SUB_128/256 and the per-function target
+// attributes (LZFIND_ATTRIB_*) used when the whole TU is not built with
+// those ISA flags.
+#ifdef MY_CPU_X86_OR_AMD64
+ #if defined(__clang__) && (__clang_major__ >= 4) \
+ || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701)
+ // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
+
+ #define USE_LZFIND_SATUR_SUB_128
+ #define USE_LZFIND_SATUR_SUB_256
+ #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
+ #define LZFIND_ATTRIB_AVX2 __attribute__((__target__("avx2")))
+ #elif defined(_MSC_VER)
+ #if (_MSC_VER >= 1600)
+ #define USE_LZFIND_SATUR_SUB_128
+ #endif
+ #if (_MSC_VER >= 1900)
+ #define USE_LZFIND_SATUR_SUB_256
+ #endif
+ #endif
+
+#elif defined(MY_CPU_ARM64) \
+ /* || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) */
+
+ #if defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
+ || defined(__GNUC__) && (__GNUC__ >= 6)
+ #define USE_LZFIND_SATUR_SUB_128
+ #ifdef MY_CPU_ARM64
+ // NEON is baseline on ARM64, so no target attribute is needed.
+ // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("")))
+ #else
+ #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=neon")))
+ #endif
+
+ #elif defined(_MSC_VER)
+ #if (_MSC_VER >= 1910)
+ #define USE_LZFIND_SATUR_SUB_128
+ #endif
+ #endif
+
+ #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
+ #include <arm64_neon.h>
+ #else
+ #include <arm_neon.h>
+ #endif
+
+#endif
+
+
+#ifdef USE_LZFIND_SATUR_SUB_128
+
+// #define Z7_SHOW_HW_STATUS
+
+// PRF(x) expands to (x) only when Z7_SHOW_HW_STATUS diagnostics are enabled.
+#ifdef Z7_SHOW_HW_STATUS
+#include <stdio.h>
+#define PRF(x) x
+PRF(;)
+#else
+#define PRF(x)
+#endif
+
+
+// SASUB_128_V(v, s): per-lane saturating subtract, max(v, s) - s,
+// i.e. (v <= s) lanes become 0 (kEmptyHashValue), others become (v - s).
+#ifdef MY_CPU_ARM_OR_ARM64
+
+#ifdef MY_CPU_ARM64
+// #define FORCE_LZFIND_SATUR_SUB_128
+#endif
+typedef uint32x4_t LzFind_v128;
+#define SASUB_128_V(v, s) \
+ vsubq_u32(vmaxq_u32(v, s), s)
+
+#else // MY_CPU_ARM_OR_ARM64
+
+#include <smmintrin.h> // sse4.1
+
+typedef __m128i LzFind_v128;
+// SSE 4.1
+#define SASUB_128_V(v, s) \
+ _mm_sub_epi32(_mm_max_epu32(v, s), s)
+
+#endif // MY_CPU_ARM_OR_ARM64
+
+
+// Apply SASUB_128_V in place to the i-th group of 4 UInt32s at (items).
+#define SASUB_128(i) \
+ *( LzFind_v128 *)( void *)(items + (i) * 4) = SASUB_128_V( \
+ *(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2);
+
+
+// Saturating-subtract (subValue) from each UInt32 in [items, lim) using
+// 128-bit vectors, 16 elements per loop iteration. The caller
+// (MatchFinder_Normalize3) guarantees the range length is a suitable
+// multiple and that (items) is aligned to its block size.
+Z7_NO_INLINE
+static
+#ifdef LZFIND_ATTRIB_SSE41
+LZFIND_ATTRIB_SSE41
+#endif
+void
+Z7_FASTCALL
+LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+{
+ const LzFind_v128 sub2 =
+ #ifdef MY_CPU_ARM_OR_ARM64
+ vdupq_n_u32(subValue);
+ #else
+ _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
+ #endif
+ Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+ do
+ {
+ SASUB_128(0) SASUB_128(1) items += 2 * 4;
+ SASUB_128(0) SASUB_128(1) items += 2 * 4;
+ }
+ while (items != lim);
+}
+
+
+
+#ifdef USE_LZFIND_SATUR_SUB_256
+
+#include <immintrin.h> // avx
+/*
+clang :immintrin.h uses
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
+ defined(__AVX2__)
+#include <avx2intrin.h>
+#endif
+so we need <avxintrin.h> for clang-cl */
+
+#if defined(__clang__)
+#include <avxintrin.h>
+#include <avx2intrin.h>
+#endif
+
+// AVX2:
+// 256-bit variant of SASUB_128: per-lane max(v, sub2) - sub2 applied in
+// place to the i-th group of 8 UInt32s at (items).
+#define SASUB_256(i) \
+ *( __m256i *)( void *)(items + (i) * 8) = \
+ _mm256_sub_epi32(_mm256_max_epu32( \
+ *(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2);
+
+// AVX2 normalization kernel: saturating-subtract (subValue) from each
+// UInt32 in [items, lim), 32 elements per loop iteration. Same range
+// preconditions as LzFind_SaturSub_128.
+Z7_NO_INLINE
+static
+#ifdef LZFIND_ATTRIB_AVX2
+LZFIND_ATTRIB_AVX2
+#endif
+void
+Z7_FASTCALL
+LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+{
+ const __m256i sub2 = _mm256_set_epi32(
+ (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
+ (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
+ Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+ do
+ {
+ SASUB_256(0) SASUB_256(1) items += 2 * 8;
+ SASUB_256(0) SASUB_256(1) items += 2 * 8;
+ }
+ while (items != lim);
+}
+#endif // USE_LZFIND_SATUR_SUB_256
+
+// Runtime-dispatched kernel pointer, chosen in LzFindPrepare() based on
+// CPU feature detection; NULL means "use DEFAULT_SaturSub".
+#ifndef FORCE_LZFIND_SATUR_SUB_128
+typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)(
+ UInt32 subValue, CLzRef *items, const CLzRef *lim);
+static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
+#endif // FORCE_LZFIND_SATUR_SUB_128
+
+#endif // USE_LZFIND_SATUR_SUB_128
+
+
+// kEmptyHashValue must be zero
+// Scalar saturating subtract of one element: clamp to subValue first so
+// the subtraction can never wrap below 0.
+// #define SASUB_32(i) { UInt32 v = items[i]; UInt32 m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; }
+#define SASUB_32(i) { UInt32 v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; }
+
+#ifdef FORCE_LZFIND_SATUR_SUB_128
+
+#define DEFAULT_SaturSub LzFind_SaturSub_128
+
+#else
+
+#define DEFAULT_SaturSub LzFind_SaturSub_32
+
+// Scalar fallback kernel: saturating-subtract (subValue) from each UInt32
+// in [items, lim), 8 elements per loop iteration (range length must be a
+// suitable multiple — guaranteed by MatchFinder_Normalize3).
+Z7_NO_INLINE
+static
+void
+Z7_FASTCALL
+LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+{
+ Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+ do
+ {
+ SASUB_32(0) SASUB_32(1) items += 2;
+ SASUB_32(0) SASUB_32(1) items += 2;
+ SASUB_32(0) SASUB_32(1) items += 2;
+ SASUB_32(0) SASUB_32(1) items += 2;
+ }
+ while (items != lim);
+}
+
+#endif
+
+
+// Subtracts (subValue) from every reference in items[0..numItems), with
+// saturation at 0 (kEmptyHashValue). Three phases: scalar head until
+// (items) reaches a 128-byte boundary, vector/scalar kernel over the
+// aligned middle, scalar tail for the remainder.
+Z7_NO_INLINE
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
+{
+ #define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7)
+ Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+ for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
+ {
+ SASUB_32(0)
+ items++;
+ }
+ {
+ // middle: whole multiples of (128 / 4) = 32 items, aligned
+ const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1);
+ CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask);
+ numItems &= k_Align_Mask;
+ if (items != lim)
+ {
+ #if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128)
+ if (g_LzFind_SaturSub)
+ g_LzFind_SaturSub(subValue, items, lim);
+ else
+ #endif
+ DEFAULT_SaturSub(subValue, items, lim);
+ }
+ items = lim;
+ }
+ Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+ for (; numItems != 0; numItems--)
+ {
+ SASUB_32(0)
+ items++;
+ }
+}
+
+
+
+// call MatchFinder_CheckLimits() only after (p->pos++) update
+
+// Housekeeping run when (pos) hits (posLimit): refill the input window,
+// normalize position references when (pos) reaches kMaxValForNormalize,
+// wrap the cyclic buffer, and recompute the limits.
+Z7_NO_INLINE
+static void MatchFinder_CheckLimits(CMatchFinder *p)
+{
+ if (// !p->streamEndWasReached && p->result == SZ_OK &&
+ p->keepSizeAfter == GET_AVAIL_BYTES(p))
+ {
+ // we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p))
+ if (MatchFinder_NeedMove(p))
+ MatchFinder_MoveBlock(p);
+ MatchFinder_ReadBlock(p);
+ }
+
+ if (p->pos == kMaxValForNormalize)
+ if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data.
+ /*
+ if we disable normalization for last bytes of data, and
+ if (data_size == 4 GiB), we don't call wastfull normalization,
+ but (pos) will be wrapped over Zero (0) in that case.
+ And we cannot resume later to normal operation
+ */
+ {
+ // MatchFinder_Normalize(p);
+ /* after normalization we need (p->pos >= p->historySize + 1); */
+ /* we can reduce subValue to aligned value, if want to keep alignment
+ of (p->pos) and (p->buffer) for speculated accesses. */
+ const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
+ // const UInt32 subValue = (1 << 15); // for debug
+ // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
+ MatchFinder_REDUCE_OFFSETS(p, subValue)
+ // normalize both hash regions (fixed tables + main table) ...
+ MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize);
+ {
+ // ... and the son array (doubled in binary-tree mode)
+ size_t numSonRefs = p->cyclicBufferSize;
+ if (p->btMode)
+ numSonRefs <<= 1;
+ MatchFinder_Normalize3(subValue, p->son, numSonRefs);
+ }
+ }
+
+ if (p->cyclicBufferPos == p->cyclicBufferSize)
+ p->cyclicBufferPos = 0;
+
+ MatchFinder_SetLimits(p);
+}
+
+
+/*
+ (lenLimit > maxLen)
+*/
+// Hash-chain match search: walks the single-linked chain starting at
+// (curMatch), records each strictly-longer match as a (len, dist - 1) pair
+// into (d), and returns the advanced (d). The chain link for the current
+// position is written into son[_cyclicBufferPos]. Stops after (cutValue)
+// steps, at the end of the chain (curMatch == 0), when a candidate falls
+// outside the window, or when a match of full (lenLimit) is found.
+Z7_FORCE_INLINE
+static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 *d, unsigned maxLen)
+{
+ /*
+ son[_cyclicBufferPos] = curMatch;
+ for (;;)
+ {
+ UInt32 delta = pos - curMatch;
+ if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+ return d;
+ {
+ const Byte *pb = cur - delta;
+ curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+ if (pb[maxLen] == cur[maxLen] && *pb == *cur)
+ {
+ UInt32 len = 0;
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ if (maxLen < len)
+ {
+ maxLen = len;
+ *d++ = len;
+ *d++ = delta - 1;
+ if (len == lenLimit)
+ return d;
+ }
+ }
+ }
+ }
+ */
+
+ const Byte *lim = cur + lenLimit;
+ son[_cyclicBufferPos] = curMatch;
+
+ do
+ {
+ UInt32 delta;
+
+ if (curMatch == 0)
+ break;
+ // if (curMatch2 >= curMatch) return NULL;
+ delta = pos - curMatch;
+ if (delta >= _cyclicBufferSize)
+ break;
+ {
+ ptrdiff_t diff;
+ curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+ diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ // quick reject: a match can only beat (maxLen) if byte [maxLen] agrees
+ if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff])
+ {
+ const Byte *c = cur;
+ while (*c == c[diff])
+ {
+ if (++c == lim)
+ {
+ d[0] = (UInt32)(lim - cur);
+ d[1] = delta - 1;
+ return d + 2;
+ }
+ }
+ {
+ const unsigned len = (unsigned)(c - cur);
+ if (maxLen < len)
+ {
+ maxLen = len;
+ d[0] = (UInt32)len;
+ d[1] = delta - 1;
+ d += 2;
+ }
+ }
+ }
+ }
+ }
+ while (--cutValue);
+
+ return d;
+}
+
+
+// Binary-tree match search: descends the tree rooted at (curMatch) while
+// re-balancing it for the current position (ptr0/ptr1 are the right/left
+// child slots at _cyclicBufferPos). Each strictly-longer match is emitted
+// into (d) as a (len, dist - 1) pair; returns the advanced (d). Stops after
+// (cutValue) steps, when (curMatch) falls outside the window (cmCheck), or
+// when a full (lenLimit) match is found.
+Z7_FORCE_INLINE
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 *d, UInt32 maxLen)
+{
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ unsigned len0 = 0, len1 = 0;
+
+ UInt32 cmCheck;
+
+ // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
+
+ // cmCheck: smallest position still inside the dictionary window
+ cmCheck = (UInt32)(pos - _cyclicBufferSize);
+ if ((UInt32)pos <= _cyclicBufferSize)
+ cmCheck = 0;
+
+ if (cmCheck < curMatch)
+ do
+ {
+ const UInt32 delta = pos - curMatch;
+ {
+ CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+ const Byte *pb = cur - delta;
+ // both subtree prefixes agree up to min(len0, len1); resume there
+ unsigned len = (len0 < len1 ? len0 : len1);
+ const UInt32 pair0 = pair[0];
+ if (pb[len] == cur[len])
+ {
+ if (++len != lenLimit && pb[len] == cur[len])
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ if (maxLen < len)
+ {
+ maxLen = (UInt32)len;
+ *d++ = (UInt32)len;
+ *d++ = delta - 1;
+ if (len == lenLimit)
+ {
+ *ptr1 = pair0;
+ *ptr0 = pair[1];
+ return d;
+ }
+ }
+ }
+ // descend left or right depending on the first differing byte
+ if (pb[len] < cur[len])
+ {
+ *ptr1 = curMatch;
+ // const UInt32 curMatch2 = pair[1];
+ // if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
+ // curMatch = curMatch2;
+ curMatch = pair[1];
+ ptr1 = pair + 1;
+ len1 = len;
+ }
+ else
+ {
+ *ptr0 = curMatch;
+ curMatch = pair[0];
+ ptr0 = pair;
+ len0 = len;
+ }
+ }
+ }
+ while(--cutValue && cmCheck < curMatch);
+
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ return d;
+}
+
+
+// Same tree descent/re-balancing as GetMatchesSpec1(), but without
+// emitting matches — used when positions are skipped (e.g. inside an
+// already-chosen match) and only the tree structure must stay current.
+static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
+{
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ unsigned len0 = 0, len1 = 0;
+
+ UInt32 cmCheck;
+
+ cmCheck = (UInt32)(pos - _cyclicBufferSize);
+ if ((UInt32)pos <= _cyclicBufferSize)
+ cmCheck = 0;
+
+ if (// curMatch >= pos || // failure
+ cmCheck < curMatch)
+ do
+ {
+ const UInt32 delta = pos - curMatch;
+ {
+ CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+ const Byte *pb = cur - delta;
+ unsigned len = (len0 < len1 ? len0 : len1);
+ if (pb[len] == cur[len])
+ {
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ {
+ if (len == lenLimit)
+ {
+ *ptr1 = pair[0];
+ *ptr0 = pair[1];
+ return;
+ }
+ }
+ }
+ if (pb[len] < cur[len])
+ {
+ *ptr1 = curMatch;
+ curMatch = pair[1];
+ ptr1 = pair + 1;
+ len1 = len;
+ }
+ else
+ {
+ *ptr0 = curMatch;
+ curMatch = pair[0];
+ ptr0 = pair;
+ len0 = len;
+ }
+ }
+ }
+ while(--cutValue && cmCheck < curMatch);
+
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ return;
+}
+
+
+// Advance the match finder by one byte; runs CheckLimits exactly when
+// (pos) reaches (posLimit).
+#define MOVE_POS \
+ p->cyclicBufferPos++; \
+ p->buffer++; \
+ { const UInt32 pos1 = p->pos + 1; \
+ p->pos = pos1; \
+ if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
+
+#define MOVE_POS_RET MOVE_POS return distances;
+
+Z7_NO_INLINE
+static void MatchFinder_MovePos(CMatchFinder *p)
+{
+ /* we go here at the end of stream data, when (avail < num_hash_bytes)
+ We don't update sons[cyclicBufferPos << btMode].
+ So (sons) record will contain junk. And we cannot resume match searching
+ to normal operation, even if we will provide more input data in buffer.
+ p->sons[p->cyclicBufferPos << p->btMode] = 0; // kEmptyHashValue
+ if (p->btMode)
+ p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue
+ */
+ MOVE_POS
+}
+
+// Shared prologue for the GetMatches/Skip functions below: declares the
+// per-call locals (hv, cur, curMatch), loads lenLimit, and bails out via
+// MatchFinder_MovePos() when fewer than (minLen) bytes remain.
+#define GET_MATCHES_HEADER2(minLen, ret_op) \
+ UInt32 hv; const Byte *cur; UInt32 curMatch; \
+ UInt32 lenLimit = p->lenLimit; \
+ if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; } \
+ cur = p->buffer;
+
+#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances)
+#define SKIP_HEADER(minLen) \
+ do { GET_MATCHES_HEADER2(minLen, continue)
+
+// Common argument pack for the *MatchesSpec workers.
+#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, \
+ p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+
+#define SKIP_FOOTER \
+ SkipMatchesSpec(MF_PARAMS(p)); \
+ MOVE_POS \
+ } while (--num);
+
+// Run the tree/chain worker, then advance one position and return the
+// (possibly advanced) distances pointer.
+#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
+ distances = func(MF_PARAMS(p), distances, (UInt32)_maxLen_); \
+ MOVE_POS_RET
+
+#define GET_MATCHES_FOOTER_BT(_maxLen_) \
+ GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
+
+#define GET_MATCHES_FOOTER_HC(_maxLen_) \
+ GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec)
+
+
+
+// Extend (maxLen) by comparing cur[] against the candidate at distance
+// (d2), up to lenLimit.
+#define UPDATE_maxLen { \
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \
+ const Byte *c = cur + maxLen; \
+ const Byte *lim = cur + lenLimit; \
+ for (; c != lim; c++) if (*(c + diff) != *c) break; \
+ maxLen = (unsigned)(c - cur); }
+
+// Binary-tree matcher, 2-byte hash: single hash table, minimum match 2.
+static UInt32* Bt2_MatchFinder_GetMatches(void *_p, UInt32 *distances)
+{
+ CMatchFinder *p = (CMatchFinder *)_p;
+ GET_MATCHES_HEADER(2)
+ HASH2_CALC
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ GET_MATCHES_FOOTER_BT(1)
+}
+
+// Binary-tree matcher with the zip/deflate-style 3-byte hash
+// (HASH_ZIP_CALC); exported for the Deflate encoder.
+UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ GET_MATCHES_HEADER(3)
+ HASH_ZIP_CALC
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ GET_MATCHES_FOOTER_BT(2)
+}
+
+
+// mmm: maximum valid match distance — the window size, clipped to (pos)
+// near the start of the stream.
+#define SET_mmm \
+ mmm = p->cyclicBufferSize; \
+ if (pos < mmm) \
+ mmm = pos;
+
+
+// Binary-tree matcher, 3-byte hash: checks the fixed 2-byte table first
+// for a short-distance length-2+ match, then runs the tree search.
+static UInt32* Bt3_MatchFinder_GetMatches(void *_p, UInt32 *distances)
+{
+ CMatchFinder *p = (CMatchFinder *)_p;
+ UInt32 mmm;
+ UInt32 h2, d2, pos;
+ unsigned maxLen;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(3)
+
+ HASH3_CALC
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash[h2];
+
+ curMatch = (hash + kFix3HashSize)[hv];
+
+ hash[h2] = pos;
+ (hash + kFix3HashSize)[hv] = pos;
+
+ SET_mmm
+
+ maxLen = 2;
+
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ UPDATE_maxLen
+ distances[0] = (UInt32)maxLen;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (maxLen == lenLimit)
+ {
+ // already at the maximum length: just refresh the tree and return
+ SkipMatchesSpec(MF_PARAMS(p));
+ MOVE_POS_RET
+ }
+ }
+
+ GET_MATCHES_FOOTER_BT(maxLen)
+}
+
+
+// Binary-tree matcher, 4-byte hash: probes the fixed 2- and 3-byte tables
+// for short matches before the main tree search (the for(;;) below runs
+// at most once; break exits the probe sequence early).
+static UInt32* Bt4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
+{
+ CMatchFinder *p = (CMatchFinder *)_p;
+ UInt32 mmm;
+ UInt32 h2, h3, d2, d3, pos;
+ unsigned maxLen;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(4)
+
+ HASH4_CALC
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2];
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ curMatch = (hash + kFix4HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[hv] = pos;
+
+ SET_mmm
+
+ maxLen = 3;
+
+ for (;;)
+ {
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ // distances[-2] = 3;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
+
+ // extend the best short-table candidate as far as it goes
+ UPDATE_maxLen
+ distances[-2] = (UInt32)maxLen;
+ if (maxLen == lenLimit)
+ {
+ SkipMatchesSpec(MF_PARAMS(p));
+ MOVE_POS_RET
+ }
+ break;
+ }
+
+ GET_MATCHES_FOOTER_BT(maxLen)
+}
+
+
+// Binary-tree matcher, 5-byte hash: like Bt4 but requires byte 3 to match
+// before extending a short-table candidate (minimum emitted long match is 4).
+static UInt32* Bt5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
+{
+ CMatchFinder *p = (CMatchFinder *)_p;
+ UInt32 mmm;
+ UInt32 h2, h3, d2, d3, pos;
+ unsigned maxLen;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(5)
+
+ HASH5_CALC
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2];
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ // d4 = pos - (hash + kFix4HashSize)[h4];
+
+ curMatch = (hash + kFix5HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ // (hash + kFix4HashSize)[h4] = pos;
+ (hash + kFix5HashSize)[hv] = pos;
+
+ SET_mmm
+
+ maxLen = 4;
+
+ for (;;)
+ {
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ distances[1] = d3 - 1;
+ distances += 2;
+ d2 = d3;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ distances[1] = d3 - 1;
+ distances += 2;
+ d2 = d3;
+ }
+ else
+ break;
+
+ distances[-2] = 3;
+ if (*(cur - d2 + 3) != cur[3])
+ break;
+ UPDATE_maxLen
+ distances[-2] = (UInt32)maxLen;
+ if (maxLen == lenLimit)
+ {
+ SkipMatchesSpec(MF_PARAMS(p));
+ MOVE_POS_RET
+ }
+ break;
+ }
+
+ GET_MATCHES_FOOTER_BT(maxLen)
+}
+
+
+// Hash-chain matcher, 4-byte hash: same short-table probing as Bt4, but
+// the long search is the single-linked chain (Hc_GetMatchesSpec) and the
+// lenLimit short-circuit just writes the chain link instead of a tree fix-up.
+static UInt32* Hc4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
+{
+ CMatchFinder *p = (CMatchFinder *)_p;
+ UInt32 mmm;
+ UInt32 h2, h3, d2, d3, pos;
+ unsigned maxLen;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(4)
+
+ HASH4_CALC
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2];
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ curMatch = (hash + kFix4HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[hv] = pos;
+
+ SET_mmm
+
+ maxLen = 3;
+
+ for (;;)
+ {
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ // distances[-2] = 3;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
+
+ UPDATE_maxLen
+ distances[-2] = (UInt32)maxLen;
+ if (maxLen == lenLimit)
+ {
+ // full-length match found: just record the chain link and advance
+ p->son[p->cyclicBufferPos] = curMatch;
+ MOVE_POS_RET
+ }
+ break;
+ }
+
+ GET_MATCHES_FOOTER_HC(maxLen)
+}
+
+
+// Hash-chain matcher, 5-byte hash: Hc4 with the extra byte-3 check of Bt5
+// before extending a short-table candidate.
+static UInt32 * Hc5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
+{
+ CMatchFinder *p = (CMatchFinder *)_p;
+ UInt32 mmm;
+ UInt32 h2, h3, d2, d3, pos;
+ unsigned maxLen;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(5)
+
+ HASH5_CALC
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2];
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ // d4 = pos - (hash + kFix4HashSize)[h4];
+
+ curMatch = (hash + kFix5HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ // (hash + kFix4HashSize)[h4] = pos;
+ (hash + kFix5HashSize)[hv] = pos;
+
+ SET_mmm
+
+ maxLen = 4;
+
+ for (;;)
+ {
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ distances[1] = d3 - 1;
+ distances += 2;
+ d2 = d3;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ distances[1] = d3 - 1;
+ distances += 2;
+ d2 = d3;
+ }
+ else
+ break;
+
+ distances[-2] = 3;
+ if (*(cur - d2 + 3) != cur[3])
+ break;
+ UPDATE_maxLen
+ distances[-2] = (UInt32)maxLen;
+ if (maxLen == lenLimit)
+ {
+ p->son[p->cyclicBufferPos] = curMatch;
+ MOVE_POS_RET
+ }
+ break;
+ }
+
+ GET_MATCHES_FOOTER_HC(maxLen)
+}
+
+
+// Hash-chain matcher with the zip/deflate-style 3-byte hash; exported for
+// the Deflate encoder.
+UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ GET_MATCHES_HEADER(3)
+ HASH_ZIP_CALC
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ GET_MATCHES_FOOTER_HC(2)
+}
+
+
+// Skip (num) positions for the bt2 matcher: update hash and tree without
+// collecting matches.
+static void Bt2_MatchFinder_Skip(void *_p, UInt32 num)
+{
+ CMatchFinder *p = (CMatchFinder *)_p;
+ SKIP_HEADER(2)
+ {
+ HASH2_CALC
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ }
+ SKIP_FOOTER
+}
+
+// Skip (num) positions for the bt3zip matcher (Deflate-style hash).
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ SKIP_HEADER(3)
+ {
+ HASH_ZIP_CALC
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ }
+ SKIP_FOOTER
+}
+
+// Skip (num) positions for the bt3 matcher: refresh the fixed 2-byte table
+// and the main 3-byte table, then update the tree.
+static void Bt3_MatchFinder_Skip(void *_p, UInt32 num)
+{
+ CMatchFinder *p = (CMatchFinder *)_p;
+ SKIP_HEADER(3)
+ {
+ UInt32 h2;
+ UInt32 *hash;
+ HASH3_CALC
+ hash = p->hash;
+ curMatch = (hash + kFix3HashSize)[hv];
+ hash[h2] =
+ (hash + kFix3HashSize)[hv] = p->pos;
+ }
+ SKIP_FOOTER
+}
+
+// Skip (num) positions for the bt4 matcher: refresh all three hash tables,
+// then update the tree.
+static void Bt4_MatchFinder_Skip(void *_p, UInt32 num)
+{
+ CMatchFinder *p = (CMatchFinder *)_p;
+ SKIP_HEADER(4)
+ {
+ UInt32 h2, h3;
+ UInt32 *hash;
+ HASH4_CALC
+ hash = p->hash;
+ curMatch = (hash + kFix4HashSize)[hv];
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[hv] = p->pos;
+ }
+ SKIP_FOOTER
+}
+
+// Skip (num) positions for the bt5 matcher: refresh the 2-, 3- and 5-byte
+// hash tables, then update the tree.
+static void Bt5_MatchFinder_Skip(void *_p, UInt32 num)
+{
+ CMatchFinder *p = (CMatchFinder *)_p;
+ SKIP_HEADER(5)
+ {
+ UInt32 h2, h3;
+ UInt32 *hash;
+ HASH5_CALC
+ hash = p->hash;
+ curMatch = (hash + kFix5HashSize)[hv];
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ // (hash + kFix4HashSize)[h4] =
+ (hash + kFix5HashSize)[hv] = p->pos;
+ }
+ SKIP_FOOTER
+}
+
+
+// Batched skip for hash-chain matchers: processes up to (posLimit - pos)
+// positions per inner loop without re-checking limits each step; the son
+// array only needs the chain link per position, so no tree fix-up.
+#define HC_SKIP_HEADER(minLen) \
+ do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
+ const Byte *cur; \
+ UInt32 *hash; \
+ UInt32 *son; \
+ UInt32 pos = p->pos; \
+ UInt32 num2 = num; \
+ /* (p->pos == p->posLimit) is not allowed here !!! */ \
+ { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \
+ num -= num2; \
+ { const UInt32 cycPos = p->cyclicBufferPos; \
+ son = p->son + cycPos; \
+ p->cyclicBufferPos = cycPos + num2; } \
+ cur = p->buffer; \
+ hash = p->hash; \
+ do { \
+ UInt32 curMatch; \
+ UInt32 hv;
+
+
+// Closes the inner batch loop, writes back the cached pointers/position,
+// and runs CheckLimits once per batch.
+#define HC_SKIP_FOOTER \
+ cur++; pos++; *son++ = curMatch; \
+ } while (--num2); \
+ p->buffer = cur; \
+ p->pos = pos; \
+ if (pos == p->posLimit) MatchFinder_CheckLimits(p); \
+ }} while(num); \
+
+
+// Batched skip for the hc4 matcher: refresh all three hash tables per
+// position; chain links are written by HC_SKIP_FOOTER.
+static void Hc4_MatchFinder_Skip(void *_p, UInt32 num)
+{
+ CMatchFinder *p = (CMatchFinder *)_p;
+ HC_SKIP_HEADER(4)
+
+ UInt32 h2, h3;
+ HASH4_CALC
+ curMatch = (hash + kFix4HashSize)[hv];
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[hv] = pos;
+
+ HC_SKIP_FOOTER
+}
+
+
+// Batched skip for the hc5 matcher: refresh the 2-, 3- and 5-byte hash
+// tables per position.
+static void Hc5_MatchFinder_Skip(void *_p, UInt32 num)
+{
+ CMatchFinder *p = (CMatchFinder *)_p;
+ HC_SKIP_HEADER(5)
+
+ UInt32 h2, h3;
+ HASH5_CALC
+ curMatch = (hash + kFix5HashSize)[hv];
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ // (hash + kFix4HashSize)[h4] =
+ (hash + kFix5HashSize)[hv] = pos;
+
+ HC_SKIP_FOOTER
+}
+
+
+// Batched skip for the hc3zip matcher (Deflate-style hash); exported for
+// the Deflate encoder.
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ HC_SKIP_HEADER(3)
+
+ HASH_ZIP_CALC
+ curMatch = hash[hv];
+ hash[hv] = pos;
+
+ HC_SKIP_FOOTER
+}
+
+
+// Fills the IMatchFinder2 vtable according to (btMode, numHashBytes):
+// hash-chain (hc4/hc5) when btMode is off, otherwise bt2/bt3/bt4/bt5.
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
+{
+ vTable->Init = MatchFinder_Init;
+ vTable->GetNumAvailableBytes = MatchFinder_GetNumAvailableBytes;
+ vTable->GetPointerToCurrentPos = MatchFinder_GetPointerToCurrentPos;
+ if (!p->btMode)
+ {
+ if (p->numHashBytes <= 4)
+ {
+ vTable->GetMatches = Hc4_MatchFinder_GetMatches;
+ vTable->Skip = Hc4_MatchFinder_Skip;
+ }
+ else
+ {
+ vTable->GetMatches = Hc5_MatchFinder_GetMatches;
+ vTable->Skip = Hc5_MatchFinder_Skip;
+ }
+ }
+ else if (p->numHashBytes == 2)
+ {
+ vTable->GetMatches = Bt2_MatchFinder_GetMatches;
+ vTable->Skip = Bt2_MatchFinder_Skip;
+ }
+ else if (p->numHashBytes == 3)
+ {
+ vTable->GetMatches = Bt3_MatchFinder_GetMatches;
+ vTable->Skip = Bt3_MatchFinder_Skip;
+ }
+ else if (p->numHashBytes == 4)
+ {
+ vTable->GetMatches = Bt4_MatchFinder_GetMatches;
+ vTable->Skip = Bt4_MatchFinder_Skip;
+ }
+ else
+ {
+ vTable->GetMatches = Bt5_MatchFinder_GetMatches;
+ vTable->Skip = Bt5_MatchFinder_Skip;
+ }
+}
+
+
+
+// One-time runtime CPU dispatch: selects the fastest available
+// saturating-subtract kernel (NEON, SSE4.1, or AVX2) and stores it in
+// g_LzFind_SaturSub; leaves it NULL (scalar fallback) otherwise.
+void LzFindPrepare(void)
+{
+ #ifndef FORCE_LZFIND_SATUR_SUB_128
+ #ifdef USE_LZFIND_SATUR_SUB_128
+ LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
+ #ifdef MY_CPU_ARM_OR_ARM64
+ {
+ if (CPU_IsSupported_NEON())
+ {
+ // #pragma message ("=== LzFind NEON")
+ PRF(printf("\n=== LzFind NEON\n"));
+ f = LzFind_SaturSub_128;
+ }
+ // f = 0; // for debug
+ }
+ #else // MY_CPU_ARM_OR_ARM64
+ if (CPU_IsSupported_SSE41())
+ {
+ // #pragma message ("=== LzFind SSE41")
+ PRF(printf("\n=== LzFind SSE41\n"));
+ f = LzFind_SaturSub_128;
+
+ #ifdef USE_LZFIND_SATUR_SUB_256
+ // AVX2 implies SSE4.1, so this only upgrades the selection
+ if (CPU_IsSupported_AVX2())
+ {
+ // #pragma message ("=== LzFind AVX2")
+ PRF(printf("\n=== LzFind AVX2\n"));
+ f = LzFind_SaturSub_256;
+ }
+ #endif
+ }
+ #endif // MY_CPU_ARM_OR_ARM64
+ g_LzFind_SaturSub = f;
+ #endif // USE_LZFIND_SATUR_SUB_128
+ #endif // FORCE_LZFIND_SATUR_SUB_128
+}
+
+
+// keep file-local helper macros from leaking into other translation units
+#undef MOVE_POS
+#undef MOVE_POS_RET
+#undef PRF
/* Lzma86Dec.c -- LZMA + x86 (BCJ) Filter Decoder
-2016-05-16 : Igor Pavlov : Public domain */
+2023-03-03 : Igor Pavlov : Public domain */
#include "Precomp.h"
return res;
if (useFilter == 1)
{
- UInt32 x86State;
- x86_Convert_Init(x86State);
- x86_Convert(dest, *destLen, 0, &x86State, 0);
+ UInt32 x86State = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL;
+ z7_BranchConvSt_X86_Dec(dest, *destLen, 0, &x86State);
}
return SZ_OK;
}
-/* LzmaDec.c -- LZMA Decoder\r
-2021-04-01 : Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-#include <string.h>\r
-\r
-/* #include "CpuArch.h" */\r
-#include "LzmaDec.h"\r
-\r
-#define kNumTopBits 24\r
-#define kTopValue ((UInt32)1 << kNumTopBits)\r
-\r
-#define kNumBitModelTotalBits 11\r
-#define kBitModelTotal (1 << kNumBitModelTotalBits)\r
-\r
-#define RC_INIT_SIZE 5\r
-\r
-#ifndef _LZMA_DEC_OPT\r
-\r
-#define kNumMoveBits 5\r
-#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }\r
-\r
-#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)\r
-#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));\r
-#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));\r
-#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \\r
- { UPDATE_0(p); i = (i + i); A0; } else \\r
- { UPDATE_1(p); i = (i + i) + 1; A1; }\r
-\r
-#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }\r
-\r
-#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \\r
- { UPDATE_0(p + i); A0; } else \\r
- { UPDATE_1(p + i); A1; }\r
-#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )\r
-#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; )\r
-#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; )\r
-\r
-#define TREE_DECODE(probs, limit, i) \\r
- { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }\r
-\r
-/* #define _LZMA_SIZE_OPT */\r
-\r
-#ifdef _LZMA_SIZE_OPT\r
-#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)\r
-#else\r
-#define TREE_6_DECODE(probs, i) \\r
- { i = 1; \\r
- TREE_GET_BIT(probs, i); \\r
- TREE_GET_BIT(probs, i); \\r
- TREE_GET_BIT(probs, i); \\r
- TREE_GET_BIT(probs, i); \\r
- TREE_GET_BIT(probs, i); \\r
- TREE_GET_BIT(probs, i); \\r
- i -= 0x40; }\r
-#endif\r
-\r
-#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol)\r
-#define MATCHED_LITER_DEC \\r
- matchByte += matchByte; \\r
- bit = offs; \\r
- offs &= matchByte; \\r
- probLit = prob + (offs + bit + symbol); \\r
- GET_BIT2(probLit, symbol, offs ^= bit; , ;)\r
-\r
-#endif // _LZMA_DEC_OPT\r
-\r
-\r
-#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }\r
-\r
-#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)\r
-#define UPDATE_0_CHECK range = bound;\r
-#define UPDATE_1_CHECK range -= bound; code -= bound;\r
-#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \\r
- { UPDATE_0_CHECK; i = (i + i); A0; } else \\r
- { UPDATE_1_CHECK; i = (i + i) + 1; A1; }\r
-#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)\r
-#define TREE_DECODE_CHECK(probs, limit, i) \\r
- { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }\r
-\r
-\r
-#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \\r
- { UPDATE_0_CHECK; i += m; m += m; } else \\r
- { UPDATE_1_CHECK; m += m; i += m; }\r
-\r
-\r
-#define kNumPosBitsMax 4\r
-#define kNumPosStatesMax (1 << kNumPosBitsMax)\r
-\r
-#define kLenNumLowBits 3\r
-#define kLenNumLowSymbols (1 << kLenNumLowBits)\r
-#define kLenNumHighBits 8\r
-#define kLenNumHighSymbols (1 << kLenNumHighBits)\r
-\r
-#define LenLow 0\r
-#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits))\r
-#define kNumLenProbs (LenHigh + kLenNumHighSymbols)\r
-\r
-#define LenChoice LenLow\r
-#define LenChoice2 (LenLow + (1 << kLenNumLowBits))\r
-\r
-#define kNumStates 12\r
-#define kNumStates2 16\r
-#define kNumLitStates 7\r
-\r
-#define kStartPosModelIndex 4\r
-#define kEndPosModelIndex 14\r
-#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))\r
-\r
-#define kNumPosSlotBits 6\r
-#define kNumLenToPosStates 4\r
-\r
-#define kNumAlignBits 4\r
-#define kAlignTableSize (1 << kNumAlignBits)\r
-\r
-#define kMatchMinLen 2\r
-#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)\r
-\r
-#define kMatchSpecLen_Error_Data (1 << 9)\r
-#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1)\r
-\r
-/* External ASM code needs same CLzmaProb array layout. So don't change it. */\r
-\r
-/* (probs_1664) is faster and better for code size at some platforms */\r
-/*\r
-#ifdef MY_CPU_X86_OR_AMD64\r
-*/\r
-#define kStartOffset 1664\r
-#define GET_PROBS p->probs_1664\r
-/*\r
-#define GET_PROBS p->probs + kStartOffset\r
-#else\r
-#define kStartOffset 0\r
-#define GET_PROBS p->probs\r
-#endif\r
-*/\r
-\r
-#define SpecPos (-kStartOffset)\r
-#define IsRep0Long (SpecPos + kNumFullDistances)\r
-#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax))\r
-#define LenCoder (RepLenCoder + kNumLenProbs)\r
-#define IsMatch (LenCoder + kNumLenProbs)\r
-#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax))\r
-#define IsRep (Align + kAlignTableSize)\r
-#define IsRepG0 (IsRep + kNumStates)\r
-#define IsRepG1 (IsRepG0 + kNumStates)\r
-#define IsRepG2 (IsRepG1 + kNumStates)\r
-#define PosSlot (IsRepG2 + kNumStates)\r
-#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))\r
-#define NUM_BASE_PROBS (Literal + kStartOffset)\r
-\r
-#if Align != 0 && kStartOffset != 0\r
- #error Stop_Compiling_Bad_LZMA_kAlign\r
-#endif\r
-\r
-#if NUM_BASE_PROBS != 1984\r
- #error Stop_Compiling_Bad_LZMA_PROBS\r
-#endif\r
-\r
-\r
-#define LZMA_LIT_SIZE 0x300\r
-\r
-#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))\r
-\r
-\r
-#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4)\r
-#define COMBINED_PS_STATE (posState + state)\r
-#define GET_LEN_STATE (posState)\r
-\r
-#define LZMA_DIC_MIN (1 << 12)\r
-\r
-/*\r
-p->remainLen : shows status of LZMA decoder:\r
- < kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset\r
- = kMatchSpecLenStart : the LZMA stream was finished with end mark\r
- = kMatchSpecLenStart + 1 : need init range coder\r
- = kMatchSpecLenStart + 2 : need init range coder and state\r
- = kMatchSpecLen_Error_Fail : Internal Code Failure\r
- = kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error\r
-*/\r
-\r
-/* ---------- LZMA_DECODE_REAL ---------- */\r
-/*\r
-LzmaDec_DecodeReal_3() can be implemented in external ASM file.\r
-3 - is the code compatibility version of that function for check at link time.\r
-*/\r
-\r
-#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3\r
-\r
-/*\r
-LZMA_DECODE_REAL()\r
-In:\r
- RangeCoder is normalized\r
- if (p->dicPos == limit)\r
- {\r
- LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases.\r
- So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol\r
- is not END_OF_PAYALOAD_MARKER, then the function doesn't write any byte to dictionary,\r
- the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later.\r
- }\r
-\r
-Processing:\r
- The first LZMA symbol will be decoded in any case.\r
- All main checks for limits are at the end of main loop,\r
- It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit),\r
- RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked.\r
- But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for\r
- next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX),\r
- that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit.\r
- So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte.\r
-\r
-Out:\r
- RangeCoder is normalized\r
- Result:\r
- SZ_OK - OK\r
- p->remainLen:\r
- < kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset\r
- = kMatchSpecLenStart : the LZMA stream was finished with end mark\r
-\r
- SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary\r
- p->remainLen : undefined\r
- p->reps[*] : undefined\r
-*/\r
-\r
-\r
-#ifdef _LZMA_DEC_OPT\r
-\r
-int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);\r
-\r
-#else\r
-\r
-static\r
-int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)\r
-{\r
- CLzmaProb *probs = GET_PROBS;\r
- unsigned state = (unsigned)p->state;\r
- UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];\r
- unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;\r
- unsigned lc = p->prop.lc;\r
- unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);\r
-\r
- Byte *dic = p->dic;\r
- SizeT dicBufSize = p->dicBufSize;\r
- SizeT dicPos = p->dicPos;\r
- \r
- UInt32 processedPos = p->processedPos;\r
- UInt32 checkDicSize = p->checkDicSize;\r
- unsigned len = 0;\r
-\r
- const Byte *buf = p->buf;\r
- UInt32 range = p->range;\r
- UInt32 code = p->code;\r
-\r
- do\r
- {\r
- CLzmaProb *prob;\r
- UInt32 bound;\r
- unsigned ttt;\r
- unsigned posState = CALC_POS_STATE(processedPos, pbMask);\r
-\r
- prob = probs + IsMatch + COMBINED_PS_STATE;\r
- IF_BIT_0(prob)\r
- {\r
- unsigned symbol;\r
- UPDATE_0(prob);\r
- prob = probs + Literal;\r
- if (processedPos != 0 || checkDicSize != 0)\r
- prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);\r
- processedPos++;\r
-\r
- if (state < kNumLitStates)\r
- {\r
- state -= (state < 4) ? state : 3;\r
- symbol = 1;\r
- #ifdef _LZMA_SIZE_OPT\r
- do { NORMAL_LITER_DEC } while (symbol < 0x100);\r
- #else\r
- NORMAL_LITER_DEC\r
- NORMAL_LITER_DEC\r
- NORMAL_LITER_DEC\r
- NORMAL_LITER_DEC\r
- NORMAL_LITER_DEC\r
- NORMAL_LITER_DEC\r
- NORMAL_LITER_DEC\r
- NORMAL_LITER_DEC\r
- #endif\r
- }\r
- else\r
- {\r
- unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];\r
- unsigned offs = 0x100;\r
- state -= (state < 10) ? 3 : 6;\r
- symbol = 1;\r
- #ifdef _LZMA_SIZE_OPT\r
- do\r
- {\r
- unsigned bit;\r
- CLzmaProb *probLit;\r
- MATCHED_LITER_DEC\r
- }\r
- while (symbol < 0x100);\r
- #else\r
- {\r
- unsigned bit;\r
- CLzmaProb *probLit;\r
- MATCHED_LITER_DEC\r
- MATCHED_LITER_DEC\r
- MATCHED_LITER_DEC\r
- MATCHED_LITER_DEC\r
- MATCHED_LITER_DEC\r
- MATCHED_LITER_DEC\r
- MATCHED_LITER_DEC\r
- MATCHED_LITER_DEC\r
- }\r
- #endif\r
- }\r
-\r
- dic[dicPos++] = (Byte)symbol;\r
- continue;\r
- }\r
- \r
- {\r
- UPDATE_1(prob);\r
- prob = probs + IsRep + state;\r
- IF_BIT_0(prob)\r
- {\r
- UPDATE_0(prob);\r
- state += kNumStates;\r
- prob = probs + LenCoder;\r
- }\r
- else\r
- {\r
- UPDATE_1(prob);\r
- prob = probs + IsRepG0 + state;\r
- IF_BIT_0(prob)\r
- {\r
- UPDATE_0(prob);\r
- prob = probs + IsRep0Long + COMBINED_PS_STATE;\r
- IF_BIT_0(prob)\r
- {\r
- UPDATE_0(prob);\r
- \r
- // that case was checked before with kBadRepCode\r
- // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }\r
- // The caller doesn't allow (dicPos == limit) case here\r
- // so we don't need the following check:\r
- // if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; }\r
- \r
- dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];\r
- dicPos++;\r
- processedPos++;\r
- state = state < kNumLitStates ? 9 : 11;\r
- continue;\r
- }\r
- UPDATE_1(prob);\r
- }\r
- else\r
- {\r
- UInt32 distance;\r
- UPDATE_1(prob);\r
- prob = probs + IsRepG1 + state;\r
- IF_BIT_0(prob)\r
- {\r
- UPDATE_0(prob);\r
- distance = rep1;\r
- }\r
- else\r
- {\r
- UPDATE_1(prob);\r
- prob = probs + IsRepG2 + state;\r
- IF_BIT_0(prob)\r
- {\r
- UPDATE_0(prob);\r
- distance = rep2;\r
- }\r
- else\r
- {\r
- UPDATE_1(prob);\r
- distance = rep3;\r
- rep3 = rep2;\r
- }\r
- rep2 = rep1;\r
- }\r
- rep1 = rep0;\r
- rep0 = distance;\r
- }\r
- state = state < kNumLitStates ? 8 : 11;\r
- prob = probs + RepLenCoder;\r
- }\r
- \r
- #ifdef _LZMA_SIZE_OPT\r
- {\r
- unsigned lim, offset;\r
- CLzmaProb *probLen = prob + LenChoice;\r
- IF_BIT_0(probLen)\r
- {\r
- UPDATE_0(probLen);\r
- probLen = prob + LenLow + GET_LEN_STATE;\r
- offset = 0;\r
- lim = (1 << kLenNumLowBits);\r
- }\r
- else\r
- {\r
- UPDATE_1(probLen);\r
- probLen = prob + LenChoice2;\r
- IF_BIT_0(probLen)\r
- {\r
- UPDATE_0(probLen);\r
- probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);\r
- offset = kLenNumLowSymbols;\r
- lim = (1 << kLenNumLowBits);\r
- }\r
- else\r
- {\r
- UPDATE_1(probLen);\r
- probLen = prob + LenHigh;\r
- offset = kLenNumLowSymbols * 2;\r
- lim = (1 << kLenNumHighBits);\r
- }\r
- }\r
- TREE_DECODE(probLen, lim, len);\r
- len += offset;\r
- }\r
- #else\r
- {\r
- CLzmaProb *probLen = prob + LenChoice;\r
- IF_BIT_0(probLen)\r
- {\r
- UPDATE_0(probLen);\r
- probLen = prob + LenLow + GET_LEN_STATE;\r
- len = 1;\r
- TREE_GET_BIT(probLen, len);\r
- TREE_GET_BIT(probLen, len);\r
- TREE_GET_BIT(probLen, len);\r
- len -= 8;\r
- }\r
- else\r
- {\r
- UPDATE_1(probLen);\r
- probLen = prob + LenChoice2;\r
- IF_BIT_0(probLen)\r
- {\r
- UPDATE_0(probLen);\r
- probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);\r
- len = 1;\r
- TREE_GET_BIT(probLen, len);\r
- TREE_GET_BIT(probLen, len);\r
- TREE_GET_BIT(probLen, len);\r
- }\r
- else\r
- {\r
- UPDATE_1(probLen);\r
- probLen = prob + LenHigh;\r
- TREE_DECODE(probLen, (1 << kLenNumHighBits), len);\r
- len += kLenNumLowSymbols * 2;\r
- }\r
- }\r
- }\r
- #endif\r
-\r
- if (state >= kNumStates)\r
- {\r
- UInt32 distance;\r
- prob = probs + PosSlot +\r
- ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);\r
- TREE_6_DECODE(prob, distance);\r
- if (distance >= kStartPosModelIndex)\r
- {\r
- unsigned posSlot = (unsigned)distance;\r
- unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));\r
- distance = (2 | (distance & 1));\r
- if (posSlot < kEndPosModelIndex)\r
- {\r
- distance <<= numDirectBits;\r
- prob = probs + SpecPos;\r
- {\r
- UInt32 m = 1;\r
- distance++;\r
- do\r
- {\r
- REV_BIT_VAR(prob, distance, m);\r
- }\r
- while (--numDirectBits);\r
- distance -= m;\r
- }\r
- }\r
- else\r
- {\r
- numDirectBits -= kNumAlignBits;\r
- do\r
- {\r
- NORMALIZE\r
- range >>= 1;\r
- \r
- {\r
- UInt32 t;\r
- code -= range;\r
- t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */\r
- distance = (distance << 1) + (t + 1);\r
- code += range & t;\r
- }\r
- /*\r
- distance <<= 1;\r
- if (code >= range)\r
- {\r
- code -= range;\r
- distance |= 1;\r
- }\r
- */\r
- }\r
- while (--numDirectBits);\r
- prob = probs + Align;\r
- distance <<= kNumAlignBits;\r
- {\r
- unsigned i = 1;\r
- REV_BIT_CONST(prob, i, 1);\r
- REV_BIT_CONST(prob, i, 2);\r
- REV_BIT_CONST(prob, i, 4);\r
- REV_BIT_LAST (prob, i, 8);\r
- distance |= i;\r
- }\r
- if (distance == (UInt32)0xFFFFFFFF)\r
- {\r
- len = kMatchSpecLenStart;\r
- state -= kNumStates;\r
- break;\r
- }\r
- }\r
- }\r
- \r
- rep3 = rep2;\r
- rep2 = rep1;\r
- rep1 = rep0;\r
- rep0 = distance + 1;\r
- state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;\r
- if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))\r
- {\r
- len += kMatchSpecLen_Error_Data + kMatchMinLen;\r
- // len = kMatchSpecLen_Error_Data;\r
- // len += kMatchMinLen;\r
- break;\r
- }\r
- }\r
-\r
- len += kMatchMinLen;\r
-\r
- {\r
- SizeT rem;\r
- unsigned curLen;\r
- SizeT pos;\r
- \r
- if ((rem = limit - dicPos) == 0)\r
- {\r
- /*\r
- We stop decoding and return SZ_OK, and we can resume decoding later.\r
- Any error conditions can be tested later in caller code.\r
- For more strict mode we can stop decoding with error\r
- // len += kMatchSpecLen_Error_Data;\r
- */\r
- break;\r
- }\r
- \r
- curLen = ((rem < len) ? (unsigned)rem : len);\r
- pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);\r
-\r
- processedPos += (UInt32)curLen;\r
-\r
- len -= curLen;\r
- if (curLen <= dicBufSize - pos)\r
- {\r
- Byte *dest = dic + dicPos;\r
- ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;\r
- const Byte *lim = dest + curLen;\r
- dicPos += (SizeT)curLen;\r
- do\r
- *(dest) = (Byte)*(dest + src);\r
- while (++dest != lim);\r
- }\r
- else\r
- {\r
- do\r
- {\r
- dic[dicPos++] = dic[pos];\r
- if (++pos == dicBufSize)\r
- pos = 0;\r
- }\r
- while (--curLen != 0);\r
- }\r
- }\r
- }\r
- }\r
- while (dicPos < limit && buf < bufLimit);\r
-\r
- NORMALIZE;\r
- \r
- p->buf = buf;\r
- p->range = range;\r
- p->code = code;\r
- p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too.\r
- p->dicPos = dicPos;\r
- p->processedPos = processedPos;\r
- p->reps[0] = rep0;\r
- p->reps[1] = rep1;\r
- p->reps[2] = rep2;\r
- p->reps[3] = rep3;\r
- p->state = (UInt32)state;\r
- if (len >= kMatchSpecLen_Error_Data)\r
- return SZ_ERROR_DATA;\r
- return SZ_OK;\r
-}\r
-#endif\r
-\r
-\r
-\r
-static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)\r
-{\r
- unsigned len = (unsigned)p->remainLen;\r
- if (len == 0 /* || len >= kMatchSpecLenStart */)\r
- return;\r
- {\r
- SizeT dicPos = p->dicPos;\r
- Byte *dic;\r
- SizeT dicBufSize;\r
- SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */\r
- {\r
- SizeT rem = limit - dicPos;\r
- if (rem < len)\r
- {\r
- len = (unsigned)(rem);\r
- if (len == 0)\r
- return;\r
- }\r
- }\r
-\r
- if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)\r
- p->checkDicSize = p->prop.dicSize;\r
-\r
- p->processedPos += (UInt32)len;\r
- p->remainLen -= (UInt32)len;\r
- dic = p->dic;\r
- rep0 = p->reps[0];\r
- dicBufSize = p->dicBufSize;\r
- do\r
- {\r
- dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];\r
- dicPos++;\r
- }\r
- while (--len);\r
- p->dicPos = dicPos;\r
- }\r
-}\r
-\r
-\r
-/*\r
-At staring of new stream we have one of the following symbols:\r
- - Literal - is allowed\r
- - Non-Rep-Match - is allowed only if it's end marker symbol\r
- - Rep-Match - is not allowed\r
-We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code\r
-*/\r
-\r
-#define kRange0 0xFFFFFFFF\r
-#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))\r
-#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)))\r
-#if kBadRepCode != (0xC0000000 - 0x400)\r
- #error Stop_Compiling_Bad_LZMA_Check\r
-#endif\r
-\r
-\r
-/*\r
-LzmaDec_DecodeReal2():\r
- It calls LZMA_DECODE_REAL() and it adjusts limit according (p->checkDicSize).\r
-\r
-We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(),\r
-and we support the following state of (p->checkDicSize):\r
- if (total_processed < p->prop.dicSize) then\r
- {\r
- (total_processed == p->processedPos)\r
- (p->checkDicSize == 0)\r
- }\r
- else\r
- (p->checkDicSize == p->prop.dicSize)\r
-*/\r
-\r
-static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)\r
-{\r
- if (p->checkDicSize == 0)\r
- {\r
- UInt32 rem = p->prop.dicSize - p->processedPos;\r
- if (limit - p->dicPos > rem)\r
- limit = p->dicPos + rem;\r
- }\r
- {\r
- int res = LZMA_DECODE_REAL(p, limit, bufLimit);\r
- if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)\r
- p->checkDicSize = p->prop.dicSize;\r
- return res;\r
- }\r
-}\r
-\r
-\r
-\r
-typedef enum\r
-{\r
- DUMMY_INPUT_EOF, /* need more input data */\r
- DUMMY_LIT,\r
- DUMMY_MATCH,\r
- DUMMY_REP\r
-} ELzmaDummy;\r
-\r
-\r
-#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH)\r
-\r
-static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut)\r
-{\r
- UInt32 range = p->range;\r
- UInt32 code = p->code;\r
- const Byte *bufLimit = *bufOut;\r
- const CLzmaProb *probs = GET_PROBS;\r
- unsigned state = (unsigned)p->state;\r
- ELzmaDummy res;\r
-\r
- for (;;)\r
- {\r
- const CLzmaProb *prob;\r
- UInt32 bound;\r
- unsigned ttt;\r
- unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1);\r
-\r
- prob = probs + IsMatch + COMBINED_PS_STATE;\r
- IF_BIT_0_CHECK(prob)\r
- {\r
- UPDATE_0_CHECK\r
-\r
- prob = probs + Literal;\r
- if (p->checkDicSize != 0 || p->processedPos != 0)\r
- prob += ((UInt32)LZMA_LIT_SIZE *\r
- ((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) +\r
- ((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));\r
-\r
- if (state < kNumLitStates)\r
- {\r
- unsigned symbol = 1;\r
- do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);\r
- }\r
- else\r
- {\r
- unsigned matchByte = p->dic[p->dicPos - p->reps[0] +\r
- (p->dicPos < p->reps[0] ? p->dicBufSize : 0)];\r
- unsigned offs = 0x100;\r
- unsigned symbol = 1;\r
- do\r
- {\r
- unsigned bit;\r
- const CLzmaProb *probLit;\r
- matchByte += matchByte;\r
- bit = offs;\r
- offs &= matchByte;\r
- probLit = prob + (offs + bit + symbol);\r
- GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; )\r
- }\r
- while (symbol < 0x100);\r
- }\r
- res = DUMMY_LIT;\r
- }\r
- else\r
- {\r
- unsigned len;\r
- UPDATE_1_CHECK;\r
-\r
- prob = probs + IsRep + state;\r
- IF_BIT_0_CHECK(prob)\r
- {\r
- UPDATE_0_CHECK;\r
- state = 0;\r
- prob = probs + LenCoder;\r
- res = DUMMY_MATCH;\r
- }\r
- else\r
- {\r
- UPDATE_1_CHECK;\r
- res = DUMMY_REP;\r
- prob = probs + IsRepG0 + state;\r
- IF_BIT_0_CHECK(prob)\r
- {\r
- UPDATE_0_CHECK;\r
- prob = probs + IsRep0Long + COMBINED_PS_STATE;\r
- IF_BIT_0_CHECK(prob)\r
- {\r
- UPDATE_0_CHECK;\r
- break;\r
- }\r
- else\r
- {\r
- UPDATE_1_CHECK;\r
- }\r
- }\r
- else\r
- {\r
- UPDATE_1_CHECK;\r
- prob = probs + IsRepG1 + state;\r
- IF_BIT_0_CHECK(prob)\r
- {\r
- UPDATE_0_CHECK;\r
- }\r
- else\r
- {\r
- UPDATE_1_CHECK;\r
- prob = probs + IsRepG2 + state;\r
- IF_BIT_0_CHECK(prob)\r
- {\r
- UPDATE_0_CHECK;\r
- }\r
- else\r
- {\r
- UPDATE_1_CHECK;\r
- }\r
- }\r
- }\r
- state = kNumStates;\r
- prob = probs + RepLenCoder;\r
- }\r
- {\r
- unsigned limit, offset;\r
- const CLzmaProb *probLen = prob + LenChoice;\r
- IF_BIT_0_CHECK(probLen)\r
- {\r
- UPDATE_0_CHECK;\r
- probLen = prob + LenLow + GET_LEN_STATE;\r
- offset = 0;\r
- limit = 1 << kLenNumLowBits;\r
- }\r
- else\r
- {\r
- UPDATE_1_CHECK;\r
- probLen = prob + LenChoice2;\r
- IF_BIT_0_CHECK(probLen)\r
- {\r
- UPDATE_0_CHECK;\r
- probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);\r
- offset = kLenNumLowSymbols;\r
- limit = 1 << kLenNumLowBits;\r
- }\r
- else\r
- {\r
- UPDATE_1_CHECK;\r
- probLen = prob + LenHigh;\r
- offset = kLenNumLowSymbols * 2;\r
- limit = 1 << kLenNumHighBits;\r
- }\r
- }\r
- TREE_DECODE_CHECK(probLen, limit, len);\r
- len += offset;\r
- }\r
-\r
- if (state < 4)\r
- {\r
- unsigned posSlot;\r
- prob = probs + PosSlot +\r
- ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<\r
- kNumPosSlotBits);\r
- TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);\r
- if (posSlot >= kStartPosModelIndex)\r
- {\r
- unsigned numDirectBits = ((posSlot >> 1) - 1);\r
-\r
- if (posSlot < kEndPosModelIndex)\r
- {\r
- prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);\r
- }\r
- else\r
- {\r
- numDirectBits -= kNumAlignBits;\r
- do\r
- {\r
- NORMALIZE_CHECK\r
- range >>= 1;\r
- code -= range & (((code - range) >> 31) - 1);\r
- /* if (code >= range) code -= range; */\r
- }\r
- while (--numDirectBits);\r
- prob = probs + Align;\r
- numDirectBits = kNumAlignBits;\r
- }\r
- {\r
- unsigned i = 1;\r
- unsigned m = 1;\r
- do\r
- {\r
- REV_BIT_CHECK(prob, i, m);\r
- }\r
- while (--numDirectBits);\r
- }\r
- }\r
- }\r
- }\r
- break;\r
- }\r
- NORMALIZE_CHECK;\r
-\r
- *bufOut = buf;\r
- return res;\r
-}\r
-\r
-void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);\r
-void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)\r
-{\r
- p->remainLen = kMatchSpecLenStart + 1;\r
- p->tempBufSize = 0;\r
-\r
- if (initDic)\r
- {\r
- p->processedPos = 0;\r
- p->checkDicSize = 0;\r
- p->remainLen = kMatchSpecLenStart + 2;\r
- }\r
- if (initState)\r
- p->remainLen = kMatchSpecLenStart + 2;\r
-}\r
-\r
-void LzmaDec_Init(CLzmaDec *p)\r
-{\r
- p->dicPos = 0;\r
- LzmaDec_InitDicAndState(p, True, True);\r
-}\r
-\r
-\r
-/*\r
-LZMA supports optional end_marker.\r
-So the decoder can lookahead for one additional LZMA-Symbol to check end_marker.\r
-That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream.\r
-When the decoder reaches dicLimit, it looks (finishMode) parameter:\r
- if (finishMode == LZMA_FINISH_ANY), the decoder doesn't lookahead\r
- if (finishMode != LZMA_FINISH_ANY), the decoder lookahead, if end_marker is possible for current position\r
-\r
-When the decoder lookahead, and the lookahead symbol is not end_marker, we have two ways:\r
- 1) Strict mode (default) : the decoder returns SZ_ERROR_DATA.\r
- 2) The relaxed mode (alternative mode) : we could return SZ_OK, and the caller\r
- must check (status) value. The caller can show the error,\r
- if the end of stream is expected, and the (status) is noit\r
- LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK.\r
-*/\r
-\r
-\r
-#define RETURN__NOT_FINISHED__FOR_FINISH \\r
- *status = LZMA_STATUS_NOT_FINISHED; \\r
- return SZ_ERROR_DATA; // for strict mode\r
- // return SZ_OK; // for relaxed mode\r
-\r
-\r
-SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,\r
- ELzmaFinishMode finishMode, ELzmaStatus *status)\r
-{\r
- SizeT inSize = *srcLen;\r
- (*srcLen) = 0;\r
- *status = LZMA_STATUS_NOT_SPECIFIED;\r
-\r
- if (p->remainLen > kMatchSpecLenStart)\r
- {\r
- if (p->remainLen > kMatchSpecLenStart + 2)\r
- return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA;\r
-\r
- for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)\r
- p->tempBuf[p->tempBufSize++] = *src++;\r
- if (p->tempBufSize != 0 && p->tempBuf[0] != 0)\r
- return SZ_ERROR_DATA;\r
- if (p->tempBufSize < RC_INIT_SIZE)\r
- {\r
- *status = LZMA_STATUS_NEEDS_MORE_INPUT;\r
- return SZ_OK;\r
- }\r
- p->code =\r
- ((UInt32)p->tempBuf[1] << 24)\r
- | ((UInt32)p->tempBuf[2] << 16)\r
- | ((UInt32)p->tempBuf[3] << 8)\r
- | ((UInt32)p->tempBuf[4]);\r
-\r
- if (p->checkDicSize == 0\r
- && p->processedPos == 0\r
- && p->code >= kBadRepCode)\r
- return SZ_ERROR_DATA;\r
-\r
- p->range = 0xFFFFFFFF;\r
- p->tempBufSize = 0;\r
-\r
- if (p->remainLen > kMatchSpecLenStart + 1)\r
- {\r
- SizeT numProbs = LzmaProps_GetNumProbs(&p->prop);\r
- SizeT i;\r
- CLzmaProb *probs = p->probs;\r
- for (i = 0; i < numProbs; i++)\r
- probs[i] = kBitModelTotal >> 1;\r
- p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;\r
- p->state = 0;\r
- }\r
-\r
- p->remainLen = 0;\r
- }\r
-\r
- for (;;)\r
- {\r
- if (p->remainLen == kMatchSpecLenStart)\r
- {\r
- if (p->code != 0)\r
- return SZ_ERROR_DATA;\r
- *status = LZMA_STATUS_FINISHED_WITH_MARK;\r
- return SZ_OK;\r
- }\r
-\r
- LzmaDec_WriteRem(p, dicLimit);\r
-\r
- {\r
- // (p->remainLen == 0 || p->dicPos == dicLimit)\r
-\r
- int checkEndMarkNow = 0;\r
-\r
- if (p->dicPos >= dicLimit)\r
- {\r
- if (p->remainLen == 0 && p->code == 0)\r
- {\r
- *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;\r
- return SZ_OK;\r
- }\r
- if (finishMode == LZMA_FINISH_ANY)\r
- {\r
- *status = LZMA_STATUS_NOT_FINISHED;\r
- return SZ_OK;\r
- }\r
- if (p->remainLen != 0)\r
- {\r
- RETURN__NOT_FINISHED__FOR_FINISH;\r
- }\r
- checkEndMarkNow = 1;\r
- }\r
-\r
- // (p->remainLen == 0)\r
-\r
- if (p->tempBufSize == 0)\r
- {\r
- const Byte *bufLimit;\r
- int dummyProcessed = -1;\r
- \r
- if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)\r
- {\r
- const Byte *bufOut = src + inSize;\r
- \r
- ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut);\r
- \r
- if (dummyRes == DUMMY_INPUT_EOF)\r
- {\r
- size_t i;\r
- if (inSize >= LZMA_REQUIRED_INPUT_MAX)\r
- break;\r
- (*srcLen) += inSize;\r
- p->tempBufSize = (unsigned)inSize;\r
- for (i = 0; i < inSize; i++)\r
- p->tempBuf[i] = src[i];\r
- *status = LZMA_STATUS_NEEDS_MORE_INPUT;\r
- return SZ_OK;\r
- }\r
- \r
- dummyProcessed = (int)(bufOut - src);\r
- if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX)\r
- break;\r
- \r
- if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))\r
- {\r
- unsigned i;\r
- (*srcLen) += (unsigned)dummyProcessed;\r
- p->tempBufSize = (unsigned)dummyProcessed;\r
- for (i = 0; i < (unsigned)dummyProcessed; i++)\r
- p->tempBuf[i] = src[i];\r
- // p->remainLen = kMatchSpecLen_Error_Data;\r
- RETURN__NOT_FINISHED__FOR_FINISH;\r
- }\r
- \r
- bufLimit = src;\r
- // we will decode only one iteration\r
- }\r
- else\r
- bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;\r
-\r
- p->buf = src;\r
- \r
- {\r
- int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit);\r
- \r
- SizeT processed = (SizeT)(p->buf - src);\r
-\r
- if (dummyProcessed < 0)\r
- {\r
- if (processed > inSize)\r
- break;\r
- }\r
- else if ((unsigned)dummyProcessed != processed)\r
- break;\r
-\r
- src += processed;\r
- inSize -= processed;\r
- (*srcLen) += processed;\r
-\r
- if (res != SZ_OK)\r
- {\r
- p->remainLen = kMatchSpecLen_Error_Data;\r
- return SZ_ERROR_DATA;\r
- }\r
- }\r
- continue;\r
- }\r
-\r
- {\r
- // we have some data in (p->tempBuf)\r
- // in strict mode: tempBufSize is not enough for one Symbol decoding.\r
- // in relaxed mode: tempBufSize not larger than required for one Symbol decoding.\r
-\r
- unsigned rem = p->tempBufSize;\r
- unsigned ahead = 0;\r
- int dummyProcessed = -1;\r
- \r
- while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize)\r
- p->tempBuf[rem++] = src[ahead++];\r
- \r
- // ahead - the size of new data copied from (src) to (p->tempBuf)\r
- // rem - the size of temp buffer including new data from (src)\r
- \r
- if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)\r
- {\r
- const Byte *bufOut = p->tempBuf + rem;\r
- \r
- ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut);\r
- \r
- if (dummyRes == DUMMY_INPUT_EOF)\r
- {\r
- if (rem >= LZMA_REQUIRED_INPUT_MAX)\r
- break;\r
- p->tempBufSize = rem;\r
- (*srcLen) += (SizeT)ahead;\r
- *status = LZMA_STATUS_NEEDS_MORE_INPUT;\r
- return SZ_OK;\r
- }\r
- \r
- dummyProcessed = (int)(bufOut - p->tempBuf);\r
-\r
- if ((unsigned)dummyProcessed < p->tempBufSize)\r
- break;\r
-\r
- if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))\r
- {\r
- (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;\r
- p->tempBufSize = (unsigned)dummyProcessed;\r
- // p->remainLen = kMatchSpecLen_Error_Data;\r
- RETURN__NOT_FINISHED__FOR_FINISH;\r
- }\r
- }\r
-\r
- p->buf = p->tempBuf;\r
- \r
- {\r
- // we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf)\r
- int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf);\r
-\r
- SizeT processed = (SizeT)(p->buf - p->tempBuf);\r
- rem = p->tempBufSize;\r
- \r
- if (dummyProcessed < 0)\r
- {\r
- if (processed > LZMA_REQUIRED_INPUT_MAX)\r
- break;\r
- if (processed < rem)\r
- break;\r
- }\r
- else if ((unsigned)dummyProcessed != processed)\r
- break;\r
- \r
- processed -= rem;\r
-\r
- src += processed;\r
- inSize -= processed;\r
- (*srcLen) += processed;\r
- p->tempBufSize = 0;\r
- \r
- if (res != SZ_OK)\r
- {\r
- p->remainLen = kMatchSpecLen_Error_Data;\r
- return SZ_ERROR_DATA;\r
- }\r
- }\r
- }\r
- }\r
- }\r
-\r
- /* Some unexpected error: internal error of code, memory corruption or hardware failure */\r
- p->remainLen = kMatchSpecLen_Error_Fail;\r
- return SZ_ERROR_FAIL;\r
-}\r
-\r
-\r
-\r
-SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)\r
-{\r
- SizeT outSize = *destLen;\r
- SizeT inSize = *srcLen;\r
- *srcLen = *destLen = 0;\r
- for (;;)\r
- {\r
- SizeT inSizeCur = inSize, outSizeCur, dicPos;\r
- ELzmaFinishMode curFinishMode;\r
- SRes res;\r
- if (p->dicPos == p->dicBufSize)\r
- p->dicPos = 0;\r
- dicPos = p->dicPos;\r
- if (outSize > p->dicBufSize - dicPos)\r
- {\r
- outSizeCur = p->dicBufSize;\r
- curFinishMode = LZMA_FINISH_ANY;\r
- }\r
- else\r
- {\r
- outSizeCur = dicPos + outSize;\r
- curFinishMode = finishMode;\r
- }\r
-\r
- res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status);\r
- src += inSizeCur;\r
- inSize -= inSizeCur;\r
- *srcLen += inSizeCur;\r
- outSizeCur = p->dicPos - dicPos;\r
- memcpy(dest, p->dic + dicPos, outSizeCur);\r
- dest += outSizeCur;\r
- outSize -= outSizeCur;\r
- *destLen += outSizeCur;\r
- if (res != 0)\r
- return res;\r
- if (outSizeCur == 0 || outSize == 0)\r
- return SZ_OK;\r
- }\r
-}\r
-\r
-void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc)\r
-{\r
- ISzAlloc_Free(alloc, p->probs);\r
- p->probs = NULL;\r
-}\r
-\r
-static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc)\r
-{\r
- ISzAlloc_Free(alloc, p->dic);\r
- p->dic = NULL;\r
-}\r
-\r
-void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc)\r
-{\r
- LzmaDec_FreeProbs(p, alloc);\r
- LzmaDec_FreeDict(p, alloc);\r
-}\r
-\r
-SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)\r
-{\r
- UInt32 dicSize;\r
- Byte d;\r
- \r
- if (size < LZMA_PROPS_SIZE)\r
- return SZ_ERROR_UNSUPPORTED;\r
- else\r
- dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24);\r
- \r
- if (dicSize < LZMA_DIC_MIN)\r
- dicSize = LZMA_DIC_MIN;\r
- p->dicSize = dicSize;\r
-\r
- d = data[0];\r
- if (d >= (9 * 5 * 5))\r
- return SZ_ERROR_UNSUPPORTED;\r
-\r
- p->lc = (Byte)(d % 9);\r
- d /= 9;\r
- p->pb = (Byte)(d / 5);\r
- p->lp = (Byte)(d % 5);\r
-\r
- return SZ_OK;\r
-}\r
-\r
-static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc)\r
-{\r
- UInt32 numProbs = LzmaProps_GetNumProbs(propNew);\r
- if (!p->probs || numProbs != p->numProbs)\r
- {\r
- LzmaDec_FreeProbs(p, alloc);\r
- p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb));\r
- if (!p->probs)\r
- return SZ_ERROR_MEM;\r
- p->probs_1664 = p->probs + 1664;\r
- p->numProbs = numProbs;\r
- }\r
- return SZ_OK;\r
-}\r
-\r
-SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)\r
-{\r
- CLzmaProps propNew;\r
- RINOK(LzmaProps_Decode(&propNew, props, propsSize));\r
- RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));\r
- p->prop = propNew;\r
- return SZ_OK;\r
-}\r
-\r
-SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)\r
-{\r
- CLzmaProps propNew;\r
- SizeT dicBufSize;\r
- RINOK(LzmaProps_Decode(&propNew, props, propsSize));\r
- RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));\r
-\r
- {\r
- UInt32 dictSize = propNew.dicSize;\r
- SizeT mask = ((UInt32)1 << 12) - 1;\r
- if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;\r
- else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;;\r
- dicBufSize = ((SizeT)dictSize + mask) & ~mask;\r
- if (dicBufSize < dictSize)\r
- dicBufSize = dictSize;\r
- }\r
-\r
- if (!p->dic || dicBufSize != p->dicBufSize)\r
- {\r
- LzmaDec_FreeDict(p, alloc);\r
- p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize);\r
- if (!p->dic)\r
- {\r
- LzmaDec_FreeProbs(p, alloc);\r
- return SZ_ERROR_MEM;\r
- }\r
- }\r
- p->dicBufSize = dicBufSize;\r
- p->prop = propNew;\r
- return SZ_OK;\r
-}\r
-\r
-SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,\r
- const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,\r
- ELzmaStatus *status, ISzAllocPtr alloc)\r
-{\r
- CLzmaDec p;\r
- SRes res;\r
- SizeT outSize = *destLen, inSize = *srcLen;\r
- *destLen = *srcLen = 0;\r
- *status = LZMA_STATUS_NOT_SPECIFIED;\r
- if (inSize < RC_INIT_SIZE)\r
- return SZ_ERROR_INPUT_EOF;\r
- LzmaDec_Construct(&p);\r
- RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc));\r
- p.dic = dest;\r
- p.dicBufSize = outSize;\r
- LzmaDec_Init(&p);\r
- *srcLen = inSize;\r
- res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);\r
- *destLen = p.dicPos;\r
- if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)\r
- res = SZ_ERROR_INPUT_EOF;\r
- LzmaDec_FreeProbs(&p, alloc);\r
- return res;\r
-}\r
+/* LzmaDec.c -- LZMA Decoder
+2023-04-07 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+/* #include "CpuArch.h" */
+#include "LzmaDec.h"
+
+// #define kNumTopBits 24
+#define kTopValue ((UInt32)1 << 24)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+
+#define RC_INIT_SIZE 5
+
+#ifndef Z7_LZMA_DEC_OPT
+
+#define kNumMoveBits 5
+#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
+
+#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
+#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
+#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
+ { UPDATE_0(p) i = (i + i); A0; } else \
+ { UPDATE_1(p) i = (i + i) + 1; A1; }
+
+#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
+
+#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
+ { UPDATE_0(p + i) A0; } else \
+ { UPDATE_1(p + i) A1; }
+#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
+#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; )
+#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; )
+
+#define TREE_DECODE(probs, limit, i) \
+ { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
+
+/* #define Z7_LZMA_SIZE_OPT */
+
+#ifdef Z7_LZMA_SIZE_OPT
+#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
+#else
+#define TREE_6_DECODE(probs, i) \
+ { i = 1; \
+ TREE_GET_BIT(probs, i) \
+ TREE_GET_BIT(probs, i) \
+ TREE_GET_BIT(probs, i) \
+ TREE_GET_BIT(probs, i) \
+ TREE_GET_BIT(probs, i) \
+ TREE_GET_BIT(probs, i) \
+ i -= 0x40; }
+#endif
+
+#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol)
+#define MATCHED_LITER_DEC \
+ matchByte += matchByte; \
+ bit = offs; \
+ offs &= matchByte; \
+ probLit = prob + (offs + bit + symbol); \
+ GET_BIT2(probLit, symbol, offs ^= bit; , ;)
+
+#endif // Z7_LZMA_DEC_OPT
+
+
+#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
+
+#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
+#define UPDATE_0_CHECK range = bound;
+#define UPDATE_1_CHECK range -= bound; code -= bound;
+#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
+ { UPDATE_0_CHECK i = (i + i); A0; } else \
+ { UPDATE_1_CHECK i = (i + i) + 1; A1; }
+#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
+#define TREE_DECODE_CHECK(probs, limit, i) \
+ { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
+
+
+#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
+ { UPDATE_0_CHECK i += m; m += m; } else \
+ { UPDATE_1_CHECK m += m; i += m; }
+
+
+#define kNumPosBitsMax 4
+#define kNumPosStatesMax (1 << kNumPosBitsMax)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
+#define LenLow 0
+#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits))
+#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
+
+#define LenChoice LenLow
+#define LenChoice2 (LenLow + (1 << kLenNumLowBits))
+
+#define kNumStates 12
+#define kNumStates2 16
+#define kNumLitStates 7
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+#define kNumPosSlotBits 6
+#define kNumLenToPosStates 4
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+
+#define kMatchMinLen 2
+#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
+
+#define kMatchSpecLen_Error_Data (1 << 9)
+#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1)
+
+/* External ASM code needs same CLzmaProb array layout. So don't change it. */
+
+/* (probs_1664) is faster and better for code size at some platforms */
+/*
+#ifdef MY_CPU_X86_OR_AMD64
+*/
+#define kStartOffset 1664
+#define GET_PROBS p->probs_1664
+/*
+#define GET_PROBS p->probs + kStartOffset
+#else
+#define kStartOffset 0
+#define GET_PROBS p->probs
+#endif
+*/
+
+#define SpecPos (-kStartOffset)
+#define IsRep0Long (SpecPos + kNumFullDistances)
+#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax))
+#define LenCoder (RepLenCoder + kNumLenProbs)
+#define IsMatch (LenCoder + kNumLenProbs)
+#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax))
+#define IsRep (Align + kAlignTableSize)
+#define IsRepG0 (IsRep + kNumStates)
+#define IsRepG1 (IsRepG0 + kNumStates)
+#define IsRepG2 (IsRepG1 + kNumStates)
+#define PosSlot (IsRepG2 + kNumStates)
+#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
+#define NUM_BASE_PROBS (Literal + kStartOffset)
+
+#if Align != 0 && kStartOffset != 0
+ #error Stop_Compiling_Bad_LZMA_kAlign
+#endif
+
+#if NUM_BASE_PROBS != 1984
+ #error Stop_Compiling_Bad_LZMA_PROBS
+#endif
+
+
+#define LZMA_LIT_SIZE 0x300
+
+#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
+
+
+#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4)
+#define COMBINED_PS_STATE (posState + state)
+#define GET_LEN_STATE (posState)
+
+#define LZMA_DIC_MIN (1 << 12)
+
+/*
+p->remainLen : shows status of LZMA decoder:
+ < kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset
+ = kMatchSpecLenStart : the LZMA stream was finished with end mark
+ = kMatchSpecLenStart + 1 : need init range coder
+ = kMatchSpecLenStart + 2 : need init range coder and state
+ = kMatchSpecLen_Error_Fail : Internal Code Failure
+ = kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error
+*/
+
+/* ---------- LZMA_DECODE_REAL ---------- */
+/*
+LzmaDec_DecodeReal_3() can be implemented in external ASM file.
+3 - is the code compatibility version of that function for check at link time.
+*/
+
+#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3
+
+/*
+LZMA_DECODE_REAL()
+In:
+ RangeCoder is normalized
+ if (p->dicPos == limit)
+ {
+ LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases.
+ So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol
+ is not END_OF_PAYALOAD_MARKER, then the function doesn't write any byte to dictionary,
+ the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later.
+ }
+
+Processing:
+ The first LZMA symbol will be decoded in any case.
+ All main checks for limits are at the end of main loop,
+ It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
+ RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked.
+ But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for
+ next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX),
+ that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit.
+ So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte.
+
+Out:
+ RangeCoder is normalized
+ Result:
+ SZ_OK - OK
+ p->remainLen:
+ < kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset
+ = kMatchSpecLenStart : the LZMA stream was finished with end mark
+
+ SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary
+ p->remainLen : undefined
+ p->reps[*] : undefined
+*/
+
+
+#ifdef Z7_LZMA_DEC_OPT
+
+int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
+
+#else
+
+static
+int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{
+ CLzmaProb *probs = GET_PROBS;
+ unsigned state = (unsigned)p->state;
+ UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
+ unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
+ unsigned lc = p->prop.lc;
+ unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);
+
+ Byte *dic = p->dic;
+ SizeT dicBufSize = p->dicBufSize;
+ SizeT dicPos = p->dicPos;
+
+ UInt32 processedPos = p->processedPos;
+ UInt32 checkDicSize = p->checkDicSize;
+ unsigned len = 0;
+
+ const Byte *buf = p->buf;
+ UInt32 range = p->range;
+ UInt32 code = p->code;
+
+ do
+ {
+ CLzmaProb *prob;
+ UInt32 bound;
+ unsigned ttt;
+ unsigned posState = CALC_POS_STATE(processedPos, pbMask);
+
+ prob = probs + IsMatch + COMBINED_PS_STATE;
+ IF_BIT_0(prob)
+ {
+ unsigned symbol;
+ UPDATE_0(prob)
+ prob = probs + Literal;
+ if (processedPos != 0 || checkDicSize != 0)
+ prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
+ processedPos++;
+
+ if (state < kNumLitStates)
+ {
+ state -= (state < 4) ? state : 3;
+ symbol = 1;
+ #ifdef Z7_LZMA_SIZE_OPT
+ do { NORMAL_LITER_DEC } while (symbol < 0x100);
+ #else
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ #endif
+ }
+ else
+ {
+ unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ unsigned offs = 0x100;
+ state -= (state < 10) ? 3 : 6;
+ symbol = 1;
+ #ifdef Z7_LZMA_SIZE_OPT
+ do
+ {
+ unsigned bit;
+ CLzmaProb *probLit;
+ MATCHED_LITER_DEC
+ }
+ while (symbol < 0x100);
+ #else
+ {
+ unsigned bit;
+ CLzmaProb *probLit;
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ }
+ #endif
+ }
+
+ dic[dicPos++] = (Byte)symbol;
+ continue;
+ }
+
+ {
+ UPDATE_1(prob)
+ prob = probs + IsRep + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob)
+ state += kNumStates;
+ prob = probs + LenCoder;
+ }
+ else
+ {
+ UPDATE_1(prob)
+ prob = probs + IsRepG0 + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob)
+ prob = probs + IsRep0Long + COMBINED_PS_STATE;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob)
+
+ // that case was checked before with kBadRepCode
+ // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
+ // The caller doesn't allow (dicPos == limit) case here
+ // so we don't need the following check:
+ // if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; }
+
+ dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ dicPos++;
+ processedPos++;
+ state = state < kNumLitStates ? 9 : 11;
+ continue;
+ }
+ UPDATE_1(prob)
+ }
+ else
+ {
+ UInt32 distance;
+ UPDATE_1(prob)
+ prob = probs + IsRepG1 + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob)
+ distance = rep1;
+ }
+ else
+ {
+ UPDATE_1(prob)
+ prob = probs + IsRepG2 + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob)
+ distance = rep2;
+ }
+ else
+ {
+ UPDATE_1(prob)
+ distance = rep3;
+ rep3 = rep2;
+ }
+ rep2 = rep1;
+ }
+ rep1 = rep0;
+ rep0 = distance;
+ }
+ state = state < kNumLitStates ? 8 : 11;
+ prob = probs + RepLenCoder;
+ }
+
+ #ifdef Z7_LZMA_SIZE_OPT
+ {
+ unsigned lim, offset;
+ CLzmaProb *probLen = prob + LenChoice;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen)
+ probLen = prob + LenLow + GET_LEN_STATE;
+ offset = 0;
+ lim = (1 << kLenNumLowBits);
+ }
+ else
+ {
+ UPDATE_1(probLen)
+ probLen = prob + LenChoice2;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen)
+ probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+ offset = kLenNumLowSymbols;
+ lim = (1 << kLenNumLowBits);
+ }
+ else
+ {
+ UPDATE_1(probLen)
+ probLen = prob + LenHigh;
+ offset = kLenNumLowSymbols * 2;
+ lim = (1 << kLenNumHighBits);
+ }
+ }
+ TREE_DECODE(probLen, lim, len)
+ len += offset;
+ }
+ #else
+ {
+ CLzmaProb *probLen = prob + LenChoice;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen)
+ probLen = prob + LenLow + GET_LEN_STATE;
+ len = 1;
+ TREE_GET_BIT(probLen, len)
+ TREE_GET_BIT(probLen, len)
+ TREE_GET_BIT(probLen, len)
+ len -= 8;
+ }
+ else
+ {
+ UPDATE_1(probLen)
+ probLen = prob + LenChoice2;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen)
+ probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+ len = 1;
+ TREE_GET_BIT(probLen, len)
+ TREE_GET_BIT(probLen, len)
+ TREE_GET_BIT(probLen, len)
+ }
+ else
+ {
+ UPDATE_1(probLen)
+ probLen = prob + LenHigh;
+ TREE_DECODE(probLen, (1 << kLenNumHighBits), len)
+ len += kLenNumLowSymbols * 2;
+ }
+ }
+ }
+ #endif
+
+ if (state >= kNumStates)
+ {
+ UInt32 distance;
+ prob = probs + PosSlot +
+ ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
+ TREE_6_DECODE(prob, distance)
+ if (distance >= kStartPosModelIndex)
+ {
+ unsigned posSlot = (unsigned)distance;
+ unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
+ distance = (2 | (distance & 1));
+ if (posSlot < kEndPosModelIndex)
+ {
+ distance <<= numDirectBits;
+ prob = probs + SpecPos;
+ {
+ UInt32 m = 1;
+ distance++;
+ do
+ {
+ REV_BIT_VAR(prob, distance, m)
+ }
+ while (--numDirectBits);
+ distance -= m;
+ }
+ }
+ else
+ {
+ numDirectBits -= kNumAlignBits;
+ do
+ {
+ NORMALIZE
+ range >>= 1;
+
+ {
+ UInt32 t;
+ code -= range;
+ t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */
+ distance = (distance << 1) + (t + 1);
+ code += range & t;
+ }
+ /*
+ distance <<= 1;
+ if (code >= range)
+ {
+ code -= range;
+ distance |= 1;
+ }
+ */
+ }
+ while (--numDirectBits);
+ prob = probs + Align;
+ distance <<= kNumAlignBits;
+ {
+ unsigned i = 1;
+ REV_BIT_CONST(prob, i, 1)
+ REV_BIT_CONST(prob, i, 2)
+ REV_BIT_CONST(prob, i, 4)
+ REV_BIT_LAST (prob, i, 8)
+ distance |= i;
+ }
+ if (distance == (UInt32)0xFFFFFFFF)
+ {
+ len = kMatchSpecLenStart;
+ state -= kNumStates;
+ break;
+ }
+ }
+ }
+
+ rep3 = rep2;
+ rep2 = rep1;
+ rep1 = rep0;
+ rep0 = distance + 1;
+ state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
+ if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
+ {
+ len += kMatchSpecLen_Error_Data + kMatchMinLen;
+ // len = kMatchSpecLen_Error_Data;
+ // len += kMatchMinLen;
+ break;
+ }
+ }
+
+ len += kMatchMinLen;
+
+ {
+ SizeT rem;
+ unsigned curLen;
+ SizeT pos;
+
+ if ((rem = limit - dicPos) == 0)
+ {
+ /*
+ We stop decoding and return SZ_OK, and we can resume decoding later.
+ Any error conditions can be tested later in caller code.
+ For more strict mode we can stop decoding with error
+ // len += kMatchSpecLen_Error_Data;
+ */
+ break;
+ }
+
+ curLen = ((rem < len) ? (unsigned)rem : len);
+ pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
+
+ processedPos += (UInt32)curLen;
+
+ len -= curLen;
+ if (curLen <= dicBufSize - pos)
+ {
+ Byte *dest = dic + dicPos;
+ ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
+ const Byte *lim = dest + curLen;
+ dicPos += (SizeT)curLen;
+ do
+ *(dest) = (Byte)*(dest + src);
+ while (++dest != lim);
+ }
+ else
+ {
+ do
+ {
+ dic[dicPos++] = dic[pos];
+ if (++pos == dicBufSize)
+ pos = 0;
+ }
+ while (--curLen != 0);
+ }
+ }
+ }
+ }
+ while (dicPos < limit && buf < bufLimit);
+
+ NORMALIZE
+
+ p->buf = buf;
+ p->range = range;
+ p->code = code;
+ p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too.
+ p->dicPos = dicPos;
+ p->processedPos = processedPos;
+ p->reps[0] = rep0;
+ p->reps[1] = rep1;
+ p->reps[2] = rep2;
+ p->reps[3] = rep3;
+ p->state = (UInt32)state;
+ if (len >= kMatchSpecLen_Error_Data)
+ return SZ_ERROR_DATA;
+ return SZ_OK;
+}
+#endif
+
+
+
+static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
+{
+ unsigned len = (unsigned)p->remainLen;
+ if (len == 0 /* || len >= kMatchSpecLenStart */)
+ return;
+ {
+ SizeT dicPos = p->dicPos;
+ Byte *dic;
+ SizeT dicBufSize;
+ SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
+ {
+ SizeT rem = limit - dicPos;
+ if (rem < len)
+ {
+ len = (unsigned)(rem);
+ if (len == 0)
+ return;
+ }
+ }
+
+ if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
+ p->checkDicSize = p->prop.dicSize;
+
+ p->processedPos += (UInt32)len;
+ p->remainLen -= (UInt32)len;
+ dic = p->dic;
+ rep0 = p->reps[0];
+ dicBufSize = p->dicBufSize;
+ do
+ {
+ dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ dicPos++;
+ }
+ while (--len);
+ p->dicPos = dicPos;
+ }
+}
+
+
+/*
+At staring of new stream we have one of the following symbols:
+ - Literal - is allowed
+ - Non-Rep-Match - is allowed only if it's end marker symbol
+ - Rep-Match - is not allowed
+We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code
+*/
+
+#define kRange0 0xFFFFFFFF
+#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))
+#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)))
+#if kBadRepCode != (0xC0000000 - 0x400)
+ #error Stop_Compiling_Bad_LZMA_Check
+#endif
+
+
+/*
+LzmaDec_DecodeReal2():
+ It calls LZMA_DECODE_REAL() and it adjusts limit according (p->checkDicSize).
+
+We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(),
+and we support the following state of (p->checkDicSize):
+ if (total_processed < p->prop.dicSize) then
+ {
+ (total_processed == p->processedPos)
+ (p->checkDicSize == 0)
+ }
+ else
+ (p->checkDicSize == p->prop.dicSize)
+*/
+
+static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{
+ if (p->checkDicSize == 0)
+ {
+ UInt32 rem = p->prop.dicSize - p->processedPos;
+ if (limit - p->dicPos > rem)
+ limit = p->dicPos + rem;
+ }
+ {
+ int res = LZMA_DECODE_REAL(p, limit, bufLimit);
+ if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
+ p->checkDicSize = p->prop.dicSize;
+ return res;
+ }
+}
+
+
+
+typedef enum
+{
+ DUMMY_INPUT_EOF, /* need more input data */
+ DUMMY_LIT,
+ DUMMY_MATCH,
+ DUMMY_REP
+} ELzmaDummy;
+
+
+#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH)
+
+static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut)
+{
+ UInt32 range = p->range;
+ UInt32 code = p->code;
+ const Byte *bufLimit = *bufOut;
+ const CLzmaProb *probs = GET_PROBS;
+ unsigned state = (unsigned)p->state;
+ ELzmaDummy res;
+
+ for (;;)
+ {
+ const CLzmaProb *prob;
+ UInt32 bound;
+ unsigned ttt;
+ unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1);
+
+ prob = probs + IsMatch + COMBINED_PS_STATE;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK
+
+ prob = probs + Literal;
+ if (p->checkDicSize != 0 || p->processedPos != 0)
+ prob += ((UInt32)LZMA_LIT_SIZE *
+ ((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) +
+ ((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
+
+ if (state < kNumLitStates)
+ {
+ unsigned symbol = 1;
+ do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
+ }
+ else
+ {
+ unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
+ (p->dicPos < p->reps[0] ? p->dicBufSize : 0)];
+ unsigned offs = 0x100;
+ unsigned symbol = 1;
+ do
+ {
+ unsigned bit;
+ const CLzmaProb *probLit;
+ matchByte += matchByte;
+ bit = offs;
+ offs &= matchByte;
+ probLit = prob + (offs + bit + symbol);
+ GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; )
+ }
+ while (symbol < 0x100);
+ }
+ res = DUMMY_LIT;
+ }
+ else
+ {
+ unsigned len;
+ UPDATE_1_CHECK
+
+ prob = probs + IsRep + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK
+ state = 0;
+ prob = probs + LenCoder;
+ res = DUMMY_MATCH;
+ }
+ else
+ {
+ UPDATE_1_CHECK
+ res = DUMMY_REP;
+ prob = probs + IsRepG0 + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK
+ prob = probs + IsRep0Long + COMBINED_PS_STATE;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK
+ break;
+ }
+ else
+ {
+ UPDATE_1_CHECK
+ }
+ }
+ else
+ {
+ UPDATE_1_CHECK
+ prob = probs + IsRepG1 + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK
+ }
+ else
+ {
+ UPDATE_1_CHECK
+ prob = probs + IsRepG2 + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK
+ }
+ else
+ {
+ UPDATE_1_CHECK
+ }
+ }
+ }
+ state = kNumStates;
+ prob = probs + RepLenCoder;
+ }
+ {
+ unsigned limit, offset;
+ const CLzmaProb *probLen = prob + LenChoice;
+ IF_BIT_0_CHECK(probLen)
+ {
+ UPDATE_0_CHECK
+ probLen = prob + LenLow + GET_LEN_STATE;
+ offset = 0;
+ limit = 1 << kLenNumLowBits;
+ }
+ else
+ {
+ UPDATE_1_CHECK
+ probLen = prob + LenChoice2;
+ IF_BIT_0_CHECK(probLen)
+ {
+ UPDATE_0_CHECK
+ probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+ offset = kLenNumLowSymbols;
+ limit = 1 << kLenNumLowBits;
+ }
+ else
+ {
+ UPDATE_1_CHECK
+ probLen = prob + LenHigh;
+ offset = kLenNumLowSymbols * 2;
+ limit = 1 << kLenNumHighBits;
+ }
+ }
+ TREE_DECODE_CHECK(probLen, limit, len)
+ len += offset;
+ }
+
+ if (state < 4)
+ {
+ unsigned posSlot;
+ prob = probs + PosSlot +
+ ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
+ kNumPosSlotBits);
+ TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot)
+ if (posSlot >= kStartPosModelIndex)
+ {
+ unsigned numDirectBits = ((posSlot >> 1) - 1);
+
+ if (posSlot < kEndPosModelIndex)
+ {
+ prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);
+ }
+ else
+ {
+ numDirectBits -= kNumAlignBits;
+ do
+ {
+ NORMALIZE_CHECK
+ range >>= 1;
+ code -= range & (((code - range) >> 31) - 1);
+ /* if (code >= range) code -= range; */
+ }
+ while (--numDirectBits);
+ prob = probs + Align;
+ numDirectBits = kNumAlignBits;
+ }
+ {
+ unsigned i = 1;
+ unsigned m = 1;
+ do
+ {
+ REV_BIT_CHECK(prob, i, m)
+ }
+ while (--numDirectBits);
+ }
+ }
+ }
+ }
+ break;
+ }
+ NORMALIZE_CHECK
+
+ *bufOut = buf;
+ return res;
+}
+
+void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);
+void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
+{
+ p->remainLen = kMatchSpecLenStart + 1;
+ p->tempBufSize = 0;
+
+ if (initDic)
+ {
+ p->processedPos = 0;
+ p->checkDicSize = 0;
+ p->remainLen = kMatchSpecLenStart + 2;
+ }
+ if (initState)
+ p->remainLen = kMatchSpecLenStart + 2;
+}
+
+void LzmaDec_Init(CLzmaDec *p)
+{
+ p->dicPos = 0;
+ LzmaDec_InitDicAndState(p, True, True);
+}
+
+
+/*
+LZMA supports optional end_marker.
+So the decoder can lookahead for one additional LZMA-Symbol to check end_marker.
+That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream.
+When the decoder reaches dicLimit, it looks (finishMode) parameter:
+ if (finishMode == LZMA_FINISH_ANY), the decoder doesn't lookahead
+ if (finishMode != LZMA_FINISH_ANY), the decoder lookahead, if end_marker is possible for current position
+
+When the decoder lookahead, and the lookahead symbol is not end_marker, we have two ways:
+ 1) Strict mode (default) : the decoder returns SZ_ERROR_DATA.
+ 2) The relaxed mode (alternative mode) : we could return SZ_OK, and the caller
+ must check (status) value. The caller can show the error,
+ if the end of stream is expected, and the (status) is noit
+ LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK.
+*/
+
+
+#define RETURN_NOT_FINISHED_FOR_FINISH \
+ *status = LZMA_STATUS_NOT_FINISHED; \
+ return SZ_ERROR_DATA; // for strict mode
+ // return SZ_OK; // for relaxed mode
+
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
+ ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+ SizeT inSize = *srcLen;
+ (*srcLen) = 0;
+ *status = LZMA_STATUS_NOT_SPECIFIED;
+
+ if (p->remainLen > kMatchSpecLenStart)
+ {
+ if (p->remainLen > kMatchSpecLenStart + 2)
+ return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA;
+
+ for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
+ p->tempBuf[p->tempBufSize++] = *src++;
+ if (p->tempBufSize != 0 && p->tempBuf[0] != 0)
+ return SZ_ERROR_DATA;
+ if (p->tempBufSize < RC_INIT_SIZE)
+ {
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+ p->code =
+ ((UInt32)p->tempBuf[1] << 24)
+ | ((UInt32)p->tempBuf[2] << 16)
+ | ((UInt32)p->tempBuf[3] << 8)
+ | ((UInt32)p->tempBuf[4]);
+
+ if (p->checkDicSize == 0
+ && p->processedPos == 0
+ && p->code >= kBadRepCode)
+ return SZ_ERROR_DATA;
+
+ p->range = 0xFFFFFFFF;
+ p->tempBufSize = 0;
+
+ if (p->remainLen > kMatchSpecLenStart + 1)
+ {
+ SizeT numProbs = LzmaProps_GetNumProbs(&p->prop);
+ SizeT i;
+ CLzmaProb *probs = p->probs;
+ for (i = 0; i < numProbs; i++)
+ probs[i] = kBitModelTotal >> 1;
+ p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
+ p->state = 0;
+ }
+
+ p->remainLen = 0;
+ }
+
+ for (;;)
+ {
+ if (p->remainLen == kMatchSpecLenStart)
+ {
+ if (p->code != 0)
+ return SZ_ERROR_DATA;
+ *status = LZMA_STATUS_FINISHED_WITH_MARK;
+ return SZ_OK;
+ }
+
+ LzmaDec_WriteRem(p, dicLimit);
+
+ {
+ // (p->remainLen == 0 || p->dicPos == dicLimit)
+
+ int checkEndMarkNow = 0;
+
+ if (p->dicPos >= dicLimit)
+ {
+ if (p->remainLen == 0 && p->code == 0)
+ {
+ *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
+ return SZ_OK;
+ }
+ if (finishMode == LZMA_FINISH_ANY)
+ {
+ *status = LZMA_STATUS_NOT_FINISHED;
+ return SZ_OK;
+ }
+ if (p->remainLen != 0)
+ {
+ RETURN_NOT_FINISHED_FOR_FINISH
+ }
+ checkEndMarkNow = 1;
+ }
+
+ // (p->remainLen == 0)
+
+ if (p->tempBufSize == 0)
+ {
+ const Byte *bufLimit;
+ int dummyProcessed = -1;
+
+ if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+ {
+ const Byte *bufOut = src + inSize;
+
+ ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut);
+
+ if (dummyRes == DUMMY_INPUT_EOF)
+ {
+ size_t i;
+ if (inSize >= LZMA_REQUIRED_INPUT_MAX)
+ break;
+ (*srcLen) += inSize;
+ p->tempBufSize = (unsigned)inSize;
+ for (i = 0; i < inSize; i++)
+ p->tempBuf[i] = src[i];
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+
+ dummyProcessed = (int)(bufOut - src);
+ if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX)
+ break;
+
+ if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
+ {
+ unsigned i;
+ (*srcLen) += (unsigned)dummyProcessed;
+ p->tempBufSize = (unsigned)dummyProcessed;
+ for (i = 0; i < (unsigned)dummyProcessed; i++)
+ p->tempBuf[i] = src[i];
+ // p->remainLen = kMatchSpecLen_Error_Data;
+ RETURN_NOT_FINISHED_FOR_FINISH
+ }
+
+ bufLimit = src;
+ // we will decode only one iteration
+ }
+ else
+ bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
+
+ p->buf = src;
+
+ {
+ int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit);
+
+ SizeT processed = (SizeT)(p->buf - src);
+
+ if (dummyProcessed < 0)
+ {
+ if (processed > inSize)
+ break;
+ }
+ else if ((unsigned)dummyProcessed != processed)
+ break;
+
+ src += processed;
+ inSize -= processed;
+ (*srcLen) += processed;
+
+ if (res != SZ_OK)
+ {
+ p->remainLen = kMatchSpecLen_Error_Data;
+ return SZ_ERROR_DATA;
+ }
+ }
+ continue;
+ }
+
+ {
+ // we have some data in (p->tempBuf)
+ // in strict mode: tempBufSize is not enough for one Symbol decoding.
+ // in relaxed mode: tempBufSize not larger than required for one Symbol decoding.
+
+ unsigned rem = p->tempBufSize;
+ unsigned ahead = 0;
+ int dummyProcessed = -1;
+
+ while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize)
+ p->tempBuf[rem++] = src[ahead++];
+
+ // ahead - the size of new data copied from (src) to (p->tempBuf)
+ // rem - the size of temp buffer including new data from (src)
+
+ if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+ {
+ const Byte *bufOut = p->tempBuf + rem;
+
+ ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut);
+
+ if (dummyRes == DUMMY_INPUT_EOF)
+ {
+ if (rem >= LZMA_REQUIRED_INPUT_MAX)
+ break;
+ p->tempBufSize = rem;
+ (*srcLen) += (SizeT)ahead;
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+
+ dummyProcessed = (int)(bufOut - p->tempBuf);
+
+ if ((unsigned)dummyProcessed < p->tempBufSize)
+ break;
+
+ if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
+ {
+ (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
+ p->tempBufSize = (unsigned)dummyProcessed;
+ // p->remainLen = kMatchSpecLen_Error_Data;
+ RETURN_NOT_FINISHED_FOR_FINISH
+ }
+ }
+
+ p->buf = p->tempBuf;
+
+ {
+ // we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf)
+ int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf);
+
+ SizeT processed = (SizeT)(p->buf - p->tempBuf);
+ rem = p->tempBufSize;
+
+ if (dummyProcessed < 0)
+ {
+ if (processed > LZMA_REQUIRED_INPUT_MAX)
+ break;
+ if (processed < rem)
+ break;
+ }
+ else if ((unsigned)dummyProcessed != processed)
+ break;
+
+ processed -= rem;
+
+ src += processed;
+ inSize -= processed;
+ (*srcLen) += processed;
+ p->tempBufSize = 0;
+
+ if (res != SZ_OK)
+ {
+ p->remainLen = kMatchSpecLen_Error_Data;
+ return SZ_ERROR_DATA;
+ }
+ }
+ }
+ }
+ }
+
+ /* Some unexpected error: internal error of code, memory corruption or hardware failure */
+ p->remainLen = kMatchSpecLen_Error_Fail;
+ return SZ_ERROR_FAIL;
+}
+
+
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+ SizeT outSize = *destLen;
+ SizeT inSize = *srcLen;
+ *srcLen = *destLen = 0;
+ for (;;)
+ {
+ SizeT inSizeCur = inSize, outSizeCur, dicPos;
+ ELzmaFinishMode curFinishMode;
+ SRes res;
+ if (p->dicPos == p->dicBufSize)
+ p->dicPos = 0;
+ dicPos = p->dicPos;
+ if (outSize > p->dicBufSize - dicPos)
+ {
+ outSizeCur = p->dicBufSize;
+ curFinishMode = LZMA_FINISH_ANY;
+ }
+ else
+ {
+ outSizeCur = dicPos + outSize;
+ curFinishMode = finishMode;
+ }
+
+ res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status);
+ src += inSizeCur;
+ inSize -= inSizeCur;
+ *srcLen += inSizeCur;
+ outSizeCur = p->dicPos - dicPos;
+ memcpy(dest, p->dic + dicPos, outSizeCur);
+ dest += outSizeCur;
+ outSize -= outSizeCur;
+ *destLen += outSizeCur;
+ if (res != 0)
+ return res;
+ if (outSizeCur == 0 || outSize == 0)
+ return SZ_OK;
+ }
+}
+
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->probs);
+ p->probs = NULL;
+}
+
+static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->dic);
+ p->dic = NULL;
+}
+
+void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc)
+{
+ LzmaDec_FreeProbs(p, alloc);
+ LzmaDec_FreeDict(p, alloc);
+}
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
+{
+ UInt32 dicSize;
+ Byte d;
+
+ if (size < LZMA_PROPS_SIZE)
+ return SZ_ERROR_UNSUPPORTED;
+ else
+ dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24);
+
+ if (dicSize < LZMA_DIC_MIN)
+ dicSize = LZMA_DIC_MIN;
+ p->dicSize = dicSize;
+
+ d = data[0];
+ if (d >= (9 * 5 * 5))
+ return SZ_ERROR_UNSUPPORTED;
+
+ p->lc = (Byte)(d % 9);
+ d /= 9;
+ p->pb = (Byte)(d / 5);
+ p->lp = (Byte)(d % 5);
+
+ return SZ_OK;
+}
+
+static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc)
+{
+ UInt32 numProbs = LzmaProps_GetNumProbs(propNew);
+ if (!p->probs || numProbs != p->numProbs)
+ {
+ LzmaDec_FreeProbs(p, alloc);
+ p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb));
+ if (!p->probs)
+ return SZ_ERROR_MEM;
+ p->probs_1664 = p->probs + 1664;
+ p->numProbs = numProbs;
+ }
+ return SZ_OK;
+}
+
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
+{
+ CLzmaProps propNew;
+ RINOK(LzmaProps_Decode(&propNew, props, propsSize))
+ RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
+ p->prop = propNew;
+ return SZ_OK;
+}
+
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
+{
+ CLzmaProps propNew;
+ SizeT dicBufSize;
+ RINOK(LzmaProps_Decode(&propNew, props, propsSize))
+ RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
+
+ {
+ UInt32 dictSize = propNew.dicSize;
+ SizeT mask = ((UInt32)1 << 12) - 1;
+ if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
+ else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;
+ dicBufSize = ((SizeT)dictSize + mask) & ~mask;
+ if (dicBufSize < dictSize)
+ dicBufSize = dictSize;
+ }
+
+ if (!p->dic || dicBufSize != p->dicBufSize)
+ {
+ LzmaDec_FreeDict(p, alloc);
+ p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize);
+ if (!p->dic)
+ {
+ LzmaDec_FreeProbs(p, alloc);
+ return SZ_ERROR_MEM;
+ }
+ }
+ p->dicBufSize = dicBufSize;
+ p->prop = propNew;
+ return SZ_OK;
+}
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+ const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+ ELzmaStatus *status, ISzAllocPtr alloc)
+{
+ CLzmaDec p;
+ SRes res;
+ SizeT outSize = *destLen, inSize = *srcLen;
+ *destLen = *srcLen = 0;
+ *status = LZMA_STATUS_NOT_SPECIFIED;
+ if (inSize < RC_INIT_SIZE)
+ return SZ_ERROR_INPUT_EOF;
+ LzmaDec_CONSTRUCT(&p)
+ RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc))
+ p.dic = dest;
+ p.dicBufSize = outSize;
+ LzmaDec_Init(&p);
+ *srcLen = inSize;
+ res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
+ *destLen = p.dicPos;
+ if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
+ res = SZ_ERROR_INPUT_EOF;
+ LzmaDec_FreeProbs(&p, alloc);
+ return res;
+}
-/* LzmaEnc.c -- LZMA Encoder\r
-2022-07-15: Igor Pavlov : Public domain */\r
-\r
-#include "Precomp.h"\r
-\r
-#include <string.h>\r
-\r
-/* #define SHOW_STAT */\r
-/* #define SHOW_STAT2 */\r
-\r
-#if defined(SHOW_STAT) || defined(SHOW_STAT2)\r
-#include <stdio.h>\r
-#endif\r
-\r
-#include "CpuArch.h"\r
-#include "LzmaEnc.h"\r
-\r
-#include "LzFind.h"\r
-#ifndef _7ZIP_ST\r
-#include "LzFindMt.h"\r
-#endif\r
-\r
-/* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */\r
-\r
-SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize,\r
- ISzAllocPtr alloc, ISzAllocPtr allocBig);\r
-SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,\r
- UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);\r
-SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,\r
- Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);\r
-const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp);\r
-void LzmaEnc_Finish(CLzmaEncHandle pp);\r
-void LzmaEnc_SaveState(CLzmaEncHandle pp);\r
-void LzmaEnc_RestoreState(CLzmaEncHandle pp);\r
-\r
-#ifdef SHOW_STAT\r
-static unsigned g_STAT_OFFSET = 0;\r
-#endif\r
-\r
-/* for good normalization speed we still reserve 256 MB before 4 GB range */\r
-#define kLzmaMaxHistorySize ((UInt32)15 << 28)\r
-\r
-#define kNumTopBits 24\r
-#define kTopValue ((UInt32)1 << kNumTopBits)\r
-\r
-#define kNumBitModelTotalBits 11\r
-#define kBitModelTotal (1 << kNumBitModelTotalBits)\r
-#define kNumMoveBits 5\r
-#define kProbInitValue (kBitModelTotal >> 1)\r
-\r
-#define kNumMoveReducingBits 4\r
-#define kNumBitPriceShiftBits 4\r
-// #define kBitPrice (1 << kNumBitPriceShiftBits)\r
-\r
-#define REP_LEN_COUNT 64\r
-\r
-void LzmaEncProps_Init(CLzmaEncProps *p)\r
-{\r
- p->level = 5;\r
- p->dictSize = p->mc = 0;\r
- p->reduceSize = (UInt64)(Int64)-1;\r
- p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;\r
- p->writeEndMark = 0;\r
- p->affinity = 0;\r
-}\r
-\r
-void LzmaEncProps_Normalize(CLzmaEncProps *p)\r
-{\r
- int level = p->level;\r
- if (level < 0) level = 5;\r
- p->level = level;\r
- \r
- if (p->dictSize == 0)\r
- p->dictSize =\r
- ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) :\r
- ( level <= 6 ? ((UInt32)1 << (level + 19)) :\r
- ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26)\r
- )));\r
-\r
- if (p->dictSize > p->reduceSize)\r
- {\r
- UInt32 v = (UInt32)p->reduceSize;\r
- const UInt32 kReduceMin = ((UInt32)1 << 12);\r
- if (v < kReduceMin)\r
- v = kReduceMin;\r
- if (p->dictSize > v)\r
- p->dictSize = v;\r
- }\r
-\r
- if (p->lc < 0) p->lc = 3;\r
- if (p->lp < 0) p->lp = 0;\r
- if (p->pb < 0) p->pb = 2;\r
-\r
- if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);\r
- if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);\r
- if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);\r
- if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5);\r
- if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1);\r
- \r
- if (p->numThreads < 0)\r
- p->numThreads =\r
- #ifndef _7ZIP_ST\r
- ((p->btMode && p->algo) ? 2 : 1);\r
- #else\r
- 1;\r
- #endif\r
-}\r
-\r
-UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)\r
-{\r
- CLzmaEncProps props = *props2;\r
- LzmaEncProps_Normalize(&props);\r
- return props.dictSize;\r
-}\r
-\r
-\r
-/*\r
-x86/x64:\r
-\r
-BSR:\r
- IF (SRC == 0) ZF = 1, DEST is undefined;\r
- AMD : DEST is unchanged;\r
- IF (SRC != 0) ZF = 0; DEST is index of top non-zero bit\r
- BSR is slow in some processors\r
-\r
-LZCNT:\r
- IF (SRC == 0) CF = 1, DEST is size_in_bits_of_register(src) (32 or 64)\r
- IF (SRC != 0) CF = 0, DEST = num_lead_zero_bits\r
- IF (DEST == 0) ZF = 1;\r
-\r
-LZCNT works only in new processors starting from Haswell.\r
-if LZCNT is not supported by processor, then it's executed as BSR.\r
-LZCNT can be faster than BSR, if supported.\r
-*/\r
-\r
-// #define LZMA_LOG_BSR\r
-\r
-#if defined(MY_CPU_ARM_OR_ARM64) /* || defined(MY_CPU_X86_OR_AMD64) */\r
-\r
- #if (defined(__clang__) && (__clang_major__ >= 6)) \\r
- || (defined(__GNUC__) && (__GNUC__ >= 6))\r
- #define LZMA_LOG_BSR\r
- #elif defined(_MSC_VER) && (_MSC_VER >= 1300)\r
- // #if defined(MY_CPU_ARM_OR_ARM64)\r
- #define LZMA_LOG_BSR\r
- // #endif\r
- #endif\r
-#endif\r
-\r
-// #include <intrin.h>\r
-\r
-#ifdef LZMA_LOG_BSR\r
-\r
-#if defined(__clang__) \\r
- || defined(__GNUC__)\r
-\r
-/*\r
- C code: : (30 - __builtin_clz(x))\r
- gcc9/gcc10 for x64 /x86 : 30 - (bsr(x) xor 31)\r
- clang10 for x64 : 31 + (bsr(x) xor -32)\r
-*/\r
-\r
- #define MY_clz(x) ((unsigned)__builtin_clz(x))\r
- // __lzcnt32\r
- // __builtin_ia32_lzcnt_u32\r
-\r
-#else // #if defined(_MSC_VER)\r
-\r
- #ifdef MY_CPU_ARM_OR_ARM64\r
-\r
- #define MY_clz _CountLeadingZeros\r
-\r
- #else // if defined(MY_CPU_X86_OR_AMD64)\r
-\r
- // #define MY_clz __lzcnt // we can use lzcnt (unsupported by old CPU)\r
- // _BitScanReverse code is not optimal for some MSVC compilers\r
- #define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); zz--; \\r
- res = (zz + zz) + (pos >> zz); }\r
-\r
- #endif // MY_CPU_X86_OR_AMD64\r
-\r
-#endif // _MSC_VER\r
-\r
-\r
-#ifndef BSR2_RET\r
-\r
- #define BSR2_RET(pos, res) { unsigned zz = 30 - MY_clz(pos); \\r
- res = (zz + zz) + (pos >> zz); }\r
-\r
-#endif\r
-\r
-\r
-unsigned GetPosSlot1(UInt32 pos);\r
-unsigned GetPosSlot1(UInt32 pos)\r
-{\r
- unsigned res;\r
- BSR2_RET(pos, res);\r
- return res;\r
-}\r
-#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }\r
-#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }\r
-\r
-\r
-#else // ! LZMA_LOG_BSR\r
-\r
-#define kNumLogBits (11 + sizeof(size_t) / 8 * 3)\r
-\r
-#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)\r
-\r
-static void LzmaEnc_FastPosInit(Byte *g_FastPos)\r
-{\r
- unsigned slot;\r
- g_FastPos[0] = 0;\r
- g_FastPos[1] = 1;\r
- g_FastPos += 2;\r
- \r
- for (slot = 2; slot < kNumLogBits * 2; slot++)\r
- {\r
- size_t k = ((size_t)1 << ((slot >> 1) - 1));\r
- size_t j;\r
- for (j = 0; j < k; j++)\r
- g_FastPos[j] = (Byte)slot;\r
- g_FastPos += k;\r
- }\r
-}\r
-\r
-/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */\r
-/*\r
-#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \\r
- (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \\r
- res = p->g_FastPos[pos >> zz] + (zz * 2); }\r
-*/\r
-\r
-/*\r
-#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \\r
- (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \\r
- res = p->g_FastPos[pos >> zz] + (zz * 2); }\r
-*/\r
-\r
-#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \\r
- res = p->g_FastPos[pos >> zz] + (zz * 2); }\r
-\r
-/*\r
-#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \\r
- p->g_FastPos[pos >> 6] + 12 : \\r
- p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; }\r
-*/\r
-\r
-#define GetPosSlot1(pos) p->g_FastPos[pos]\r
-#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }\r
-#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); }\r
-\r
-#endif // LZMA_LOG_BSR\r
-\r
-\r
-#define LZMA_NUM_REPS 4\r
-\r
-typedef UInt16 CState;\r
-typedef UInt16 CExtra;\r
-\r
-typedef struct\r
-{\r
- UInt32 price;\r
- CState state;\r
- CExtra extra;\r
- // 0 : normal\r
- // 1 : LIT : MATCH\r
- // > 1 : MATCH (extra-1) : LIT : REP0 (len)\r
- UInt32 len;\r
- UInt32 dist;\r
- UInt32 reps[LZMA_NUM_REPS];\r
-} COptimal;\r
-\r
-\r
-// 18.06\r
-#define kNumOpts (1 << 11)\r
-#define kPackReserve (kNumOpts * 8)\r
-// #define kNumOpts (1 << 12)\r
-// #define kPackReserve (1 + kNumOpts * 2)\r
-\r
-#define kNumLenToPosStates 4\r
-#define kNumPosSlotBits 6\r
-// #define kDicLogSizeMin 0\r
-#define kDicLogSizeMax 32\r
-#define kDistTableSizeMax (kDicLogSizeMax * 2)\r
-\r
-#define kNumAlignBits 4\r
-#define kAlignTableSize (1 << kNumAlignBits)\r
-#define kAlignMask (kAlignTableSize - 1)\r
-\r
-#define kStartPosModelIndex 4\r
-#define kEndPosModelIndex 14\r
-#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))\r
-\r
-typedef\r
-#ifdef _LZMA_PROB32\r
- UInt32\r
-#else\r
- UInt16\r
-#endif\r
- CLzmaProb;\r
-\r
-#define LZMA_PB_MAX 4\r
-#define LZMA_LC_MAX 8\r
-#define LZMA_LP_MAX 4\r
-\r
-#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)\r
-\r
-#define kLenNumLowBits 3\r
-#define kLenNumLowSymbols (1 << kLenNumLowBits)\r
-#define kLenNumHighBits 8\r
-#define kLenNumHighSymbols (1 << kLenNumHighBits)\r
-#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols)\r
-\r
-#define LZMA_MATCH_LEN_MIN 2\r
-#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1)\r
-\r
-#define kNumStates 12\r
-\r
-\r
-typedef struct\r
-{\r
- CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)];\r
- CLzmaProb high[kLenNumHighSymbols];\r
-} CLenEnc;\r
-\r
-\r
-typedef struct\r
-{\r
- unsigned tableSize;\r
- UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];\r
- // UInt32 prices1[LZMA_NUM_PB_STATES_MAX][kLenNumLowSymbols * 2];\r
- // UInt32 prices2[kLenNumSymbolsTotal];\r
-} CLenPriceEnc;\r
-\r
-#define GET_PRICE_LEN(p, posState, len) \\r
- ((p)->prices[posState][(size_t)(len) - LZMA_MATCH_LEN_MIN])\r
-\r
-/*\r
-#define GET_PRICE_LEN(p, posState, len) \\r
- ((p)->prices2[(size_t)(len) - 2] + ((p)->prices1[posState][((len) - 2) & (kLenNumLowSymbols * 2 - 1)] & (((len) - 2 - kLenNumLowSymbols * 2) >> 9)))\r
-*/\r
-\r
-typedef struct\r
-{\r
- UInt32 range;\r
- unsigned cache;\r
- UInt64 low;\r
- UInt64 cacheSize;\r
- Byte *buf;\r
- Byte *bufLim;\r
- Byte *bufBase;\r
- ISeqOutStream *outStream;\r
- UInt64 processed;\r
- SRes res;\r
-} CRangeEnc;\r
-\r
-\r
-typedef struct\r
-{\r
- CLzmaProb *litProbs;\r
-\r
- unsigned state;\r
- UInt32 reps[LZMA_NUM_REPS];\r
-\r
- CLzmaProb posAlignEncoder[1 << kNumAlignBits];\r
- CLzmaProb isRep[kNumStates];\r
- CLzmaProb isRepG0[kNumStates];\r
- CLzmaProb isRepG1[kNumStates];\r
- CLzmaProb isRepG2[kNumStates];\r
- CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];\r
- CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];\r
-\r
- CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];\r
- CLzmaProb posEncoders[kNumFullDistances];\r
- \r
- CLenEnc lenProbs;\r
- CLenEnc repLenProbs;\r
-\r
-} CSaveState;\r
-\r
-\r
-typedef UInt32 CProbPrice;\r
-\r
-\r
-typedef struct\r
-{\r
- void *matchFinderObj;\r
- IMatchFinder2 matchFinder;\r
-\r
- unsigned optCur;\r
- unsigned optEnd;\r
-\r
- unsigned longestMatchLen;\r
- unsigned numPairs;\r
- UInt32 numAvail;\r
-\r
- unsigned state;\r
- unsigned numFastBytes;\r
- unsigned additionalOffset;\r
- UInt32 reps[LZMA_NUM_REPS];\r
- unsigned lpMask, pbMask;\r
- CLzmaProb *litProbs;\r
- CRangeEnc rc;\r
-\r
- UInt32 backRes;\r
-\r
- unsigned lc, lp, pb;\r
- unsigned lclp;\r
-\r
- BoolInt fastMode;\r
- BoolInt writeEndMark;\r
- BoolInt finished;\r
- BoolInt multiThread;\r
- BoolInt needInit;\r
- // BoolInt _maxMode;\r
-\r
- UInt64 nowPos64;\r
- \r
- unsigned matchPriceCount;\r
- // unsigned alignPriceCount;\r
- int repLenEncCounter;\r
-\r
- unsigned distTableSize;\r
-\r
- UInt32 dictSize;\r
- SRes result;\r
-\r
- #ifndef _7ZIP_ST\r
- BoolInt mtMode;\r
- // begin of CMatchFinderMt is used in LZ thread\r
- CMatchFinderMt matchFinderMt;\r
- // end of CMatchFinderMt is used in BT and HASH threads\r
- // #else\r
- // CMatchFinder matchFinderBase;\r
- #endif\r
- CMatchFinder matchFinderBase;\r
-\r
- \r
- // we suppose that we have 8-bytes alignment after CMatchFinder\r
- \r
- #ifndef _7ZIP_ST\r
- Byte pad[128];\r
- #endif\r
- \r
- // LZ thread\r
- CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits];\r
-\r
- // we want {len , dist} pairs to be 8-bytes aligned in matches array\r
- UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2];\r
-\r
- // we want 8-bytes alignment here\r
- UInt32 alignPrices[kAlignTableSize];\r
- UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];\r
- UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];\r
-\r
- CLzmaProb posAlignEncoder[1 << kNumAlignBits];\r
- CLzmaProb isRep[kNumStates];\r
- CLzmaProb isRepG0[kNumStates];\r
- CLzmaProb isRepG1[kNumStates];\r
- CLzmaProb isRepG2[kNumStates];\r
- CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];\r
- CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];\r
- CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];\r
- CLzmaProb posEncoders[kNumFullDistances];\r
- \r
- CLenEnc lenProbs;\r
- CLenEnc repLenProbs;\r
-\r
- #ifndef LZMA_LOG_BSR\r
- Byte g_FastPos[1 << kNumLogBits];\r
- #endif\r
-\r
- CLenPriceEnc lenEnc;\r
- CLenPriceEnc repLenEnc;\r
-\r
- COptimal opt[kNumOpts];\r
-\r
- CSaveState saveState;\r
-\r
- // BoolInt mf_Failure;\r
- #ifndef _7ZIP_ST\r
- Byte pad2[128];\r
- #endif\r
-} CLzmaEnc;\r
-\r
-\r
-#define MFB (p->matchFinderBase)\r
-/*\r
-#ifndef _7ZIP_ST\r
-#define MFB (p->matchFinderMt.MatchFinder)\r
-#endif\r
-*/\r
-\r
-#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr));\r
-\r
-void LzmaEnc_SaveState(CLzmaEncHandle pp)\r
-{\r
- CLzmaEnc *p = (CLzmaEnc *)pp;\r
- CSaveState *dest = &p->saveState;\r
- \r
- dest->state = p->state;\r
- \r
- dest->lenProbs = p->lenProbs;\r
- dest->repLenProbs = p->repLenProbs;\r
-\r
- COPY_ARR(dest, p, reps);\r
-\r
- COPY_ARR(dest, p, posAlignEncoder);\r
- COPY_ARR(dest, p, isRep);\r
- COPY_ARR(dest, p, isRepG0);\r
- COPY_ARR(dest, p, isRepG1);\r
- COPY_ARR(dest, p, isRepG2);\r
- COPY_ARR(dest, p, isMatch);\r
- COPY_ARR(dest, p, isRep0Long);\r
- COPY_ARR(dest, p, posSlotEncoder);\r
- COPY_ARR(dest, p, posEncoders);\r
-\r
- memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb));\r
-}\r
-\r
-\r
-void LzmaEnc_RestoreState(CLzmaEncHandle pp)\r
-{\r
- CLzmaEnc *dest = (CLzmaEnc *)pp;\r
- const CSaveState *p = &dest->saveState;\r
-\r
- dest->state = p->state;\r
-\r
- dest->lenProbs = p->lenProbs;\r
- dest->repLenProbs = p->repLenProbs;\r
- \r
- COPY_ARR(dest, p, reps);\r
- \r
- COPY_ARR(dest, p, posAlignEncoder);\r
- COPY_ARR(dest, p, isRep);\r
- COPY_ARR(dest, p, isRepG0);\r
- COPY_ARR(dest, p, isRepG1);\r
- COPY_ARR(dest, p, isRepG2);\r
- COPY_ARR(dest, p, isMatch);\r
- COPY_ARR(dest, p, isRep0Long);\r
- COPY_ARR(dest, p, posSlotEncoder);\r
- COPY_ARR(dest, p, posEncoders);\r
-\r
- memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb));\r
-}\r
-\r
-\r
-\r
-SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)\r
-{\r
- CLzmaEnc *p = (CLzmaEnc *)pp;\r
- CLzmaEncProps props = *props2;\r
- LzmaEncProps_Normalize(&props);\r
-\r
- if (props.lc > LZMA_LC_MAX\r
- || props.lp > LZMA_LP_MAX\r
- || props.pb > LZMA_PB_MAX)\r
- return SZ_ERROR_PARAM;\r
-\r
-\r
- if (props.dictSize > kLzmaMaxHistorySize)\r
- props.dictSize = kLzmaMaxHistorySize;\r
-\r
- #ifndef LZMA_LOG_BSR\r
- {\r
- const UInt64 dict64 = props.dictSize;\r
- if (dict64 > ((UInt64)1 << kDicLogSizeMaxCompress))\r
- return SZ_ERROR_PARAM;\r
- }\r
- #endif\r
-\r
- p->dictSize = props.dictSize;\r
- {\r
- unsigned fb = (unsigned)props.fb;\r
- if (fb < 5)\r
- fb = 5;\r
- if (fb > LZMA_MATCH_LEN_MAX)\r
- fb = LZMA_MATCH_LEN_MAX;\r
- p->numFastBytes = fb;\r
- }\r
- p->lc = (unsigned)props.lc;\r
- p->lp = (unsigned)props.lp;\r
- p->pb = (unsigned)props.pb;\r
- p->fastMode = (props.algo == 0);\r
- // p->_maxMode = True;\r
- MFB.btMode = (Byte)(props.btMode ? 1 : 0);\r
- {\r
- unsigned numHashBytes = 4;\r
- if (props.btMode)\r
- {\r
- if (props.numHashBytes < 2) numHashBytes = 2;\r
- else if (props.numHashBytes < 4) numHashBytes = (unsigned)props.numHashBytes;\r
- }\r
- if (props.numHashBytes >= 5) numHashBytes = 5;\r
-\r
- MFB.numHashBytes = numHashBytes;\r
- }\r
-\r
- MFB.cutValue = props.mc;\r
-\r
- p->writeEndMark = (BoolInt)props.writeEndMark;\r
-\r
- #ifndef _7ZIP_ST\r
- /*\r
- if (newMultiThread != _multiThread)\r
- {\r
- ReleaseMatchFinder();\r
- _multiThread = newMultiThread;\r
- }\r
- */\r
- p->multiThread = (props.numThreads > 1);\r
- p->matchFinderMt.btSync.affinity =\r
- p->matchFinderMt.hashSync.affinity = props.affinity;\r
- #endif\r
-\r
- return SZ_OK;\r
-}\r
-\r
-\r
-void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)\r
-{\r
- CLzmaEnc *p = (CLzmaEnc *)pp;\r
- MFB.expectedDataSize = expectedDataSiize;\r
-}\r
-\r
-\r
-#define kState_Start 0\r
-#define kState_LitAfterMatch 4\r
-#define kState_LitAfterRep 5\r
-#define kState_MatchAfterLit 7\r
-#define kState_RepAfterLit 8\r
-\r
-static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5};\r
-static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};\r
-static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};\r
-static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};\r
-\r
-#define IsLitState(s) ((s) < 7)\r
-#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1)\r
-#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1)\r
-\r
-#define kInfinityPrice (1 << 30)\r
-\r
-static void RangeEnc_Construct(CRangeEnc *p)\r
-{\r
- p->outStream = NULL;\r
- p->bufBase = NULL;\r
-}\r
-\r
-#define RangeEnc_GetProcessed(p) ( (p)->processed + (size_t)((p)->buf - (p)->bufBase) + (p)->cacheSize)\r
-#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)\r
-\r
-#define RC_BUF_SIZE (1 << 16)\r
-\r
-static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc)\r
-{\r
- if (!p->bufBase)\r
- {\r
- p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE);\r
- if (!p->bufBase)\r
- return 0;\r
- p->bufLim = p->bufBase + RC_BUF_SIZE;\r
- }\r
- return 1;\r
-}\r
-\r
-static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc)\r
-{\r
- ISzAlloc_Free(alloc, p->bufBase);\r
- p->bufBase = NULL;\r
-}\r
-\r
-static void RangeEnc_Init(CRangeEnc *p)\r
-{\r
- p->range = 0xFFFFFFFF;\r
- p->cache = 0;\r
- p->low = 0;\r
- p->cacheSize = 0;\r
-\r
- p->buf = p->bufBase;\r
-\r
- p->processed = 0;\r
- p->res = SZ_OK;\r
-}\r
-\r
-MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)\r
-{\r
- const size_t num = (size_t)(p->buf - p->bufBase);\r
- if (p->res == SZ_OK)\r
- {\r
- if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))\r
- p->res = SZ_ERROR_WRITE;\r
- }\r
- p->processed += num;\r
- p->buf = p->bufBase;\r
-}\r
-\r
-MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p)\r
-{\r
- UInt32 low = (UInt32)p->low;\r
- unsigned high = (unsigned)(p->low >> 32);\r
- p->low = (UInt32)(low << 8);\r
- if (low < (UInt32)0xFF000000 || high != 0)\r
- {\r
- {\r
- Byte *buf = p->buf;\r
- *buf++ = (Byte)(p->cache + high);\r
- p->cache = (unsigned)(low >> 24);\r
- p->buf = buf;\r
- if (buf == p->bufLim)\r
- RangeEnc_FlushStream(p);\r
- if (p->cacheSize == 0)\r
- return;\r
- }\r
- high += 0xFF;\r
- for (;;)\r
- {\r
- Byte *buf = p->buf;\r
- *buf++ = (Byte)(high);\r
- p->buf = buf;\r
- if (buf == p->bufLim)\r
- RangeEnc_FlushStream(p);\r
- if (--p->cacheSize == 0)\r
- return;\r
- }\r
- }\r
- p->cacheSize++;\r
-}\r
-\r
-static void RangeEnc_FlushData(CRangeEnc *p)\r
-{\r
- int i;\r
- for (i = 0; i < 5; i++)\r
- RangeEnc_ShiftLow(p);\r
-}\r
-\r
-#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); }\r
-\r
-#define RC_BIT_PRE(p, prob) \\r
- ttt = *(prob); \\r
- newBound = (range >> kNumBitModelTotalBits) * ttt;\r
-\r
-// #define _LZMA_ENC_USE_BRANCH\r
-\r
-#ifdef _LZMA_ENC_USE_BRANCH\r
-\r
-#define RC_BIT(p, prob, bit) { \\r
- RC_BIT_PRE(p, prob) \\r
- if (bit == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \\r
- else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \\r
- *(prob) = (CLzmaProb)ttt; \\r
- RC_NORM(p) \\r
- }\r
-\r
-#else\r
-\r
-#define RC_BIT(p, prob, bit) { \\r
- UInt32 mask; \\r
- RC_BIT_PRE(p, prob) \\r
- mask = 0 - (UInt32)bit; \\r
- range &= mask; \\r
- mask &= newBound; \\r
- range -= mask; \\r
- (p)->low += mask; \\r
- mask = (UInt32)bit - 1; \\r
- range += newBound & mask; \\r
- mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \\r
- mask += ((1 << kNumMoveBits) - 1); \\r
- ttt += (UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \\r
- *(prob) = (CLzmaProb)ttt; \\r
- RC_NORM(p) \\r
- }\r
-\r
-#endif\r
-\r
-\r
-\r
-\r
-#define RC_BIT_0_BASE(p, prob) \\r
- range = newBound; *(prob) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));\r
-\r
-#define RC_BIT_1_BASE(p, prob) \\r
- range -= newBound; (p)->low += newBound; *(prob) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); \\r
-\r
-#define RC_BIT_0(p, prob) \\r
- RC_BIT_0_BASE(p, prob) \\r
- RC_NORM(p)\r
-\r
-#define RC_BIT_1(p, prob) \\r
- RC_BIT_1_BASE(p, prob) \\r
- RC_NORM(p)\r
-\r
-static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob)\r
-{\r
- UInt32 range, ttt, newBound;\r
- range = p->range;\r
- RC_BIT_PRE(p, prob)\r
- RC_BIT_0(p, prob)\r
- p->range = range;\r
-}\r
-\r
-static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym)\r
-{\r
- UInt32 range = p->range;\r
- sym |= 0x100;\r
- do\r
- {\r
- UInt32 ttt, newBound;\r
- // RangeEnc_EncodeBit(p, probs + (sym >> 8), (sym >> 7) & 1);\r
- CLzmaProb *prob = probs + (sym >> 8);\r
- UInt32 bit = (sym >> 7) & 1;\r
- sym <<= 1;\r
- RC_BIT(p, prob, bit);\r
- }\r
- while (sym < 0x10000);\r
- p->range = range;\r
-}\r
-\r
-static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UInt32 matchByte)\r
-{\r
- UInt32 range = p->range;\r
- UInt32 offs = 0x100;\r
- sym |= 0x100;\r
- do\r
- {\r
- UInt32 ttt, newBound;\r
- CLzmaProb *prob;\r
- UInt32 bit;\r
- matchByte <<= 1;\r
- // RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (sym >> 8)), (sym >> 7) & 1);\r
- prob = probs + (offs + (matchByte & offs) + (sym >> 8));\r
- bit = (sym >> 7) & 1;\r
- sym <<= 1;\r
- offs &= ~(matchByte ^ sym);\r
- RC_BIT(p, prob, bit);\r
- }\r
- while (sym < 0x10000);\r
- p->range = range;\r
-}\r
-\r
-\r
-\r
-static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)\r
-{\r
- UInt32 i;\r
- for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++)\r
- {\r
- const unsigned kCyclesBits = kNumBitPriceShiftBits;\r
- UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1));\r
- unsigned bitCount = 0;\r
- unsigned j;\r
- for (j = 0; j < kCyclesBits; j++)\r
- {\r
- w = w * w;\r
- bitCount <<= 1;\r
- while (w >= ((UInt32)1 << 16))\r
- {\r
- w >>= 1;\r
- bitCount++;\r
- }\r
- }\r
- ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);\r
- // printf("\n%3d: %5d", i, ProbPrices[i]);\r
- }\r
-}\r
-\r
-\r
-#define GET_PRICE(prob, bit) \\r
- p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];\r
-\r
-#define GET_PRICEa(prob, bit) \\r
- ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];\r
-\r
-#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]\r
-#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]\r
-\r
-#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits]\r
-#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]\r
-\r
-\r
-static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 sym, const CProbPrice *ProbPrices)\r
-{\r
- UInt32 price = 0;\r
- sym |= 0x100;\r
- do\r
- {\r
- unsigned bit = sym & 1;\r
- sym >>= 1;\r
- price += GET_PRICEa(probs[sym], bit);\r
- }\r
- while (sym >= 2);\r
- return price;\r
-}\r
-\r
-\r
-static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 sym, UInt32 matchByte, const CProbPrice *ProbPrices)\r
-{\r
- UInt32 price = 0;\r
- UInt32 offs = 0x100;\r
- sym |= 0x100;\r
- do\r
- {\r
- matchByte <<= 1;\r
- price += GET_PRICEa(probs[offs + (matchByte & offs) + (sym >> 8)], (sym >> 7) & 1);\r
- sym <<= 1;\r
- offs &= ~(matchByte ^ sym);\r
- }\r
- while (sym < 0x10000);\r
- return price;\r
-}\r
-\r
-\r
-static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBits, unsigned sym)\r
-{\r
- UInt32 range = rc->range;\r
- unsigned m = 1;\r
- do\r
- {\r
- UInt32 ttt, newBound;\r
- unsigned bit = sym & 1;\r
- // RangeEnc_EncodeBit(rc, probs + m, bit);\r
- sym >>= 1;\r
- RC_BIT(rc, probs + m, bit);\r
- m = (m << 1) | bit;\r
- }\r
- while (--numBits);\r
- rc->range = range;\r
-}\r
-\r
-\r
-\r
-static void LenEnc_Init(CLenEnc *p)\r
-{\r
- unsigned i;\r
- for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++)\r
- p->low[i] = kProbInitValue;\r
- for (i = 0; i < kLenNumHighSymbols; i++)\r
- p->high[i] = kProbInitValue;\r
-}\r
-\r
-static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posState)\r
-{\r
- UInt32 range, ttt, newBound;\r
- CLzmaProb *probs = p->low;\r
- range = rc->range;\r
- RC_BIT_PRE(rc, probs);\r
- if (sym >= kLenNumLowSymbols)\r
- {\r
- RC_BIT_1(rc, probs);\r
- probs += kLenNumLowSymbols;\r
- RC_BIT_PRE(rc, probs);\r
- if (sym >= kLenNumLowSymbols * 2)\r
- {\r
- RC_BIT_1(rc, probs);\r
- rc->range = range;\r
- // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2);\r
- LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2);\r
- return;\r
- }\r
- sym -= kLenNumLowSymbols;\r
- }\r
-\r
- // RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, sym);\r
- {\r
- unsigned m;\r
- unsigned bit;\r
- RC_BIT_0(rc, probs);\r
- probs += (posState << (1 + kLenNumLowBits));\r
- bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit;\r
- bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit;\r
- bit = sym & 1; RC_BIT(rc, probs + m, bit);\r
- rc->range = range;\r
- }\r
-}\r
-\r
-static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices)\r
-{\r
- unsigned i;\r
- for (i = 0; i < 8; i += 2)\r
- {\r
- UInt32 price = startPrice;\r
- UInt32 prob;\r
- price += GET_PRICEa(probs[1 ], (i >> 2));\r
- price += GET_PRICEa(probs[2 + (i >> 2)], (i >> 1) & 1);\r
- prob = probs[4 + (i >> 1)];\r
- prices[i ] = price + GET_PRICEa_0(prob);\r
- prices[i + 1] = price + GET_PRICEa_1(prob);\r
- }\r
-}\r
-\r
-\r
-MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(\r
- CLenPriceEnc *p,\r
- unsigned numPosStates,\r
- const CLenEnc *enc,\r
- const CProbPrice *ProbPrices)\r
-{\r
- UInt32 b;\r
- \r
- {\r
- unsigned prob = enc->low[0];\r
- UInt32 a, c;\r
- unsigned posState;\r
- b = GET_PRICEa_1(prob);\r
- a = GET_PRICEa_0(prob);\r
- c = b + GET_PRICEa_0(enc->low[kLenNumLowSymbols]);\r
- for (posState = 0; posState < numPosStates; posState++)\r
- {\r
- UInt32 *prices = p->prices[posState];\r
- const CLzmaProb *probs = enc->low + (posState << (1 + kLenNumLowBits));\r
- SetPrices_3(probs, a, prices, ProbPrices);\r
- SetPrices_3(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols, ProbPrices);\r
- }\r
- }\r
-\r
- /*\r
- {\r
- unsigned i;\r
- UInt32 b;\r
- a = GET_PRICEa_0(enc->low[0]);\r
- for (i = 0; i < kLenNumLowSymbols; i++)\r
- p->prices2[i] = a;\r
- a = GET_PRICEa_1(enc->low[0]);\r
- b = a + GET_PRICEa_0(enc->low[kLenNumLowSymbols]);\r
- for (i = kLenNumLowSymbols; i < kLenNumLowSymbols * 2; i++)\r
- p->prices2[i] = b;\r
- a += GET_PRICEa_1(enc->low[kLenNumLowSymbols]);\r
- }\r
- */\r
- \r
- // p->counter = numSymbols;\r
- // p->counter = 64;\r
-\r
- {\r
- unsigned i = p->tableSize;\r
- \r
- if (i > kLenNumLowSymbols * 2)\r
- {\r
- const CLzmaProb *probs = enc->high;\r
- UInt32 *prices = p->prices[0] + kLenNumLowSymbols * 2;\r
- i -= kLenNumLowSymbols * 2 - 1;\r
- i >>= 1;\r
- b += GET_PRICEa_1(enc->low[kLenNumLowSymbols]);\r
- do\r
- {\r
- /*\r
- p->prices2[i] = a +\r
- // RcTree_GetPrice(enc->high, kLenNumHighBits, i - kLenNumLowSymbols * 2, ProbPrices);\r
- LitEnc_GetPrice(probs, i - kLenNumLowSymbols * 2, ProbPrices);\r
- */\r
- // UInt32 price = a + RcTree_GetPrice(probs, kLenNumHighBits - 1, sym, ProbPrices);\r
- unsigned sym = --i + (1 << (kLenNumHighBits - 1));\r
- UInt32 price = b;\r
- do\r
- {\r
- unsigned bit = sym & 1;\r
- sym >>= 1;\r
- price += GET_PRICEa(probs[sym], bit);\r
- }\r
- while (sym >= 2);\r
-\r
- {\r
- unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];\r
- prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob);\r
- prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob);\r
- }\r
- }\r
- while (i);\r
-\r
- {\r
- unsigned posState;\r
- size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);\r
- for (posState = 1; posState < numPosStates; posState++)\r
- memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num);\r
- }\r
- }\r
- }\r
-}\r
-\r
-/*\r
- #ifdef SHOW_STAT\r
- g_STAT_OFFSET += num;\r
- printf("\n MovePos %u", num);\r
- #endif\r
-*/\r
- \r
-#define MOVE_POS(p, num) { \\r
- p->additionalOffset += (num); \\r
- p->matchFinder.Skip(p->matchFinderObj, (UInt32)(num)); }\r
-\r
-\r
-static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)\r
-{\r
- unsigned numPairs;\r
- \r
- p->additionalOffset++;\r
- p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);\r
- {\r
- const UInt32 *d = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);\r
- // if (!d) { p->mf_Failure = True; *numPairsRes = 0; return 0; }\r
- numPairs = (unsigned)(d - p->matches);\r
- }\r
- *numPairsRes = numPairs;\r
- \r
- #ifdef SHOW_STAT\r
- printf("\n i = %u numPairs = %u ", g_STAT_OFFSET, numPairs / 2);\r
- g_STAT_OFFSET++;\r
- {\r
- unsigned i;\r
- for (i = 0; i < numPairs; i += 2)\r
- printf("%2u %6u | ", p->matches[i], p->matches[i + 1]);\r
- }\r
- #endif\r
- \r
- if (numPairs == 0)\r
- return 0;\r
- {\r
- const unsigned len = p->matches[(size_t)numPairs - 2];\r
- if (len != p->numFastBytes)\r
- return len;\r
- {\r
- UInt32 numAvail = p->numAvail;\r
- if (numAvail > LZMA_MATCH_LEN_MAX)\r
- numAvail = LZMA_MATCH_LEN_MAX;\r
- {\r
- const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;\r
- const Byte *p2 = p1 + len;\r
- const ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1];\r
- const Byte *lim = p1 + numAvail;\r
- for (; p2 != lim && *p2 == p2[dif]; p2++)\r
- {}\r
- return (unsigned)(p2 - p1);\r
- }\r
- }\r
- }\r
-}\r
-\r
-#define MARK_LIT ((UInt32)(Int32)-1)\r
-\r
-#define MakeAs_Lit(p) { (p)->dist = MARK_LIT; (p)->extra = 0; }\r
-#define MakeAs_ShortRep(p) { (p)->dist = 0; (p)->extra = 0; }\r
-#define IsShortRep(p) ((p)->dist == 0)\r
-\r
-\r
-#define GetPrice_ShortRep(p, state, posState) \\r
- ( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState]))\r
-\r
-#define GetPrice_Rep_0(p, state, posState) ( \\r
- GET_PRICE_1(p->isMatch[state][posState]) \\r
- + GET_PRICE_1(p->isRep0Long[state][posState])) \\r
- + GET_PRICE_1(p->isRep[state]) \\r
- + GET_PRICE_0(p->isRepG0[state])\r
- \r
-MY_FORCE_INLINE\r
-static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState)\r
-{\r
- UInt32 price;\r
- UInt32 prob = p->isRepG0[state];\r
- if (repIndex == 0)\r
- {\r
- price = GET_PRICE_0(prob);\r
- price += GET_PRICE_1(p->isRep0Long[state][posState]);\r
- }\r
- else\r
- {\r
- price = GET_PRICE_1(prob);\r
- prob = p->isRepG1[state];\r
- if (repIndex == 1)\r
- price += GET_PRICE_0(prob);\r
- else\r
- {\r
- price += GET_PRICE_1(prob);\r
- price += GET_PRICE(p->isRepG2[state], repIndex - 2);\r
- }\r
- }\r
- return price;\r
-}\r
-\r
-\r
-static unsigned Backward(CLzmaEnc *p, unsigned cur)\r
-{\r
- unsigned wr = cur + 1;\r
- p->optEnd = wr;\r
-\r
- for (;;)\r
- {\r
- UInt32 dist = p->opt[cur].dist;\r
- unsigned len = (unsigned)p->opt[cur].len;\r
- unsigned extra = (unsigned)p->opt[cur].extra;\r
- cur -= len;\r
-\r
- if (extra)\r
- {\r
- wr--;\r
- p->opt[wr].len = (UInt32)len;\r
- cur -= extra;\r
- len = extra;\r
- if (extra == 1)\r
- {\r
- p->opt[wr].dist = dist;\r
- dist = MARK_LIT;\r
- }\r
- else\r
- {\r
- p->opt[wr].dist = 0;\r
- len--;\r
- wr--;\r
- p->opt[wr].dist = MARK_LIT;\r
- p->opt[wr].len = 1;\r
- }\r
- }\r
-\r
- if (cur == 0)\r
- {\r
- p->backRes = dist;\r
- p->optCur = wr;\r
- return len;\r
- }\r
- \r
- wr--;\r
- p->opt[wr].dist = dist;\r
- p->opt[wr].len = (UInt32)len;\r
- }\r
-}\r
-\r
-\r
-\r
-#define LIT_PROBS(pos, prevByte) \\r
- (p->litProbs + (UInt32)3 * (((((pos) << 8) + (prevByte)) & p->lpMask) << p->lc))\r
-\r
-\r
-static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)\r
-{\r
- unsigned last, cur;\r
- UInt32 reps[LZMA_NUM_REPS];\r
- unsigned repLens[LZMA_NUM_REPS];\r
- UInt32 *matches;\r
-\r
- {\r
- UInt32 numAvail;\r
- unsigned numPairs, mainLen, repMaxIndex, i, posState;\r
- UInt32 matchPrice, repMatchPrice;\r
- const Byte *data;\r
- Byte curByte, matchByte;\r
- \r
- p->optCur = p->optEnd = 0;\r
- \r
- if (p->additionalOffset == 0)\r
- mainLen = ReadMatchDistances(p, &numPairs);\r
- else\r
- {\r
- mainLen = p->longestMatchLen;\r
- numPairs = p->numPairs;\r
- }\r
- \r
- numAvail = p->numAvail;\r
- if (numAvail < 2)\r
- {\r
- p->backRes = MARK_LIT;\r
- return 1;\r
- }\r
- if (numAvail > LZMA_MATCH_LEN_MAX)\r
- numAvail = LZMA_MATCH_LEN_MAX;\r
- \r
- data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;\r
- repMaxIndex = 0;\r
- \r
- for (i = 0; i < LZMA_NUM_REPS; i++)\r
- {\r
- unsigned len;\r
- const Byte *data2;\r
- reps[i] = p->reps[i];\r
- data2 = data - reps[i];\r
- if (data[0] != data2[0] || data[1] != data2[1])\r
- {\r
- repLens[i] = 0;\r
- continue;\r
- }\r
- for (len = 2; len < numAvail && data[len] == data2[len]; len++)\r
- {}\r
- repLens[i] = len;\r
- if (len > repLens[repMaxIndex])\r
- repMaxIndex = i;\r
- if (len == LZMA_MATCH_LEN_MAX) // 21.03 : optimization\r
- break;\r
- }\r
- \r
- if (repLens[repMaxIndex] >= p->numFastBytes)\r
- {\r
- unsigned len;\r
- p->backRes = (UInt32)repMaxIndex;\r
- len = repLens[repMaxIndex];\r
- MOVE_POS(p, len - 1)\r
- return len;\r
- }\r
- \r
- matches = p->matches;\r
- #define MATCHES matches\r
- // #define MATCHES p->matches\r
- \r
- if (mainLen >= p->numFastBytes)\r
- {\r
- p->backRes = MATCHES[(size_t)numPairs - 1] + LZMA_NUM_REPS;\r
- MOVE_POS(p, mainLen - 1)\r
- return mainLen;\r
- }\r
- \r
- curByte = *data;\r
- matchByte = *(data - reps[0]);\r
-\r
- last = repLens[repMaxIndex];\r
- if (last <= mainLen)\r
- last = mainLen;\r
- \r
- if (last < 2 && curByte != matchByte)\r
- {\r
- p->backRes = MARK_LIT;\r
- return 1;\r
- }\r
- \r
- p->opt[0].state = (CState)p->state;\r
- \r
- posState = (position & p->pbMask);\r
- \r
- {\r
- const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));\r
- p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) +\r
- (!IsLitState(p->state) ?\r
- LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) :\r
- LitEnc_GetPrice(probs, curByte, p->ProbPrices));\r
- }\r
-\r
- MakeAs_Lit(&p->opt[1]);\r
- \r
- matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);\r
- repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);\r
- \r
- // 18.06\r
- if (matchByte == curByte && repLens[0] == 0)\r
- {\r
- UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, p->state, posState);\r
- if (shortRepPrice < p->opt[1].price)\r
- {\r
- p->opt[1].price = shortRepPrice;\r
- MakeAs_ShortRep(&p->opt[1]);\r
- }\r
- if (last < 2)\r
- {\r
- p->backRes = p->opt[1].dist;\r
- return 1;\r
- }\r
- }\r
- \r
- p->opt[1].len = 1;\r
- \r
- p->opt[0].reps[0] = reps[0];\r
- p->opt[0].reps[1] = reps[1];\r
- p->opt[0].reps[2] = reps[2];\r
- p->opt[0].reps[3] = reps[3];\r
- \r
- // ---------- REP ----------\r
- \r
- for (i = 0; i < LZMA_NUM_REPS; i++)\r
- {\r
- unsigned repLen = repLens[i];\r
- UInt32 price;\r
- if (repLen < 2)\r
- continue;\r
- price = repMatchPrice + GetPrice_PureRep(p, i, p->state, posState);\r
- do\r
- {\r
- UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, repLen);\r
- COptimal *opt = &p->opt[repLen];\r
- if (price2 < opt->price)\r
- {\r
- opt->price = price2;\r
- opt->len = (UInt32)repLen;\r
- opt->dist = (UInt32)i;\r
- opt->extra = 0;\r
- }\r
- }\r
- while (--repLen >= 2);\r
- }\r
- \r
- \r
- // ---------- MATCH ----------\r
- {\r
- unsigned len = repLens[0] + 1;\r
- if (len <= mainLen)\r
- {\r
- unsigned offs = 0;\r
- UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]);\r
-\r
- if (len < 2)\r
- len = 2;\r
- else\r
- while (len > MATCHES[offs])\r
- offs += 2;\r
- \r
- for (; ; len++)\r
- {\r
- COptimal *opt;\r
- UInt32 dist = MATCHES[(size_t)offs + 1];\r
- UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);\r
- unsigned lenToPosState = GetLenToPosState(len);\r
- \r
- if (dist < kNumFullDistances)\r
- price += p->distancesPrices[lenToPosState][dist & (kNumFullDistances - 1)];\r
- else\r
- {\r
- unsigned slot;\r
- GetPosSlot2(dist, slot);\r
- price += p->alignPrices[dist & kAlignMask];\r
- price += p->posSlotPrices[lenToPosState][slot];\r
- }\r
- \r
- opt = &p->opt[len];\r
- \r
- if (price < opt->price)\r
- {\r
- opt->price = price;\r
- opt->len = (UInt32)len;\r
- opt->dist = dist + LZMA_NUM_REPS;\r
- opt->extra = 0;\r
- }\r
- \r
- if (len == MATCHES[offs])\r
- {\r
- offs += 2;\r
- if (offs == numPairs)\r
- break;\r
- }\r
- }\r
- }\r
- }\r
- \r
-\r
- cur = 0;\r
-\r
- #ifdef SHOW_STAT2\r
- /* if (position >= 0) */\r
- {\r
- unsigned i;\r
- printf("\n pos = %4X", position);\r
- for (i = cur; i <= last; i++)\r
- printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price);\r
- }\r
- #endif\r
- }\r
-\r
-\r
- \r
- // ---------- Optimal Parsing ----------\r
-\r
- for (;;)\r
- {\r
- unsigned numAvail;\r
- UInt32 numAvailFull;\r
- unsigned newLen, numPairs, prev, state, posState, startLen;\r
- UInt32 litPrice, matchPrice, repMatchPrice;\r
- BoolInt nextIsLit;\r
- Byte curByte, matchByte;\r
- const Byte *data;\r
- COptimal *curOpt, *nextOpt;\r
-\r
- if (++cur == last)\r
- break;\r
- \r
- // 18.06\r
- if (cur >= kNumOpts - 64)\r
- {\r
- unsigned j, best;\r
- UInt32 price = p->opt[cur].price;\r
- best = cur;\r
- for (j = cur + 1; j <= last; j++)\r
- {\r
- UInt32 price2 = p->opt[j].price;\r
- if (price >= price2)\r
- {\r
- price = price2;\r
- best = j;\r
- }\r
- }\r
- {\r
- unsigned delta = best - cur;\r
- if (delta != 0)\r
- {\r
- MOVE_POS(p, delta);\r
- }\r
- }\r
- cur = best;\r
- break;\r
- }\r
-\r
- newLen = ReadMatchDistances(p, &numPairs);\r
- \r
- if (newLen >= p->numFastBytes)\r
- {\r
- p->numPairs = numPairs;\r
- p->longestMatchLen = newLen;\r
- break;\r
- }\r
- \r
- curOpt = &p->opt[cur];\r
-\r
- position++;\r
-\r
- // we need that check here, if skip_items in p->opt are possible\r
- /*\r
- if (curOpt->price >= kInfinityPrice)\r
- continue;\r
- */\r
-\r
- prev = cur - curOpt->len;\r
-\r
- if (curOpt->len == 1)\r
- {\r
- state = (unsigned)p->opt[prev].state;\r
- if (IsShortRep(curOpt))\r
- state = kShortRepNextStates[state];\r
- else\r
- state = kLiteralNextStates[state];\r
- }\r
- else\r
- {\r
- const COptimal *prevOpt;\r
- UInt32 b0;\r
- UInt32 dist = curOpt->dist;\r
-\r
- if (curOpt->extra)\r
- {\r
- prev -= (unsigned)curOpt->extra;\r
- state = kState_RepAfterLit;\r
- if (curOpt->extra == 1)\r
- state = (dist < LZMA_NUM_REPS ? kState_RepAfterLit : kState_MatchAfterLit);\r
- }\r
- else\r
- {\r
- state = (unsigned)p->opt[prev].state;\r
- if (dist < LZMA_NUM_REPS)\r
- state = kRepNextStates[state];\r
- else\r
- state = kMatchNextStates[state];\r
- }\r
-\r
- prevOpt = &p->opt[prev];\r
- b0 = prevOpt->reps[0];\r
-\r
- if (dist < LZMA_NUM_REPS)\r
- {\r
- if (dist == 0)\r
- {\r
- reps[0] = b0;\r
- reps[1] = prevOpt->reps[1];\r
- reps[2] = prevOpt->reps[2];\r
- reps[3] = prevOpt->reps[3];\r
- }\r
- else\r
- {\r
- reps[1] = b0;\r
- b0 = prevOpt->reps[1];\r
- if (dist == 1)\r
- {\r
- reps[0] = b0;\r
- reps[2] = prevOpt->reps[2];\r
- reps[3] = prevOpt->reps[3];\r
- }\r
- else\r
- {\r
- reps[2] = b0;\r
- reps[0] = prevOpt->reps[dist];\r
- reps[3] = prevOpt->reps[dist ^ 1];\r
- }\r
- }\r
- }\r
- else\r
- {\r
- reps[0] = (dist - LZMA_NUM_REPS + 1);\r
- reps[1] = b0;\r
- reps[2] = prevOpt->reps[1];\r
- reps[3] = prevOpt->reps[2];\r
- }\r
- }\r
- \r
- curOpt->state = (CState)state;\r
- curOpt->reps[0] = reps[0];\r
- curOpt->reps[1] = reps[1];\r
- curOpt->reps[2] = reps[2];\r
- curOpt->reps[3] = reps[3];\r
-\r
- data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;\r
- curByte = *data;\r
- matchByte = *(data - reps[0]);\r
-\r
- posState = (position & p->pbMask);\r
-\r
- /*\r
- The order of Price checks:\r
- < LIT\r
- <= SHORT_REP\r
- < LIT : REP_0\r
- < REP [ : LIT : REP_0 ]\r
- < MATCH [ : LIT : REP_0 ]\r
- */\r
-\r
- {\r
- UInt32 curPrice = curOpt->price;\r
- unsigned prob = p->isMatch[state][posState];\r
- matchPrice = curPrice + GET_PRICE_1(prob);\r
- litPrice = curPrice + GET_PRICE_0(prob);\r
- }\r
-\r
- nextOpt = &p->opt[(size_t)cur + 1];\r
- nextIsLit = False;\r
-\r
- // here we can allow skip_items in p->opt, if we don't check (nextOpt->price < kInfinityPrice)\r
- // 18.new.06\r
- if ((nextOpt->price < kInfinityPrice\r
- // && !IsLitState(state)\r
- && matchByte == curByte)\r
- || litPrice > nextOpt->price\r
- )\r
- litPrice = 0;\r
- else\r
- {\r
- const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));\r
- litPrice += (!IsLitState(state) ?\r
- LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) :\r
- LitEnc_GetPrice(probs, curByte, p->ProbPrices));\r
- \r
- if (litPrice < nextOpt->price)\r
- {\r
- nextOpt->price = litPrice;\r
- nextOpt->len = 1;\r
- MakeAs_Lit(nextOpt);\r
- nextIsLit = True;\r
- }\r
- }\r
-\r
- repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]);\r
- \r
- numAvailFull = p->numAvail;\r
- {\r
- unsigned temp = kNumOpts - 1 - cur;\r
- if (numAvailFull > temp)\r
- numAvailFull = (UInt32)temp;\r
- }\r
-\r
- // 18.06\r
- // ---------- SHORT_REP ----------\r
- if (IsLitState(state)) // 18.new\r
- if (matchByte == curByte)\r
- if (repMatchPrice < nextOpt->price) // 18.new\r
- // if (numAvailFull < 2 || data[1] != *(data - reps[0] + 1))\r
- if (\r
- // nextOpt->price >= kInfinityPrice ||\r
- nextOpt->len < 2 // we can check nextOpt->len, if skip items are not allowed in p->opt\r
- || (nextOpt->dist != 0\r
- // && nextOpt->extra <= 1 // 17.old\r
- )\r
- )\r
- {\r
- UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, state, posState);\r
- // if (shortRepPrice <= nextOpt->price) // 17.old\r
- if (shortRepPrice < nextOpt->price) // 18.new\r
- {\r
- nextOpt->price = shortRepPrice;\r
- nextOpt->len = 1;\r
- MakeAs_ShortRep(nextOpt);\r
- nextIsLit = False;\r
- }\r
- }\r
- \r
- if (numAvailFull < 2)\r
- continue;\r
- numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes);\r
-\r
- // numAvail <= p->numFastBytes\r
-\r
- // ---------- LIT : REP_0 ----------\r
-\r
- if (!nextIsLit\r
- && litPrice != 0 // 18.new\r
- && matchByte != curByte\r
- && numAvailFull > 2)\r
- {\r
- const Byte *data2 = data - reps[0];\r
- if (data[1] == data2[1] && data[2] == data2[2])\r
- {\r
- unsigned len;\r
- unsigned limit = p->numFastBytes + 1;\r
- if (limit > numAvailFull)\r
- limit = numAvailFull;\r
- for (len = 3; len < limit && data[len] == data2[len]; len++)\r
- {}\r
- \r
- {\r
- unsigned state2 = kLiteralNextStates[state];\r
- unsigned posState2 = (position + 1) & p->pbMask;\r
- UInt32 price = litPrice + GetPrice_Rep_0(p, state2, posState2);\r
- {\r
- unsigned offset = cur + len;\r
-\r
- if (last < offset)\r
- last = offset;\r
- \r
- // do\r
- {\r
- UInt32 price2;\r
- COptimal *opt;\r
- len--;\r
- // price2 = price + GetPrice_Len_Rep_0(p, len, state2, posState2);\r
- price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len);\r
-\r
- opt = &p->opt[offset];\r
- // offset--;\r
- if (price2 < opt->price)\r
- {\r
- opt->price = price2;\r
- opt->len = (UInt32)len;\r
- opt->dist = 0;\r
- opt->extra = 1;\r
- }\r
- }\r
- // while (len >= 3);\r
- }\r
- }\r
- }\r
- }\r
- \r
- startLen = 2; /* speed optimization */\r
-\r
- {\r
- // ---------- REP ----------\r
- unsigned repIndex = 0; // 17.old\r
- // unsigned repIndex = IsLitState(state) ? 0 : 1; // 18.notused\r
- for (; repIndex < LZMA_NUM_REPS; repIndex++)\r
- {\r
- unsigned len;\r
- UInt32 price;\r
- const Byte *data2 = data - reps[repIndex];\r
- if (data[0] != data2[0] || data[1] != data2[1])\r
- continue;\r
- \r
- for (len = 2; len < numAvail && data[len] == data2[len]; len++)\r
- {}\r
- \r
- // if (len < startLen) continue; // 18.new: speed optimization\r
-\r
- {\r
- unsigned offset = cur + len;\r
- if (last < offset)\r
- last = offset;\r
- }\r
- {\r
- unsigned len2 = len;\r
- price = repMatchPrice + GetPrice_PureRep(p, repIndex, state, posState);\r
- do\r
- {\r
- UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, len2);\r
- COptimal *opt = &p->opt[cur + len2];\r
- if (price2 < opt->price)\r
- {\r
- opt->price = price2;\r
- opt->len = (UInt32)len2;\r
- opt->dist = (UInt32)repIndex;\r
- opt->extra = 0;\r
- }\r
- }\r
- while (--len2 >= 2);\r
- }\r
- \r
- if (repIndex == 0) startLen = len + 1; // 17.old\r
- // startLen = len + 1; // 18.new\r
-\r
- /* if (_maxMode) */\r
- {\r
- // ---------- REP : LIT : REP_0 ----------\r
- // numFastBytes + 1 + numFastBytes\r
-\r
- unsigned len2 = len + 1;\r
- unsigned limit = len2 + p->numFastBytes;\r
- if (limit > numAvailFull)\r
- limit = numAvailFull;\r
- \r
- len2 += 2;\r
- if (len2 <= limit)\r
- if (data[len2 - 2] == data2[len2 - 2])\r
- if (data[len2 - 1] == data2[len2 - 1])\r
- {\r
- unsigned state2 = kRepNextStates[state];\r
- unsigned posState2 = (position + len) & p->pbMask;\r
- price += GET_PRICE_LEN(&p->repLenEnc, posState, len)\r
- + GET_PRICE_0(p->isMatch[state2][posState2])\r
- + LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]),\r
- data[len], data2[len], p->ProbPrices);\r
- \r
- // state2 = kLiteralNextStates[state2];\r
- state2 = kState_LitAfterRep;\r
- posState2 = (posState2 + 1) & p->pbMask;\r
-\r
-\r
- price += GetPrice_Rep_0(p, state2, posState2);\r
-\r
- for (; len2 < limit && data[len2] == data2[len2]; len2++)\r
- {}\r
- \r
- len2 -= len;\r
- // if (len2 >= 3)\r
- {\r
- {\r
- unsigned offset = cur + len + len2;\r
-\r
- if (last < offset)\r
- last = offset;\r
- // do\r
- {\r
- UInt32 price2;\r
- COptimal *opt;\r
- len2--;\r
- // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2);\r
- price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2);\r
-\r
- opt = &p->opt[offset];\r
- // offset--;\r
- if (price2 < opt->price)\r
- {\r
- opt->price = price2;\r
- opt->len = (UInt32)len2;\r
- opt->extra = (CExtra)(len + 1);\r
- opt->dist = (UInt32)repIndex;\r
- }\r
- }\r
- // while (len2 >= 3);\r
- }\r
- }\r
- }\r
- }\r
- }\r
- }\r
-\r
-\r
- // ---------- MATCH ----------\r
- /* for (unsigned len = 2; len <= newLen; len++) */\r
- if (newLen > numAvail)\r
- {\r
- newLen = numAvail;\r
- for (numPairs = 0; newLen > MATCHES[numPairs]; numPairs += 2);\r
- MATCHES[numPairs] = (UInt32)newLen;\r
- numPairs += 2;\r
- }\r
- \r
- // startLen = 2; /* speed optimization */\r
-\r
- if (newLen >= startLen)\r
- {\r
- UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]);\r
- UInt32 dist;\r
- unsigned offs, posSlot, len;\r
- \r
- {\r
- unsigned offset = cur + newLen;\r
- if (last < offset)\r
- last = offset;\r
- }\r
-\r
- offs = 0;\r
- while (startLen > MATCHES[offs])\r
- offs += 2;\r
- dist = MATCHES[(size_t)offs + 1];\r
- \r
- // if (dist >= kNumFullDistances)\r
- GetPosSlot2(dist, posSlot);\r
- \r
- for (len = /*2*/ startLen; ; len++)\r
- {\r
- UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);\r
- {\r
- COptimal *opt;\r
- unsigned lenNorm = len - 2;\r
- lenNorm = GetLenToPosState2(lenNorm);\r
- if (dist < kNumFullDistances)\r
- price += p->distancesPrices[lenNorm][dist & (kNumFullDistances - 1)];\r
- else\r
- price += p->posSlotPrices[lenNorm][posSlot] + p->alignPrices[dist & kAlignMask];\r
- \r
- opt = &p->opt[cur + len];\r
- if (price < opt->price)\r
- {\r
- opt->price = price;\r
- opt->len = (UInt32)len;\r
- opt->dist = dist + LZMA_NUM_REPS;\r
- opt->extra = 0;\r
- }\r
- }\r
-\r
- if (len == MATCHES[offs])\r
- {\r
- // if (p->_maxMode) {\r
- // MATCH : LIT : REP_0\r
-\r
- const Byte *data2 = data - dist - 1;\r
- unsigned len2 = len + 1;\r
- unsigned limit = len2 + p->numFastBytes;\r
- if (limit > numAvailFull)\r
- limit = numAvailFull;\r
- \r
- len2 += 2;\r
- if (len2 <= limit)\r
- if (data[len2 - 2] == data2[len2 - 2])\r
- if (data[len2 - 1] == data2[len2 - 1])\r
- {\r
- for (; len2 < limit && data[len2] == data2[len2]; len2++)\r
- {}\r
- \r
- len2 -= len;\r
- \r
- // if (len2 >= 3)\r
- {\r
- unsigned state2 = kMatchNextStates[state];\r
- unsigned posState2 = (position + len) & p->pbMask;\r
- unsigned offset;\r
- price += GET_PRICE_0(p->isMatch[state2][posState2]);\r
- price += LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]),\r
- data[len], data2[len], p->ProbPrices);\r
-\r
- // state2 = kLiteralNextStates[state2];\r
- state2 = kState_LitAfterMatch;\r
-\r
- posState2 = (posState2 + 1) & p->pbMask;\r
- price += GetPrice_Rep_0(p, state2, posState2);\r
-\r
- offset = cur + len + len2;\r
-\r
- if (last < offset)\r
- last = offset;\r
- // do\r
- {\r
- UInt32 price2;\r
- COptimal *opt;\r
- len2--;\r
- // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2);\r
- price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2);\r
- opt = &p->opt[offset];\r
- // offset--;\r
- if (price2 < opt->price)\r
- {\r
- opt->price = price2;\r
- opt->len = (UInt32)len2;\r
- opt->extra = (CExtra)(len + 1);\r
- opt->dist = dist + LZMA_NUM_REPS;\r
- }\r
- }\r
- // while (len2 >= 3);\r
- }\r
-\r
- }\r
- \r
- offs += 2;\r
- if (offs == numPairs)\r
- break;\r
- dist = MATCHES[(size_t)offs + 1];\r
- // if (dist >= kNumFullDistances)\r
- GetPosSlot2(dist, posSlot);\r
- }\r
- }\r
- }\r
- }\r
-\r
- do\r
- p->opt[last].price = kInfinityPrice;\r
- while (--last);\r
-\r
- return Backward(p, cur);\r
-}\r
-\r
-\r
-\r
-#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist))\r
-\r
-\r
-\r
-static unsigned GetOptimumFast(CLzmaEnc *p)\r
-{\r
- UInt32 numAvail, mainDist;\r
- unsigned mainLen, numPairs, repIndex, repLen, i;\r
- const Byte *data;\r
-\r
- if (p->additionalOffset == 0)\r
- mainLen = ReadMatchDistances(p, &numPairs);\r
- else\r
- {\r
- mainLen = p->longestMatchLen;\r
- numPairs = p->numPairs;\r
- }\r
-\r
- numAvail = p->numAvail;\r
- p->backRes = MARK_LIT;\r
- if (numAvail < 2)\r
- return 1;\r
- // if (mainLen < 2 && p->state == 0) return 1; // 18.06.notused\r
- if (numAvail > LZMA_MATCH_LEN_MAX)\r
- numAvail = LZMA_MATCH_LEN_MAX;\r
- data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;\r
- repLen = repIndex = 0;\r
- \r
- for (i = 0; i < LZMA_NUM_REPS; i++)\r
- {\r
- unsigned len;\r
- const Byte *data2 = data - p->reps[i];\r
- if (data[0] != data2[0] || data[1] != data2[1])\r
- continue;\r
- for (len = 2; len < numAvail && data[len] == data2[len]; len++)\r
- {}\r
- if (len >= p->numFastBytes)\r
- {\r
- p->backRes = (UInt32)i;\r
- MOVE_POS(p, len - 1)\r
- return len;\r
- }\r
- if (len > repLen)\r
- {\r
- repIndex = i;\r
- repLen = len;\r
- }\r
- }\r
-\r
- if (mainLen >= p->numFastBytes)\r
- {\r
- p->backRes = p->matches[(size_t)numPairs - 1] + LZMA_NUM_REPS;\r
- MOVE_POS(p, mainLen - 1)\r
- return mainLen;\r
- }\r
-\r
- mainDist = 0; /* for GCC */\r
- \r
- if (mainLen >= 2)\r
- {\r
- mainDist = p->matches[(size_t)numPairs - 1];\r
- while (numPairs > 2)\r
- {\r
- UInt32 dist2;\r
- if (mainLen != p->matches[(size_t)numPairs - 4] + 1)\r
- break;\r
- dist2 = p->matches[(size_t)numPairs - 3];\r
- if (!ChangePair(dist2, mainDist))\r
- break;\r
- numPairs -= 2;\r
- mainLen--;\r
- mainDist = dist2;\r
- }\r
- if (mainLen == 2 && mainDist >= 0x80)\r
- mainLen = 1;\r
- }\r
-\r
- if (repLen >= 2)\r
- if ( repLen + 1 >= mainLen\r
- || (repLen + 2 >= mainLen && mainDist >= (1 << 9))\r
- || (repLen + 3 >= mainLen && mainDist >= (1 << 15)))\r
- {\r
- p->backRes = (UInt32)repIndex;\r
- MOVE_POS(p, repLen - 1)\r
- return repLen;\r
- }\r
- \r
- if (mainLen < 2 || numAvail <= 2)\r
- return 1;\r
-\r
- {\r
- unsigned len1 = ReadMatchDistances(p, &p->numPairs);\r
- p->longestMatchLen = len1;\r
- \r
- if (len1 >= 2)\r
- {\r
- UInt32 newDist = p->matches[(size_t)p->numPairs - 1];\r
- if ( (len1 >= mainLen && newDist < mainDist)\r
- || (len1 == mainLen + 1 && !ChangePair(mainDist, newDist))\r
- || (len1 > mainLen + 1)\r
- || (len1 + 1 >= mainLen && mainLen >= 3 && ChangePair(newDist, mainDist)))\r
- return 1;\r
- }\r
- }\r
- \r
- data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;\r
- \r
- for (i = 0; i < LZMA_NUM_REPS; i++)\r
- {\r
- unsigned len, limit;\r
- const Byte *data2 = data - p->reps[i];\r
- if (data[0] != data2[0] || data[1] != data2[1])\r
- continue;\r
- limit = mainLen - 1;\r
- for (len = 2;; len++)\r
- {\r
- if (len >= limit)\r
- return 1;\r
- if (data[len] != data2[len])\r
- break;\r
- }\r
- }\r
- \r
- p->backRes = mainDist + LZMA_NUM_REPS;\r
- if (mainLen != 2)\r
- {\r
- MOVE_POS(p, mainLen - 2)\r
- }\r
- return mainLen;\r
-}\r
-\r
-\r
-\r
-\r
-static void WriteEndMarker(CLzmaEnc *p, unsigned posState)\r
-{\r
- UInt32 range;\r
- range = p->rc.range;\r
- {\r
- UInt32 ttt, newBound;\r
- CLzmaProb *prob = &p->isMatch[p->state][posState];\r
- RC_BIT_PRE(&p->rc, prob)\r
- RC_BIT_1(&p->rc, prob)\r
- prob = &p->isRep[p->state];\r
- RC_BIT_PRE(&p->rc, prob)\r
- RC_BIT_0(&p->rc, prob)\r
- }\r
- p->state = kMatchNextStates[p->state];\r
- \r
- p->rc.range = range;\r
- LenEnc_Encode(&p->lenProbs, &p->rc, 0, posState);\r
- range = p->rc.range;\r
-\r
- {\r
- // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[0], (1 << kNumPosSlotBits) - 1);\r
- CLzmaProb *probs = p->posSlotEncoder[0];\r
- unsigned m = 1;\r
- do\r
- {\r
- UInt32 ttt, newBound;\r
- RC_BIT_PRE(p, probs + m)\r
- RC_BIT_1(&p->rc, probs + m);\r
- m = (m << 1) + 1;\r
- }\r
- while (m < (1 << kNumPosSlotBits));\r
- }\r
- {\r
- // RangeEnc_EncodeDirectBits(&p->rc, ((UInt32)1 << (30 - kNumAlignBits)) - 1, 30 - kNumAlignBits); UInt32 range = p->range;\r
- unsigned numBits = 30 - kNumAlignBits;\r
- do\r
- {\r
- range >>= 1;\r
- p->rc.low += range;\r
- RC_NORM(&p->rc)\r
- }\r
- while (--numBits);\r
- }\r
- \r
- {\r
- // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask);\r
- CLzmaProb *probs = p->posAlignEncoder;\r
- unsigned m = 1;\r
- do\r
- {\r
- UInt32 ttt, newBound;\r
- RC_BIT_PRE(p, probs + m)\r
- RC_BIT_1(&p->rc, probs + m);\r
- m = (m << 1) + 1;\r
- }\r
- while (m < kAlignTableSize);\r
- }\r
- p->rc.range = range;\r
-}\r
-\r
-\r
-static SRes CheckErrors(CLzmaEnc *p)\r
-{\r
- if (p->result != SZ_OK)\r
- return p->result;\r
- if (p->rc.res != SZ_OK)\r
- p->result = SZ_ERROR_WRITE;\r
-\r
- #ifndef _7ZIP_ST\r
- if (\r
- // p->mf_Failure ||\r
- (p->mtMode &&\r
- ( // p->matchFinderMt.failure_LZ_LZ ||\r
- p->matchFinderMt.failure_LZ_BT))\r
- )\r
- {\r
- p->result = MY_HRES_ERROR__INTERNAL_ERROR;\r
- // printf("\nCheckErrors p->matchFinderMt.failureLZ\n");\r
- }\r
- #endif\r
-\r
- if (MFB.result != SZ_OK)\r
- p->result = SZ_ERROR_READ;\r
- \r
- if (p->result != SZ_OK)\r
- p->finished = True;\r
- return p->result;\r
-}\r
-\r
-\r
-MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)\r
-{\r
- /* ReleaseMFStream(); */\r
- p->finished = True;\r
- if (p->writeEndMark)\r
- WriteEndMarker(p, nowPos & p->pbMask);\r
- RangeEnc_FlushData(&p->rc);\r
- RangeEnc_FlushStream(&p->rc);\r
- return CheckErrors(p);\r
-}\r
-\r
-\r
-MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)\r
-{\r
- unsigned i;\r
- const CProbPrice *ProbPrices = p->ProbPrices;\r
- const CLzmaProb *probs = p->posAlignEncoder;\r
- // p->alignPriceCount = 0;\r
- for (i = 0; i < kAlignTableSize / 2; i++)\r
- {\r
- UInt32 price = 0;\r
- unsigned sym = i;\r
- unsigned m = 1;\r
- unsigned bit;\r
- UInt32 prob;\r
- bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;\r
- bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;\r
- bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;\r
- prob = probs[m];\r
- p->alignPrices[i ] = price + GET_PRICEa_0(prob);\r
- p->alignPrices[i + 8] = price + GET_PRICEa_1(prob);\r
- // p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices);\r
- }\r
-}\r
-\r
-\r
-MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)\r
-{\r
- // int y; for (y = 0; y < 100; y++) {\r
-\r
- UInt32 tempPrices[kNumFullDistances];\r
- unsigned i, lps;\r
-\r
- const CProbPrice *ProbPrices = p->ProbPrices;\r
- p->matchPriceCount = 0;\r
-\r
- for (i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++)\r
- {\r
- unsigned posSlot = GetPosSlot1(i);\r
- unsigned footerBits = (posSlot >> 1) - 1;\r
- unsigned base = ((2 | (posSlot & 1)) << footerBits);\r
- const CLzmaProb *probs = p->posEncoders + (size_t)base * 2;\r
- // tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base, footerBits, i - base, p->ProbPrices);\r
- UInt32 price = 0;\r
- unsigned m = 1;\r
- unsigned sym = i;\r
- unsigned offset = (unsigned)1 << footerBits;\r
- base += i;\r
- \r
- if (footerBits)\r
- do\r
- {\r
- unsigned bit = sym & 1;\r
- sym >>= 1;\r
- price += GET_PRICEa(probs[m], bit);\r
- m = (m << 1) + bit;\r
- }\r
- while (--footerBits);\r
-\r
- {\r
- unsigned prob = probs[m];\r
- tempPrices[base ] = price + GET_PRICEa_0(prob);\r
- tempPrices[base + offset] = price + GET_PRICEa_1(prob);\r
- }\r
- }\r
-\r
- for (lps = 0; lps < kNumLenToPosStates; lps++)\r
- {\r
- unsigned slot;\r
- unsigned distTableSize2 = (p->distTableSize + 1) >> 1;\r
- UInt32 *posSlotPrices = p->posSlotPrices[lps];\r
- const CLzmaProb *probs = p->posSlotEncoder[lps];\r
- \r
- for (slot = 0; slot < distTableSize2; slot++)\r
- {\r
- // posSlotPrices[slot] = RcTree_GetPrice(encoder, kNumPosSlotBits, slot, p->ProbPrices);\r
- UInt32 price;\r
- unsigned bit;\r
- unsigned sym = slot + (1 << (kNumPosSlotBits - 1));\r
- unsigned prob;\r
- bit = sym & 1; sym >>= 1; price = GET_PRICEa(probs[sym], bit);\r
- bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);\r
- bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);\r
- bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);\r
- bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);\r
- prob = probs[(size_t)slot + (1 << (kNumPosSlotBits - 1))];\r
- posSlotPrices[(size_t)slot * 2 ] = price + GET_PRICEa_0(prob);\r
- posSlotPrices[(size_t)slot * 2 + 1] = price + GET_PRICEa_1(prob);\r
- }\r
- \r
- {\r
- UInt32 delta = ((UInt32)((kEndPosModelIndex / 2 - 1) - kNumAlignBits) << kNumBitPriceShiftBits);\r
- for (slot = kEndPosModelIndex / 2; slot < distTableSize2; slot++)\r
- {\r
- posSlotPrices[(size_t)slot * 2 ] += delta;\r
- posSlotPrices[(size_t)slot * 2 + 1] += delta;\r
- delta += ((UInt32)1 << kNumBitPriceShiftBits);\r
- }\r
- }\r
-\r
- {\r
- UInt32 *dp = p->distancesPrices[lps];\r
- \r
- dp[0] = posSlotPrices[0];\r
- dp[1] = posSlotPrices[1];\r
- dp[2] = posSlotPrices[2];\r
- dp[3] = posSlotPrices[3];\r
-\r
- for (i = 4; i < kNumFullDistances; i += 2)\r
- {\r
- UInt32 slotPrice = posSlotPrices[GetPosSlot1(i)];\r
- dp[i ] = slotPrice + tempPrices[i];\r
- dp[i + 1] = slotPrice + tempPrices[i + 1];\r
- }\r
- }\r
- }\r
- // }\r
-}\r
-\r
-\r
-\r
-static void LzmaEnc_Construct(CLzmaEnc *p)\r
-{\r
- RangeEnc_Construct(&p->rc);\r
- MatchFinder_Construct(&MFB);\r
- \r
- #ifndef _7ZIP_ST\r
- p->matchFinderMt.MatchFinder = &MFB;\r
- MatchFinderMt_Construct(&p->matchFinderMt);\r
- #endif\r
-\r
- {\r
- CLzmaEncProps props;\r
- LzmaEncProps_Init(&props);\r
- LzmaEnc_SetProps(p, &props);\r
- }\r
-\r
- #ifndef LZMA_LOG_BSR\r
- LzmaEnc_FastPosInit(p->g_FastPos);\r
- #endif\r
-\r
- LzmaEnc_InitPriceTables(p->ProbPrices);\r
- p->litProbs = NULL;\r
- p->saveState.litProbs = NULL;\r
-}\r
-\r
-CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)\r
-{\r
- void *p;\r
- p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc));\r
- if (p)\r
- LzmaEnc_Construct((CLzmaEnc *)p);\r
- return p;\r
-}\r
-\r
-static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)\r
-{\r
- ISzAlloc_Free(alloc, p->litProbs);\r
- ISzAlloc_Free(alloc, p->saveState.litProbs);\r
- p->litProbs = NULL;\r
- p->saveState.litProbs = NULL;\r
-}\r
-\r
-static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)\r
-{\r
- #ifndef _7ZIP_ST\r
- MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);\r
- #endif\r
- \r
- MatchFinder_Free(&MFB, allocBig);\r
- LzmaEnc_FreeLits(p, alloc);\r
- RangeEnc_Free(&p->rc, alloc);\r
-}\r
-\r
-void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)\r
-{\r
- LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig);\r
- ISzAlloc_Free(alloc, p);\r
-}\r
-\r
-\r
-MY_NO_INLINE\r
-static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize)\r
-{\r
- UInt32 nowPos32, startPos32;\r
- if (p->needInit)\r
- {\r
- #ifndef _7ZIP_ST\r
- if (p->mtMode)\r
- {\r
- RINOK(MatchFinderMt_InitMt(&p->matchFinderMt));\r
- }\r
- #endif\r
- p->matchFinder.Init(p->matchFinderObj);\r
- p->needInit = 0;\r
- }\r
-\r
- if (p->finished)\r
- return p->result;\r
- RINOK(CheckErrors(p));\r
-\r
- nowPos32 = (UInt32)p->nowPos64;\r
- startPos32 = nowPos32;\r
-\r
- if (p->nowPos64 == 0)\r
- {\r
- unsigned numPairs;\r
- Byte curByte;\r
- if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)\r
- return Flush(p, nowPos32);\r
- ReadMatchDistances(p, &numPairs);\r
- RangeEnc_EncodeBit_0(&p->rc, &p->isMatch[kState_Start][0]);\r
- // p->state = kLiteralNextStates[p->state];\r
- curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset);\r
- LitEnc_Encode(&p->rc, p->litProbs, curByte);\r
- p->additionalOffset--;\r
- nowPos32++;\r
- }\r
-\r
- if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0)\r
- \r
- for (;;)\r
- {\r
- UInt32 dist;\r
- unsigned len, posState;\r
- UInt32 range, ttt, newBound;\r
- CLzmaProb *probs;\r
- \r
- if (p->fastMode)\r
- len = GetOptimumFast(p);\r
- else\r
- {\r
- unsigned oci = p->optCur;\r
- if (p->optEnd == oci)\r
- len = GetOptimum(p, nowPos32);\r
- else\r
- {\r
- const COptimal *opt = &p->opt[oci];\r
- len = opt->len;\r
- p->backRes = opt->dist;\r
- p->optCur = oci + 1;\r
- }\r
- }\r
-\r
- posState = (unsigned)nowPos32 & p->pbMask;\r
- range = p->rc.range;\r
- probs = &p->isMatch[p->state][posState];\r
- \r
- RC_BIT_PRE(&p->rc, probs)\r
- \r
- dist = p->backRes;\r
-\r
- #ifdef SHOW_STAT2\r
- printf("\n pos = %6X, len = %3u pos = %6u", nowPos32, len, dist);\r
- #endif\r
-\r
- if (dist == MARK_LIT)\r
- {\r
- Byte curByte;\r
- const Byte *data;\r
- unsigned state;\r
-\r
- RC_BIT_0(&p->rc, probs);\r
- p->rc.range = range;\r
- data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;\r
- probs = LIT_PROBS(nowPos32, *(data - 1));\r
- curByte = *data;\r
- state = p->state;\r
- p->state = kLiteralNextStates[state];\r
- if (IsLitState(state))\r
- LitEnc_Encode(&p->rc, probs, curByte);\r
- else\r
- LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0]));\r
- }\r
- else\r
- {\r
- RC_BIT_1(&p->rc, probs);\r
- probs = &p->isRep[p->state];\r
- RC_BIT_PRE(&p->rc, probs)\r
- \r
- if (dist < LZMA_NUM_REPS)\r
- {\r
- RC_BIT_1(&p->rc, probs);\r
- probs = &p->isRepG0[p->state];\r
- RC_BIT_PRE(&p->rc, probs)\r
- if (dist == 0)\r
- {\r
- RC_BIT_0(&p->rc, probs);\r
- probs = &p->isRep0Long[p->state][posState];\r
- RC_BIT_PRE(&p->rc, probs)\r
- if (len != 1)\r
- {\r
- RC_BIT_1_BASE(&p->rc, probs);\r
- }\r
- else\r
- {\r
- RC_BIT_0_BASE(&p->rc, probs);\r
- p->state = kShortRepNextStates[p->state];\r
- }\r
- }\r
- else\r
- {\r
- RC_BIT_1(&p->rc, probs);\r
- probs = &p->isRepG1[p->state];\r
- RC_BIT_PRE(&p->rc, probs)\r
- if (dist == 1)\r
- {\r
- RC_BIT_0_BASE(&p->rc, probs);\r
- dist = p->reps[1];\r
- }\r
- else\r
- {\r
- RC_BIT_1(&p->rc, probs);\r
- probs = &p->isRepG2[p->state];\r
- RC_BIT_PRE(&p->rc, probs)\r
- if (dist == 2)\r
- {\r
- RC_BIT_0_BASE(&p->rc, probs);\r
- dist = p->reps[2];\r
- }\r
- else\r
- {\r
- RC_BIT_1_BASE(&p->rc, probs);\r
- dist = p->reps[3];\r
- p->reps[3] = p->reps[2];\r
- }\r
- p->reps[2] = p->reps[1];\r
- }\r
- p->reps[1] = p->reps[0];\r
- p->reps[0] = dist;\r
- }\r
-\r
- RC_NORM(&p->rc)\r
-\r
- p->rc.range = range;\r
-\r
- if (len != 1)\r
- {\r
- LenEnc_Encode(&p->repLenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState);\r
- --p->repLenEncCounter;\r
- p->state = kRepNextStates[p->state];\r
- }\r
- }\r
- else\r
- {\r
- unsigned posSlot;\r
- RC_BIT_0(&p->rc, probs);\r
- p->rc.range = range;\r
- p->state = kMatchNextStates[p->state];\r
-\r
- LenEnc_Encode(&p->lenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState);\r
- // --p->lenEnc.counter;\r
-\r
- dist -= LZMA_NUM_REPS;\r
- p->reps[3] = p->reps[2];\r
- p->reps[2] = p->reps[1];\r
- p->reps[1] = p->reps[0];\r
- p->reps[0] = dist + 1;\r
- \r
- p->matchPriceCount++;\r
- GetPosSlot(dist, posSlot);\r
- // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot);\r
- {\r
- UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits);\r
- range = p->rc.range;\r
- probs = p->posSlotEncoder[GetLenToPosState(len)];\r
- do\r
- {\r
- CLzmaProb *prob = probs + (sym >> kNumPosSlotBits);\r
- UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1;\r
- sym <<= 1;\r
- RC_BIT(&p->rc, prob, bit);\r
- }\r
- while (sym < (1 << kNumPosSlotBits * 2));\r
- p->rc.range = range;\r
- }\r
- \r
- if (dist >= kStartPosModelIndex)\r
- {\r
- unsigned footerBits = ((posSlot >> 1) - 1);\r
-\r
- if (dist < kNumFullDistances)\r
- {\r
- unsigned base = ((2 | (posSlot & 1)) << footerBits);\r
- RcTree_ReverseEncode(&p->rc, p->posEncoders + base, footerBits, (unsigned)(dist /* - base */));\r
- }\r
- else\r
- {\r
- UInt32 pos2 = (dist | 0xF) << (32 - footerBits);\r
- range = p->rc.range;\r
- // RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits);\r
- /*\r
- do\r
- {\r
- range >>= 1;\r
- p->rc.low += range & (0 - ((dist >> --footerBits) & 1));\r
- RC_NORM(&p->rc)\r
- }\r
- while (footerBits > kNumAlignBits);\r
- */\r
- do\r
- {\r
- range >>= 1;\r
- p->rc.low += range & (0 - (pos2 >> 31));\r
- pos2 += pos2;\r
- RC_NORM(&p->rc)\r
- }\r
- while (pos2 != 0xF0000000);\r
-\r
-\r
- // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask);\r
-\r
- {\r
- unsigned m = 1;\r
- unsigned bit;\r
- bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit;\r
- bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit;\r
- bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit;\r
- bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit);\r
- p->rc.range = range;\r
- // p->alignPriceCount++;\r
- }\r
- }\r
- }\r
- }\r
- }\r
-\r
- nowPos32 += (UInt32)len;\r
- p->additionalOffset -= len;\r
- \r
- if (p->additionalOffset == 0)\r
- {\r
- UInt32 processed;\r
-\r
- if (!p->fastMode)\r
- {\r
- /*\r
- if (p->alignPriceCount >= 16) // kAlignTableSize\r
- FillAlignPrices(p);\r
- if (p->matchPriceCount >= 128)\r
- FillDistancesPrices(p);\r
- if (p->lenEnc.counter <= 0)\r
- LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);\r
- */\r
- if (p->matchPriceCount >= 64)\r
- {\r
- FillAlignPrices(p);\r
- // { int y; for (y = 0; y < 100; y++) {\r
- FillDistancesPrices(p);\r
- // }}\r
- LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);\r
- }\r
- if (p->repLenEncCounter <= 0)\r
- {\r
- p->repLenEncCounter = REP_LEN_COUNT;\r
- LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);\r
- }\r
- }\r
- \r
- if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)\r
- break;\r
- processed = nowPos32 - startPos32;\r
- \r
- if (maxPackSize)\r
- {\r
- if (processed + kNumOpts + 300 >= maxUnpackSize\r
- || RangeEnc_GetProcessed_sizet(&p->rc) + kPackReserve >= maxPackSize)\r
- break;\r
- }\r
- else if (processed >= (1 << 17))\r
- {\r
- p->nowPos64 += nowPos32 - startPos32;\r
- return CheckErrors(p);\r
- }\r
- }\r
- }\r
-\r
- p->nowPos64 += nowPos32 - startPos32;\r
- return Flush(p, nowPos32);\r
-}\r
-\r
-\r
-\r
-#define kBigHashDicLimit ((UInt32)1 << 24)\r
-\r
-static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)\r
-{\r
- UInt32 beforeSize = kNumOpts;\r
- UInt32 dictSize;\r
-\r
- if (!RangeEnc_Alloc(&p->rc, alloc))\r
- return SZ_ERROR_MEM;\r
-\r
- #ifndef _7ZIP_ST\r
- p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0));\r
- #endif\r
-\r
- {\r
- unsigned lclp = p->lc + p->lp;\r
- if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp)\r
- {\r
- LzmaEnc_FreeLits(p, alloc);\r
- p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));\r
- p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));\r
- if (!p->litProbs || !p->saveState.litProbs)\r
- {\r
- LzmaEnc_FreeLits(p, alloc);\r
- return SZ_ERROR_MEM;\r
- }\r
- p->lclp = lclp;\r
- }\r
- }\r
-\r
- MFB.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);\r
-\r
-\r
- dictSize = p->dictSize;\r
- if (dictSize == ((UInt32)2 << 30) ||\r
- dictSize == ((UInt32)3 << 30))\r
- {\r
- /* 21.03 : here we reduce the dictionary for 2 reasons:\r
- 1) we don't want 32-bit back_distance matches in decoder for 2 GB dictionary.\r
- 2) we want to elimate useless last MatchFinder_Normalize3() for corner cases,\r
- where data size is aligned for 1 GB: 5/6/8 GB.\r
- That reducing must be >= 1 for such corner cases. */\r
- dictSize -= 1;\r
- }\r
-\r
- if (beforeSize + dictSize < keepWindowSize)\r
- beforeSize = keepWindowSize - dictSize;\r
-\r
- /* in worst case we can look ahead for\r
- max(LZMA_MATCH_LEN_MAX, numFastBytes + 1 + numFastBytes) bytes.\r
- we send larger value for (keepAfter) to MantchFinder_Create():\r
- (numFastBytes + LZMA_MATCH_LEN_MAX + 1)\r
- */\r
-\r
- #ifndef _7ZIP_ST\r
- if (p->mtMode)\r
- {\r
- RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize,\r
- p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */\r
- , allocBig));\r
- p->matchFinderObj = &p->matchFinderMt;\r
- MFB.bigHash = (Byte)(\r
- (p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0);\r
- MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);\r
- }\r
- else\r
- #endif\r
- {\r
- if (!MatchFinder_Create(&MFB, dictSize, beforeSize,\r
- p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 21.03 */\r
- , allocBig))\r
- return SZ_ERROR_MEM;\r
- p->matchFinderObj = &MFB;\r
- MatchFinder_CreateVTable(&MFB, &p->matchFinder);\r
- }\r
- \r
- return SZ_OK;\r
-}\r
-\r
-static void LzmaEnc_Init(CLzmaEnc *p)\r
-{\r
- unsigned i;\r
- p->state = 0;\r
- p->reps[0] =\r
- p->reps[1] =\r
- p->reps[2] =\r
- p->reps[3] = 1;\r
-\r
- RangeEnc_Init(&p->rc);\r
-\r
- for (i = 0; i < (1 << kNumAlignBits); i++)\r
- p->posAlignEncoder[i] = kProbInitValue;\r
-\r
- for (i = 0; i < kNumStates; i++)\r
- {\r
- unsigned j;\r
- for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++)\r
- {\r
- p->isMatch[i][j] = kProbInitValue;\r
- p->isRep0Long[i][j] = kProbInitValue;\r
- }\r
- p->isRep[i] = kProbInitValue;\r
- p->isRepG0[i] = kProbInitValue;\r
- p->isRepG1[i] = kProbInitValue;\r
- p->isRepG2[i] = kProbInitValue;\r
- }\r
-\r
- {\r
- for (i = 0; i < kNumLenToPosStates; i++)\r
- {\r
- CLzmaProb *probs = p->posSlotEncoder[i];\r
- unsigned j;\r
- for (j = 0; j < (1 << kNumPosSlotBits); j++)\r
- probs[j] = kProbInitValue;\r
- }\r
- }\r
- {\r
- for (i = 0; i < kNumFullDistances; i++)\r
- p->posEncoders[i] = kProbInitValue;\r
- }\r
-\r
- {\r
- UInt32 num = (UInt32)0x300 << (p->lp + p->lc);\r
- UInt32 k;\r
- CLzmaProb *probs = p->litProbs;\r
- for (k = 0; k < num; k++)\r
- probs[k] = kProbInitValue;\r
- }\r
-\r
-\r
- LenEnc_Init(&p->lenProbs);\r
- LenEnc_Init(&p->repLenProbs);\r
-\r
- p->optEnd = 0;\r
- p->optCur = 0;\r
-\r
- {\r
- for (i = 0; i < kNumOpts; i++)\r
- p->opt[i].price = kInfinityPrice;\r
- }\r
-\r
- p->additionalOffset = 0;\r
-\r
- p->pbMask = ((unsigned)1 << p->pb) - 1;\r
- p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);\r
-\r
- // p->mf_Failure = False;\r
-}\r
-\r
-\r
-static void LzmaEnc_InitPrices(CLzmaEnc *p)\r
-{\r
- if (!p->fastMode)\r
- {\r
- FillDistancesPrices(p);\r
- FillAlignPrices(p);\r
- }\r
-\r
- p->lenEnc.tableSize =\r
- p->repLenEnc.tableSize =\r
- p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN;\r
-\r
- p->repLenEncCounter = REP_LEN_COUNT;\r
-\r
- LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);\r
- LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);\r
-}\r
-\r
-static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)\r
-{\r
- unsigned i;\r
- for (i = kEndPosModelIndex / 2; i < kDicLogSizeMax; i++)\r
- if (p->dictSize <= ((UInt32)1 << i))\r
- break;\r
- p->distTableSize = i * 2;\r
-\r
- p->finished = False;\r
- p->result = SZ_OK;\r
- RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig));\r
- LzmaEnc_Init(p);\r
- LzmaEnc_InitPrices(p);\r
- p->nowPos64 = 0;\r
- return SZ_OK;\r
-}\r
-\r
-static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream,\r
- ISzAllocPtr alloc, ISzAllocPtr allocBig)\r
-{\r
- CLzmaEnc *p = (CLzmaEnc *)pp;\r
- MFB.stream = inStream;\r
- p->needInit = 1;\r
- p->rc.outStream = outStream;\r
- return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);\r
-}\r
-\r
-SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,\r
- ISeqInStream *inStream, UInt32 keepWindowSize,\r
- ISzAllocPtr alloc, ISzAllocPtr allocBig)\r
-{\r
- CLzmaEnc *p = (CLzmaEnc *)pp;\r
- MFB.stream = inStream;\r
- p->needInit = 1;\r
- return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);\r
-}\r
-\r
-static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)\r
-{\r
- MFB.directInput = 1;\r
- MFB.bufferBase = (Byte *)src;\r
- MFB.directInputRem = srcLen;\r
-}\r
-\r
-SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,\r
- UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)\r
-{\r
- CLzmaEnc *p = (CLzmaEnc *)pp;\r
- LzmaEnc_SetInputBuf(p, src, srcLen);\r
- p->needInit = 1;\r
-\r
- LzmaEnc_SetDataSize(pp, srcLen);\r
- return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);\r
-}\r
-\r
-void LzmaEnc_Finish(CLzmaEncHandle pp)\r
-{\r
- #ifndef _7ZIP_ST\r
- CLzmaEnc *p = (CLzmaEnc *)pp;\r
- if (p->mtMode)\r
- MatchFinderMt_ReleaseStream(&p->matchFinderMt);\r
- #else\r
- UNUSED_VAR(pp);\r
- #endif\r
-}\r
-\r
-\r
-typedef struct\r
-{\r
- ISeqOutStream vt;\r
- Byte *data;\r
- SizeT rem;\r
- BoolInt overflow;\r
-} CLzmaEnc_SeqOutStreamBuf;\r
-\r
-static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size)\r
-{\r
- CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt);\r
- if (p->rem < size)\r
- {\r
- size = p->rem;\r
- p->overflow = True;\r
- }\r
- if (size != 0)\r
- {\r
- memcpy(p->data, data, size);\r
- p->rem -= size;\r
- p->data += size;\r
- }\r
- return size;\r
-}\r
-\r
-\r
-/*\r
-UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)\r
-{\r
- const CLzmaEnc *p = (CLzmaEnc *)pp;\r
- return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);\r
-}\r
-*/\r
-\r
-const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)\r
-{\r
- const CLzmaEnc *p = (CLzmaEnc *)pp;\r
- return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;\r
-}\r
-\r
-\r
-// (desiredPackSize == 0) is not allowed\r
-SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,\r
- Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)\r
-{\r
- CLzmaEnc *p = (CLzmaEnc *)pp;\r
- UInt64 nowPos64;\r
- SRes res;\r
- CLzmaEnc_SeqOutStreamBuf outStream;\r
-\r
- outStream.vt.Write = SeqOutStreamBuf_Write;\r
- outStream.data = dest;\r
- outStream.rem = *destLen;\r
- outStream.overflow = False;\r
-\r
- p->writeEndMark = False;\r
- p->finished = False;\r
- p->result = SZ_OK;\r
-\r
- if (reInit)\r
- LzmaEnc_Init(p);\r
- LzmaEnc_InitPrices(p);\r
- RangeEnc_Init(&p->rc);\r
- p->rc.outStream = &outStream.vt;\r
- nowPos64 = p->nowPos64;\r
- \r
- res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize);\r
- \r
- *unpackSize = (UInt32)(p->nowPos64 - nowPos64);\r
- *destLen -= outStream.rem;\r
- if (outStream.overflow)\r
- return SZ_ERROR_OUTPUT_EOF;\r
-\r
- return res;\r
-}\r
-\r
-\r
-MY_NO_INLINE\r
-static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)\r
-{\r
- SRes res = SZ_OK;\r
-\r
- #ifndef _7ZIP_ST\r
- Byte allocaDummy[0x300];\r
- allocaDummy[0] = 0;\r
- allocaDummy[1] = allocaDummy[0];\r
- #endif\r
-\r
- for (;;)\r
- {\r
- res = LzmaEnc_CodeOneBlock(p, 0, 0);\r
- if (res != SZ_OK || p->finished)\r
- break;\r
- if (progress)\r
- {\r
- res = ICompressProgress_Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc));\r
- if (res != SZ_OK)\r
- {\r
- res = SZ_ERROR_PROGRESS;\r
- break;\r
- }\r
- }\r
- }\r
- \r
- LzmaEnc_Finish(p);\r
-\r
- /*\r
- if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB))\r
- res = SZ_ERROR_FAIL;\r
- }\r
- */\r
-\r
- return res;\r
-}\r
-\r
-\r
-SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress,\r
- ISzAllocPtr alloc, ISzAllocPtr allocBig)\r
-{\r
- RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig));\r
- return LzmaEnc_Encode2((CLzmaEnc *)pp, progress);\r
-}\r
-\r
-\r
-SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)\r
-{\r
- if (*size < LZMA_PROPS_SIZE)\r
- return SZ_ERROR_PARAM;\r
- *size = LZMA_PROPS_SIZE;\r
- {\r
- const CLzmaEnc *p = (const CLzmaEnc *)pp;\r
- const UInt32 dictSize = p->dictSize;\r
- UInt32 v;\r
- props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);\r
- \r
- // we write aligned dictionary value to properties for lzma decoder\r
- if (dictSize >= ((UInt32)1 << 21))\r
- {\r
- const UInt32 kDictMask = ((UInt32)1 << 20) - 1;\r
- v = (dictSize + kDictMask) & ~kDictMask;\r
- if (v < dictSize)\r
- v = dictSize;\r
- }\r
- else\r
- {\r
- unsigned i = 11 * 2;\r
- do\r
- {\r
- v = (UInt32)(2 + (i & 1)) << (i >> 1);\r
- i++;\r
- }\r
- while (v < dictSize);\r
- }\r
-\r
- SetUi32(props + 1, v);\r
- return SZ_OK;\r
- }\r
-}\r
-\r
-\r
-unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp)\r
-{\r
- return (unsigned)((CLzmaEnc *)pp)->writeEndMark;\r
-}\r
-\r
-\r
-SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,\r
- int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)\r
-{\r
- SRes res;\r
- CLzmaEnc *p = (CLzmaEnc *)pp;\r
-\r
- CLzmaEnc_SeqOutStreamBuf outStream;\r
-\r
- outStream.vt.Write = SeqOutStreamBuf_Write;\r
- outStream.data = dest;\r
- outStream.rem = *destLen;\r
- outStream.overflow = False;\r
-\r
- p->writeEndMark = writeEndMark;\r
- p->rc.outStream = &outStream.vt;\r
-\r
- res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig);\r
- \r
- if (res == SZ_OK)\r
- {\r
- res = LzmaEnc_Encode2(p, progress);\r
- if (res == SZ_OK && p->nowPos64 != srcLen)\r
- res = SZ_ERROR_FAIL;\r
- }\r
-\r
- *destLen -= outStream.rem;\r
- if (outStream.overflow)\r
- return SZ_ERROR_OUTPUT_EOF;\r
- return res;\r
-}\r
-\r
-\r
-SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,\r
- const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,\r
- ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)\r
-{\r
- CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc);\r
- SRes res;\r
- if (!p)\r
- return SZ_ERROR_MEM;\r
-\r
- res = LzmaEnc_SetProps(p, props);\r
- if (res == SZ_OK)\r
- {\r
- res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize);\r
- if (res == SZ_OK)\r
- res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen,\r
- writeEndMark, progress, alloc, allocBig);\r
- }\r
-\r
- LzmaEnc_Destroy(p, alloc, allocBig);\r
- return res;\r
-}\r
-\r
-\r
-/*\r
-#ifndef _7ZIP_ST\r
-void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2])\r
-{\r
- const CLzmaEnc *p = (CLzmaEnc *)pp;\r
- lz_threads[0] = p->matchFinderMt.hashSync.thread;\r
- lz_threads[1] = p->matchFinderMt.btSync.thread;\r
-}\r
-#endif\r
-*/\r
+/* LzmaEnc.c -- LZMA Encoder
+2024-01-24: Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+/* #define SHOW_STAT */
+/* #define SHOW_STAT2 */
+
+#if defined(SHOW_STAT) || defined(SHOW_STAT2)
+#include <stdio.h>
+#endif
+
+#include "CpuArch.h"
+#include "LzmaEnc.h"
+
+#include "LzFind.h"
+#ifndef Z7_ST
+#include "LzFindMt.h"
+#endif
+
+/* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */
+
+SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize,
+ ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen,
+ UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
+ Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
+const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p);
+void LzmaEnc_Finish(CLzmaEncHandle p);
+void LzmaEnc_SaveState(CLzmaEncHandle p);
+void LzmaEnc_RestoreState(CLzmaEncHandle p);
+
+#ifdef SHOW_STAT
+static unsigned g_STAT_OFFSET = 0;
+#endif
+
+/* for good normalization speed we still reserve 256 MB before 4 GB range */
+#define kLzmaMaxHistorySize ((UInt32)15 << 28)
+
+// #define kNumTopBits 24
+#define kTopValue ((UInt32)1 << 24)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+#define kProbInitValue (kBitModelTotal >> 1)
+
+#define kNumMoveReducingBits 4
+#define kNumBitPriceShiftBits 4
+// #define kBitPrice (1 << kNumBitPriceShiftBits)
+
+#define REP_LEN_COUNT 64
+
+void LzmaEncProps_Init(CLzmaEncProps *p)
+{
+ p->level = 5;
+ p->dictSize = p->mc = 0;
+ p->reduceSize = (UInt64)(Int64)-1;
+ p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
+ p->numHashOutBits = 0;
+ p->writeEndMark = 0;
+ p->affinity = 0;
+}
+
+void LzmaEncProps_Normalize(CLzmaEncProps *p)
+{
+ int level = p->level;
+ if (level < 0) level = 5;
+ p->level = level;
+
+ if (p->dictSize == 0)
+ p->dictSize =
+ ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) :
+ ( level <= 6 ? ((UInt32)1 << (level + 19)) :
+ ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26)
+ )));
+
+ if (p->dictSize > p->reduceSize)
+ {
+ UInt32 v = (UInt32)p->reduceSize;
+ const UInt32 kReduceMin = ((UInt32)1 << 12);
+ if (v < kReduceMin)
+ v = kReduceMin;
+ if (p->dictSize > v)
+ p->dictSize = v;
+ }
+
+ if (p->lc < 0) p->lc = 3;
+ if (p->lp < 0) p->lp = 0;
+ if (p->pb < 0) p->pb = 2;
+
+ if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
+ if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
+ if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
+ if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5);
+ if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1);
+
+ if (p->numThreads < 0)
+ p->numThreads =
+ #ifndef Z7_ST
+ ((p->btMode && p->algo) ? 2 : 1);
+ #else
+ 1;
+ #endif
+}
+
+UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
+{
+ CLzmaEncProps props = *props2;
+ LzmaEncProps_Normalize(&props);
+ return props.dictSize;
+}
+
+
+/*
+x86/x64:
+
+BSR:
+ IF (SRC == 0) ZF = 1, DEST is undefined;
+ AMD : DEST is unchanged;
+ IF (SRC != 0) ZF = 0; DEST is index of top non-zero bit
+ BSR is slow in some processors
+
+LZCNT:
+ IF (SRC == 0) CF = 1, DEST is size_in_bits_of_register(src) (32 or 64)
+ IF (SRC != 0) CF = 0, DEST = num_lead_zero_bits
+ IF (DEST == 0) ZF = 1;
+
+LZCNT works only in new processors starting from Haswell.
+if LZCNT is not supported by processor, then it's executed as BSR.
+LZCNT can be faster than BSR, if supported.
+*/
+
+// #define LZMA_LOG_BSR
+
+#if defined(MY_CPU_ARM_OR_ARM64) /* || defined(MY_CPU_X86_OR_AMD64) */
+
+ #if (defined(__clang__) && (__clang_major__ >= 6)) \
+ || (defined(__GNUC__) && (__GNUC__ >= 6))
+ #define LZMA_LOG_BSR
+ #elif defined(_MSC_VER) && (_MSC_VER >= 1300)
+ // #if defined(MY_CPU_ARM_OR_ARM64)
+ #define LZMA_LOG_BSR
+ // #endif
+ #endif
+#endif
+
+// #include <intrin.h>
+
+#ifdef LZMA_LOG_BSR
+
+#if defined(__clang__) \
+ || defined(__GNUC__)
+
+/*
+ C code: : (30 - __builtin_clz(x))
+ gcc9/gcc10 for x64 /x86 : 30 - (bsr(x) xor 31)
+ clang10 for x64 : 31 + (bsr(x) xor -32)
+*/
+
+ #define MY_clz(x) ((unsigned)__builtin_clz(x))
+ // __lzcnt32
+ // __builtin_ia32_lzcnt_u32
+
+#else // #if defined(_MSC_VER)
+
+ #ifdef MY_CPU_ARM_OR_ARM64
+
+ #define MY_clz _CountLeadingZeros
+
+ #else // if defined(MY_CPU_X86_OR_AMD64)
+
+ // #define MY_clz __lzcnt // we can use lzcnt (unsupported by old CPU)
+ // _BitScanReverse code is not optimal for some MSVC compilers
+ #define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); zz--; \
+ res = (zz + zz) + (pos >> zz); }
+
+ #endif // MY_CPU_X86_OR_AMD64
+
+#endif // _MSC_VER
+
+
+#ifndef BSR2_RET
+
+ #define BSR2_RET(pos, res) { unsigned zz = 30 - MY_clz(pos); \
+ res = (zz + zz) + (pos >> zz); }
+
+#endif
+
+
+unsigned GetPosSlot1(UInt32 pos);
+unsigned GetPosSlot1(UInt32 pos)
+{
+ unsigned res;
+ BSR2_RET(pos, res)
+ return res;
+}
+#define GetPosSlot2(pos, res) { BSR2_RET(pos, res) }
+#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res) }
+
+
+#else // ! LZMA_LOG_BSR
+
+#define kNumLogBits (11 + sizeof(size_t) / 8 * 3)
+
+#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
+
+static void LzmaEnc_FastPosInit(Byte *g_FastPos)
+{
+ unsigned slot;
+ g_FastPos[0] = 0;
+ g_FastPos[1] = 1;
+ g_FastPos += 2;
+
+ for (slot = 2; slot < kNumLogBits * 2; slot++)
+ {
+ size_t k = ((size_t)1 << ((slot >> 1) - 1));
+ size_t j;
+ for (j = 0; j < k; j++)
+ g_FastPos[j] = (Byte)slot;
+ g_FastPos += k;
+ }
+}
+
+/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */
+/*
+#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
+ (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \
+ res = p->g_FastPos[pos >> zz] + (zz * 2); }
+*/
+
+/*
+#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
+ (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \
+ res = p->g_FastPos[pos >> zz] + (zz * 2); }
+*/
+
+#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \
+ res = p->g_FastPos[pos >> zz] + (zz * 2); }
+
+/*
+#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \
+ p->g_FastPos[pos >> 6] + 12 : \
+ p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; }
+*/
+
+#define GetPosSlot1(pos) p->g_FastPos[pos]
+#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
+#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); }
+
+#endif // LZMA_LOG_BSR
+
+
+#define LZMA_NUM_REPS 4
+
+typedef UInt16 CState;
+typedef UInt16 CExtra;
+
+typedef struct
+{
+ UInt32 price;
+ CState state;
+ CExtra extra;
+ // 0 : normal
+ // 1 : LIT : MATCH
+ // > 1 : MATCH (extra-1) : LIT : REP0 (len)
+ UInt32 len;
+ UInt32 dist;
+ UInt32 reps[LZMA_NUM_REPS];
+} COptimal;
+
+
+// 18.06
+#define kNumOpts (1 << 11)
+#define kPackReserve (kNumOpts * 8)
+// #define kNumOpts (1 << 12)
+// #define kPackReserve (1 + kNumOpts * 2)
+
+#define kNumLenToPosStates 4
+#define kNumPosSlotBits 6
+// #define kDicLogSizeMin 0
+#define kDicLogSizeMax 32
+#define kDistTableSizeMax (kDicLogSizeMax * 2)
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+#define kAlignMask (kAlignTableSize - 1)
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+typedef
+#ifdef Z7_LZMA_PROB32
+ UInt32
+#else
+ UInt16
+#endif
+ CLzmaProb;
+
+#define LZMA_PB_MAX 4
+#define LZMA_LC_MAX 8
+#define LZMA_LP_MAX 4
+
+#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols)
+
+#define LZMA_MATCH_LEN_MIN 2
+#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1)
+
+#define kNumStates 12
+
+
+typedef struct
+{
+ CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)];
+ CLzmaProb high[kLenNumHighSymbols];
+} CLenEnc;
+
+
+typedef struct
+{
+ unsigned tableSize;
+ UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];
+ // UInt32 prices1[LZMA_NUM_PB_STATES_MAX][kLenNumLowSymbols * 2];
+ // UInt32 prices2[kLenNumSymbolsTotal];
+} CLenPriceEnc;
+
+#define GET_PRICE_LEN(p, posState, len) \
+ ((p)->prices[posState][(size_t)(len) - LZMA_MATCH_LEN_MIN])
+
+/*
+#define GET_PRICE_LEN(p, posState, len) \
+ ((p)->prices2[(size_t)(len) - 2] + ((p)->prices1[posState][((len) - 2) & (kLenNumLowSymbols * 2 - 1)] & (((len) - 2 - kLenNumLowSymbols * 2) >> 9)))
+*/
+
+typedef struct
+{
+ UInt32 range;
+ unsigned cache;
+ UInt64 low;
+ UInt64 cacheSize;
+ Byte *buf;
+ Byte *bufLim;
+ Byte *bufBase;
+ ISeqOutStreamPtr outStream;
+ UInt64 processed;
+ SRes res;
+} CRangeEnc;
+
+
+typedef struct
+{
+ CLzmaProb *litProbs;
+
+ unsigned state;
+ UInt32 reps[LZMA_NUM_REPS];
+
+ CLzmaProb posAlignEncoder[1 << kNumAlignBits];
+ CLzmaProb isRep[kNumStates];
+ CLzmaProb isRepG0[kNumStates];
+ CLzmaProb isRepG1[kNumStates];
+ CLzmaProb isRepG2[kNumStates];
+ CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
+ CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+
+ CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+ CLzmaProb posEncoders[kNumFullDistances];
+
+ CLenEnc lenProbs;
+ CLenEnc repLenProbs;
+
+} CSaveState;
+
+
+typedef UInt32 CProbPrice;
+
+
+struct CLzmaEnc
+{
+ void *matchFinderObj;
+ IMatchFinder2 matchFinder;
+
+ unsigned optCur;
+ unsigned optEnd;
+
+ unsigned longestMatchLen;
+ unsigned numPairs;
+ UInt32 numAvail;
+
+ unsigned state;
+ unsigned numFastBytes;
+ unsigned additionalOffset;
+ UInt32 reps[LZMA_NUM_REPS];
+ unsigned lpMask, pbMask;
+ CLzmaProb *litProbs;
+ CRangeEnc rc;
+
+ UInt32 backRes;
+
+ unsigned lc, lp, pb;
+ unsigned lclp;
+
+ BoolInt fastMode;
+ BoolInt writeEndMark;
+ BoolInt finished;
+ BoolInt multiThread;
+ BoolInt needInit;
+ // BoolInt _maxMode;
+
+ UInt64 nowPos64;
+
+ unsigned matchPriceCount;
+ // unsigned alignPriceCount;
+ int repLenEncCounter;
+
+ unsigned distTableSize;
+
+ UInt32 dictSize;
+ SRes result;
+
+ #ifndef Z7_ST
+ BoolInt mtMode;
+ // begin of CMatchFinderMt is used in LZ thread
+ CMatchFinderMt matchFinderMt;
+ // end of CMatchFinderMt is used in BT and HASH threads
+ // #else
+ // CMatchFinder matchFinderBase;
+ #endif
+ CMatchFinder matchFinderBase;
+
+
+ // we suppose that we have 8-bytes alignment after CMatchFinder
+
+ #ifndef Z7_ST
+ Byte pad[128];
+ #endif
+
+ // LZ thread
+ CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
+
+ // we want {len , dist} pairs to be 8-bytes aligned in matches array
+ UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2];
+
+ // we want 8-bytes alignment here
+ UInt32 alignPrices[kAlignTableSize];
+ UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
+ UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
+
+ CLzmaProb posAlignEncoder[1 << kNumAlignBits];
+ CLzmaProb isRep[kNumStates];
+ CLzmaProb isRepG0[kNumStates];
+ CLzmaProb isRepG1[kNumStates];
+ CLzmaProb isRepG2[kNumStates];
+ CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
+ CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+ CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+ CLzmaProb posEncoders[kNumFullDistances];
+
+ CLenEnc lenProbs;
+ CLenEnc repLenProbs;
+
+ #ifndef LZMA_LOG_BSR
+ Byte g_FastPos[1 << kNumLogBits];
+ #endif
+
+ CLenPriceEnc lenEnc;
+ CLenPriceEnc repLenEnc;
+
+ COptimal opt[kNumOpts];
+
+ CSaveState saveState;
+
+ // BoolInt mf_Failure;
+ #ifndef Z7_ST
+ Byte pad2[128];
+ #endif
+};
+
+
+#define MFB (p->matchFinderBase)
+/*
+#ifndef Z7_ST
+#define MFB (p->matchFinderMt.MatchFinder)
+#endif
+*/
+
+// #define GET_CLzmaEnc_p CLzmaEnc *p = (CLzmaEnc*)(void *)p;
+// #define GET_const_CLzmaEnc_p const CLzmaEnc *p = (const CLzmaEnc*)(const void *)p;
+
+#define COPY_ARR(dest, src, arr) memcpy((dest)->arr, (src)->arr, sizeof((src)->arr));
+
+#define COPY_LZMA_ENC_STATE(d, s, p) \
+ (d)->state = (s)->state; \
+ COPY_ARR(d, s, reps) \
+ COPY_ARR(d, s, posAlignEncoder) \
+ COPY_ARR(d, s, isRep) \
+ COPY_ARR(d, s, isRepG0) \
+ COPY_ARR(d, s, isRepG1) \
+ COPY_ARR(d, s, isRepG2) \
+ COPY_ARR(d, s, isMatch) \
+ COPY_ARR(d, s, isRep0Long) \
+ COPY_ARR(d, s, posSlotEncoder) \
+ COPY_ARR(d, s, posEncoders) \
+ (d)->lenProbs = (s)->lenProbs; \
+ (d)->repLenProbs = (s)->repLenProbs; \
+ memcpy((d)->litProbs, (s)->litProbs, ((size_t)0x300 * sizeof(CLzmaProb)) << (p)->lclp);
+
+void LzmaEnc_SaveState(CLzmaEncHandle p)
+{
+ // GET_CLzmaEnc_p
+ CSaveState *v = &p->saveState;
+ COPY_LZMA_ENC_STATE(v, p, p)
+}
+
+void LzmaEnc_RestoreState(CLzmaEncHandle p)
+{
+ // GET_CLzmaEnc_p
+ const CSaveState *v = &p->saveState;
+ COPY_LZMA_ENC_STATE(p, v, p)
+}
+
+
+Z7_NO_INLINE
+SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2)
+{
+ // GET_CLzmaEnc_p
+ CLzmaEncProps props = *props2;
+ LzmaEncProps_Normalize(&props);
+
+ if (props.lc > LZMA_LC_MAX
+ || props.lp > LZMA_LP_MAX
+ || props.pb > LZMA_PB_MAX)
+ return SZ_ERROR_PARAM;
+
+
+ if (props.dictSize > kLzmaMaxHistorySize)
+ props.dictSize = kLzmaMaxHistorySize;
+
+ #ifndef LZMA_LOG_BSR
+ {
+ const UInt64 dict64 = props.dictSize;
+ if (dict64 > ((UInt64)1 << kDicLogSizeMaxCompress))
+ return SZ_ERROR_PARAM;
+ }
+ #endif
+
+ p->dictSize = props.dictSize;
+ {
+ unsigned fb = (unsigned)props.fb;
+ if (fb < 5)
+ fb = 5;
+ if (fb > LZMA_MATCH_LEN_MAX)
+ fb = LZMA_MATCH_LEN_MAX;
+ p->numFastBytes = fb;
+ }
+ p->lc = (unsigned)props.lc;
+ p->lp = (unsigned)props.lp;
+ p->pb = (unsigned)props.pb;
+ p->fastMode = (props.algo == 0);
+ // p->_maxMode = True;
+ MFB.btMode = (Byte)(props.btMode ? 1 : 0);
+ // MFB.btMode = (Byte)(props.btMode);
+ {
+ unsigned numHashBytes = 4;
+ if (props.btMode)
+ {
+ if (props.numHashBytes < 2) numHashBytes = 2;
+ else if (props.numHashBytes < 4) numHashBytes = (unsigned)props.numHashBytes;
+ }
+ if (props.numHashBytes >= 5) numHashBytes = 5;
+
+ MFB.numHashBytes = numHashBytes;
+ // MFB.numHashBytes_Min = 2;
+ MFB.numHashOutBits = (Byte)props.numHashOutBits;
+ }
+
+ MFB.cutValue = props.mc;
+
+ p->writeEndMark = (BoolInt)props.writeEndMark;
+
+ #ifndef Z7_ST
+ /*
+ if (newMultiThread != _multiThread)
+ {
+ ReleaseMatchFinder();
+ _multiThread = newMultiThread;
+ }
+ */
+ p->multiThread = (props.numThreads > 1);
+ p->matchFinderMt.btSync.affinity =
+ p->matchFinderMt.hashSync.affinity = props.affinity;
+ #endif
+
+ return SZ_OK;
+}
+
+
+void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize)
+{
+ // GET_CLzmaEnc_p
+ MFB.expectedDataSize = expectedDataSiize;
+}
+
+
+#define kState_Start 0
+#define kState_LitAfterMatch 4
+#define kState_LitAfterRep 5
+#define kState_MatchAfterLit 7
+#define kState_RepAfterLit 8
+
+static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5};
+static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
+static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
+static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
+
+#define IsLitState(s) ((s) < 7)
+#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1)
+#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1)
+
+#define kInfinityPrice (1 << 30)
+
+static void RangeEnc_Construct(CRangeEnc *p)
+{
+ p->outStream = NULL;
+ p->bufBase = NULL;
+}
+
+#define RangeEnc_GetProcessed(p) ( (p)->processed + (size_t)((p)->buf - (p)->bufBase) + (p)->cacheSize)
+#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
+
+#define RC_BUF_SIZE (1 << 16)
+
+static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc)
+{
+ if (!p->bufBase)
+ {
+ p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE);
+ if (!p->bufBase)
+ return 0;
+ p->bufLim = p->bufBase + RC_BUF_SIZE;
+ }
+ return 1;
+}
+
+static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->bufBase);
+ p->bufBase = NULL;
+}
+
+static void RangeEnc_Init(CRangeEnc *p)
+{
+ p->range = 0xFFFFFFFF;
+ p->cache = 0;
+ p->low = 0;
+ p->cacheSize = 0;
+
+ p->buf = p->bufBase;
+
+ p->processed = 0;
+ p->res = SZ_OK;
+}
+
+Z7_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
+{
+ const size_t num = (size_t)(p->buf - p->bufBase);
+ if (p->res == SZ_OK)
+ {
+ if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
+ p->res = SZ_ERROR_WRITE;
+ }
+ p->processed += num;
+ p->buf = p->bufBase;
+}
+
+Z7_NO_INLINE static void Z7_FASTCALL RangeEnc_ShiftLow(CRangeEnc *p)
+{
+ UInt32 low = (UInt32)p->low;
+ unsigned high = (unsigned)(p->low >> 32);
+ p->low = (UInt32)(low << 8);
+ if (low < (UInt32)0xFF000000 || high != 0)
+ {
+ {
+ Byte *buf = p->buf;
+ *buf++ = (Byte)(p->cache + high);
+ p->cache = (unsigned)(low >> 24);
+ p->buf = buf;
+ if (buf == p->bufLim)
+ RangeEnc_FlushStream(p);
+ if (p->cacheSize == 0)
+ return;
+ }
+ high += 0xFF;
+ for (;;)
+ {
+ Byte *buf = p->buf;
+ *buf++ = (Byte)(high);
+ p->buf = buf;
+ if (buf == p->bufLim)
+ RangeEnc_FlushStream(p);
+ if (--p->cacheSize == 0)
+ return;
+ }
+ }
+ p->cacheSize++;
+}
+
+static void RangeEnc_FlushData(CRangeEnc *p)
+{
+ int i;
+ for (i = 0; i < 5; i++)
+ RangeEnc_ShiftLow(p);
+}
+
+#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); }
+
+#define RC_BIT_PRE(p, prob) \
+ ttt = *(prob); \
+ newBound = (range >> kNumBitModelTotalBits) * ttt;
+
+// #define Z7_LZMA_ENC_USE_BRANCH
+
+#ifdef Z7_LZMA_ENC_USE_BRANCH
+
+#define RC_BIT(p, prob, bit) { \
+ RC_BIT_PRE(p, prob) \
+ if (bit == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \
+ else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \
+ *(prob) = (CLzmaProb)ttt; \
+ RC_NORM(p) \
+ }
+
+#else
+
+#define RC_BIT(p, prob, bit) { \
+ UInt32 mask; \
+ RC_BIT_PRE(p, prob) \
+ mask = 0 - (UInt32)bit; \
+ range &= mask; \
+ mask &= newBound; \
+ range -= mask; \
+ (p)->low += mask; \
+ mask = (UInt32)bit - 1; \
+ range += newBound & mask; \
+ mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \
+ mask += ((1 << kNumMoveBits) - 1); \
+ ttt += (UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \
+ *(prob) = (CLzmaProb)ttt; \
+ RC_NORM(p) \
+ }
+
+#endif
+
+
+
+
+#define RC_BIT_0_BASE(p, prob) \
+ range = newBound; *(prob) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+
+#define RC_BIT_1_BASE(p, prob) \
+ range -= newBound; (p)->low += newBound; *(prob) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); \
+
+#define RC_BIT_0(p, prob) \
+ RC_BIT_0_BASE(p, prob) \
+ RC_NORM(p)
+
+#define RC_BIT_1(p, prob) \
+ RC_BIT_1_BASE(p, prob) \
+ RC_NORM(p)
+
+static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob)
+{
+ UInt32 range, ttt, newBound;
+ range = p->range;
+ RC_BIT_PRE(p, prob)
+ RC_BIT_0(p, prob)
+ p->range = range;
+}
+
+static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym)
+{
+ UInt32 range = p->range;
+ sym |= 0x100;
+ do
+ {
+ UInt32 ttt, newBound;
+ // RangeEnc_EncodeBit(p, probs + (sym >> 8), (sym >> 7) & 1);
+ CLzmaProb *prob = probs + (sym >> 8);
+ UInt32 bit = (sym >> 7) & 1;
+ sym <<= 1;
+ RC_BIT(p, prob, bit)
+ }
+ while (sym < 0x10000);
+ p->range = range;
+}
+
+static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UInt32 matchByte)
+{
+ UInt32 range = p->range;
+ UInt32 offs = 0x100;
+ sym |= 0x100;
+ do
+ {
+ UInt32 ttt, newBound;
+ CLzmaProb *prob;
+ UInt32 bit;
+ matchByte <<= 1;
+ // RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (sym >> 8)), (sym >> 7) & 1);
+ prob = probs + (offs + (matchByte & offs) + (sym >> 8));
+ bit = (sym >> 7) & 1;
+ sym <<= 1;
+ offs &= ~(matchByte ^ sym);
+ RC_BIT(p, prob, bit)
+ }
+ while (sym < 0x10000);
+ p->range = range;
+}
+
+
+
+static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
+{
+ UInt32 i;
+ for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++)
+ {
+ const unsigned kCyclesBits = kNumBitPriceShiftBits;
+ UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1));
+ unsigned bitCount = 0;
+ unsigned j;
+ for (j = 0; j < kCyclesBits; j++)
+ {
+ w = w * w;
+ bitCount <<= 1;
+ while (w >= ((UInt32)1 << 16))
+ {
+ w >>= 1;
+ bitCount++;
+ }
+ }
+ ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
+ // printf("\n%3d: %5d", i, ProbPrices[i]);
+ }
+}
+
+
+#define GET_PRICE(prob, bit) \
+ p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
+
+#define GET_PRICEa(prob, bit) \
+ ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
+
+#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
+#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+
+#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits]
+#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+
+
+static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 sym, const CProbPrice *ProbPrices)
+{
+ UInt32 price = 0;
+ sym |= 0x100;
+ do
+ {
+ unsigned bit = sym & 1;
+ sym >>= 1;
+ price += GET_PRICEa(probs[sym], bit);
+ }
+ while (sym >= 2);
+ return price;
+}
+
+
+static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 sym, UInt32 matchByte, const CProbPrice *ProbPrices)
+{
+ UInt32 price = 0;
+ UInt32 offs = 0x100;
+ sym |= 0x100;
+ do
+ {
+ matchByte <<= 1;
+ price += GET_PRICEa(probs[offs + (matchByte & offs) + (sym >> 8)], (sym >> 7) & 1);
+ sym <<= 1;
+ offs &= ~(matchByte ^ sym);
+ }
+ while (sym < 0x10000);
+ return price;
+}
+
+
+static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBits, unsigned sym)
+{
+ UInt32 range = rc->range;
+ unsigned m = 1;
+ do
+ {
+ UInt32 ttt, newBound;
+ unsigned bit = sym & 1;
+ // RangeEnc_EncodeBit(rc, probs + m, bit);
+ sym >>= 1;
+ RC_BIT(rc, probs + m, bit)
+ m = (m << 1) | bit;
+ }
+ while (--numBits);
+ rc->range = range;
+}
+
+
+
+static void LenEnc_Init(CLenEnc *p)
+{
+ unsigned i;
+ for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++)
+ p->low[i] = kProbInitValue;
+ for (i = 0; i < kLenNumHighSymbols; i++)
+ p->high[i] = kProbInitValue;
+}
+
+static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posState)
+{
+ UInt32 range, ttt, newBound;
+ CLzmaProb *probs = p->low;
+ range = rc->range;
+ RC_BIT_PRE(rc, probs)
+ if (sym >= kLenNumLowSymbols)
+ {
+ RC_BIT_1(rc, probs)
+ probs += kLenNumLowSymbols;
+ RC_BIT_PRE(rc, probs)
+ if (sym >= kLenNumLowSymbols * 2)
+ {
+ RC_BIT_1(rc, probs)
+ rc->range = range;
+ // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2);
+ LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2);
+ return;
+ }
+ sym -= kLenNumLowSymbols;
+ }
+
+ // RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, sym);
+ {
+ unsigned m;
+ unsigned bit;
+ RC_BIT_0(rc, probs)
+ probs += (posState << (1 + kLenNumLowBits));
+ bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit) m = (1 << 1) + bit;
+ bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit) m = (m << 1) + bit;
+ bit = sym & 1; RC_BIT(rc, probs + m, bit)
+ rc->range = range;
+ }
+}
+
+static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices)
+{
+ unsigned i;
+ for (i = 0; i < 8; i += 2)
+ {
+ UInt32 price = startPrice;
+ UInt32 prob;
+ price += GET_PRICEa(probs[1 ], (i >> 2));
+ price += GET_PRICEa(probs[2 + (i >> 2)], (i >> 1) & 1);
+ prob = probs[4 + (i >> 1)];
+ prices[i ] = price + GET_PRICEa_0(prob);
+ prices[i + 1] = price + GET_PRICEa_1(prob);
+ }
+}
+
+
+Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables(
+ CLenPriceEnc *p,
+ unsigned numPosStates,
+ const CLenEnc *enc,
+ const CProbPrice *ProbPrices)
+{
+ UInt32 b;
+
+ {
+ unsigned prob = enc->low[0];
+ UInt32 a, c;
+ unsigned posState;
+ b = GET_PRICEa_1(prob);
+ a = GET_PRICEa_0(prob);
+ c = b + GET_PRICEa_0(enc->low[kLenNumLowSymbols]);
+ for (posState = 0; posState < numPosStates; posState++)
+ {
+ UInt32 *prices = p->prices[posState];
+ const CLzmaProb *probs = enc->low + (posState << (1 + kLenNumLowBits));
+ SetPrices_3(probs, a, prices, ProbPrices);
+ SetPrices_3(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols, ProbPrices);
+ }
+ }
+
+ /*
+ {
+ unsigned i;
+ UInt32 b;
+ a = GET_PRICEa_0(enc->low[0]);
+ for (i = 0; i < kLenNumLowSymbols; i++)
+ p->prices2[i] = a;
+ a = GET_PRICEa_1(enc->low[0]);
+ b = a + GET_PRICEa_0(enc->low[kLenNumLowSymbols]);
+ for (i = kLenNumLowSymbols; i < kLenNumLowSymbols * 2; i++)
+ p->prices2[i] = b;
+ a += GET_PRICEa_1(enc->low[kLenNumLowSymbols]);
+ }
+ */
+
+ // p->counter = numSymbols;
+ // p->counter = 64;
+
+ {
+ unsigned i = p->tableSize;
+
+ if (i > kLenNumLowSymbols * 2)
+ {
+ const CLzmaProb *probs = enc->high;
+ UInt32 *prices = p->prices[0] + kLenNumLowSymbols * 2;
+ i -= kLenNumLowSymbols * 2 - 1;
+ i >>= 1;
+ b += GET_PRICEa_1(enc->low[kLenNumLowSymbols]);
+ do
+ {
+ /*
+ p->prices2[i] = a +
+ // RcTree_GetPrice(enc->high, kLenNumHighBits, i - kLenNumLowSymbols * 2, ProbPrices);
+ LitEnc_GetPrice(probs, i - kLenNumLowSymbols * 2, ProbPrices);
+ */
+ // UInt32 price = a + RcTree_GetPrice(probs, kLenNumHighBits - 1, sym, ProbPrices);
+ unsigned sym = --i + (1 << (kLenNumHighBits - 1));
+ UInt32 price = b;
+ do
+ {
+ const unsigned bit = sym & 1;
+ sym >>= 1;
+ price += GET_PRICEa(probs[sym], bit);
+ }
+ while (sym >= 2);
+
+ {
+ const unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];
+ prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob);
+ prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob);
+ }
+ }
+ while (i);
+
+ {
+ unsigned posState;
+ const size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);
+ for (posState = 1; posState < numPosStates; posState++)
+ memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num);
+ }
+ }
+ }
+}
+
+/*
+ #ifdef SHOW_STAT
+ g_STAT_OFFSET += num;
+ printf("\n MovePos %u", num);
+ #endif
+*/
+
+#define MOVE_POS(p, num) { \
+ p->additionalOffset += (num); \
+ p->matchFinder.Skip(p->matchFinderObj, (UInt32)(num)); }
+
+
+static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
+{
+ unsigned numPairs;
+
+ p->additionalOffset++;
+ p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+ {
+ const UInt32 *d = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
+ // if (!d) { p->mf_Failure = True; *numPairsRes = 0; return 0; }
+ numPairs = (unsigned)(d - p->matches);
+ }
+ *numPairsRes = numPairs;
+
+ #ifdef SHOW_STAT
+ printf("\n i = %u numPairs = %u ", g_STAT_OFFSET, numPairs / 2);
+ g_STAT_OFFSET++;
+ {
+ unsigned i;
+ for (i = 0; i < numPairs; i += 2)
+ printf("%2u %6u | ", p->matches[i], p->matches[i + 1]);
+ }
+ #endif
+
+ if (numPairs == 0)
+ return 0;
+ {
+ const unsigned len = p->matches[(size_t)numPairs - 2];
+ if (len != p->numFastBytes)
+ return len;
+ {
+ UInt32 numAvail = p->numAvail;
+ if (numAvail > LZMA_MATCH_LEN_MAX)
+ numAvail = LZMA_MATCH_LEN_MAX;
+ {
+ const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+ const Byte *p2 = p1 + len;
+ const ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1];
+ const Byte *lim = p1 + numAvail;
+ for (; p2 != lim && *p2 == p2[dif]; p2++)
+ {}
+ return (unsigned)(p2 - p1);
+ }
+ }
+ }
+}
+
+#define MARK_LIT ((UInt32)(Int32)-1)
+
+#define MakeAs_Lit(p) { (p)->dist = MARK_LIT; (p)->extra = 0; }
+#define MakeAs_ShortRep(p) { (p)->dist = 0; (p)->extra = 0; }
+#define IsShortRep(p) ((p)->dist == 0)
+
+
+#define GetPrice_ShortRep(p, state, posState) \
+ ( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState]))
+
+#define GetPrice_Rep_0(p, state, posState) ( \
+ GET_PRICE_1(p->isMatch[state][posState]) \
+ + GET_PRICE_1(p->isRep0Long[state][posState])) \
+ + GET_PRICE_1(p->isRep[state]) \
+ + GET_PRICE_0(p->isRepG0[state])
+
+Z7_FORCE_INLINE
+static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState)
+{
+ UInt32 price;
+ UInt32 prob = p->isRepG0[state];
+ if (repIndex == 0)
+ {
+ price = GET_PRICE_0(prob);
+ price += GET_PRICE_1(p->isRep0Long[state][posState]);
+ }
+ else
+ {
+ price = GET_PRICE_1(prob);
+ prob = p->isRepG1[state];
+ if (repIndex == 1)
+ price += GET_PRICE_0(prob);
+ else
+ {
+ price += GET_PRICE_1(prob);
+ price += GET_PRICE(p->isRepG2[state], repIndex - 2);
+ }
+ }
+ return price;
+}
+
+
+static unsigned Backward(CLzmaEnc *p, unsigned cur)
+{
+ unsigned wr = cur + 1;
+ p->optEnd = wr;
+
+ for (;;)
+ {
+ UInt32 dist = p->opt[cur].dist;
+ unsigned len = (unsigned)p->opt[cur].len;
+ unsigned extra = (unsigned)p->opt[cur].extra;
+ cur -= len;
+
+ if (extra)
+ {
+ wr--;
+ p->opt[wr].len = (UInt32)len;
+ cur -= extra;
+ len = extra;
+ if (extra == 1)
+ {
+ p->opt[wr].dist = dist;
+ dist = MARK_LIT;
+ }
+ else
+ {
+ p->opt[wr].dist = 0;
+ len--;
+ wr--;
+ p->opt[wr].dist = MARK_LIT;
+ p->opt[wr].len = 1;
+ }
+ }
+
+ if (cur == 0)
+ {
+ p->backRes = dist;
+ p->optCur = wr;
+ return len;
+ }
+
+ wr--;
+ p->opt[wr].dist = dist;
+ p->opt[wr].len = (UInt32)len;
+ }
+}
+
+
+
+#define LIT_PROBS(pos, prevByte) \
+ (p->litProbs + (UInt32)3 * (((((pos) << 8) + (prevByte)) & p->lpMask) << p->lc))
+
+
+static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
+{
+ unsigned last, cur;
+ UInt32 reps[LZMA_NUM_REPS];
+ unsigned repLens[LZMA_NUM_REPS];
+ UInt32 *matches;
+
+ {
+ UInt32 numAvail;
+ unsigned numPairs, mainLen, repMaxIndex, i, posState;
+ UInt32 matchPrice, repMatchPrice;
+ const Byte *data;
+ Byte curByte, matchByte;
+
+ p->optCur = p->optEnd = 0;
+
+ if (p->additionalOffset == 0)
+ mainLen = ReadMatchDistances(p, &numPairs);
+ else
+ {
+ mainLen = p->longestMatchLen;
+ numPairs = p->numPairs;
+ }
+
+ numAvail = p->numAvail;
+ if (numAvail < 2)
+ {
+ p->backRes = MARK_LIT;
+ return 1;
+ }
+ if (numAvail > LZMA_MATCH_LEN_MAX)
+ numAvail = LZMA_MATCH_LEN_MAX;
+
+ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+ repMaxIndex = 0;
+
+ for (i = 0; i < LZMA_NUM_REPS; i++)
+ {
+ unsigned len;
+ const Byte *data2;
+ reps[i] = p->reps[i];
+ data2 = data - reps[i];
+ if (data[0] != data2[0] || data[1] != data2[1])
+ {
+ repLens[i] = 0;
+ continue;
+ }
+ for (len = 2; len < numAvail && data[len] == data2[len]; len++)
+ {}
+ repLens[i] = len;
+ if (len > repLens[repMaxIndex])
+ repMaxIndex = i;
+ if (len == LZMA_MATCH_LEN_MAX) // 21.03 : optimization
+ break;
+ }
+
+ if (repLens[repMaxIndex] >= p->numFastBytes)
+ {
+ unsigned len;
+ p->backRes = (UInt32)repMaxIndex;
+ len = repLens[repMaxIndex];
+ MOVE_POS(p, len - 1)
+ return len;
+ }
+
+ matches = p->matches;
+ #define MATCHES matches
+ // #define MATCHES p->matches
+
+ if (mainLen >= p->numFastBytes)
+ {
+ p->backRes = MATCHES[(size_t)numPairs - 1] + LZMA_NUM_REPS;
+ MOVE_POS(p, mainLen - 1)
+ return mainLen;
+ }
+
+ curByte = *data;
+ matchByte = *(data - reps[0]);
+
+ last = repLens[repMaxIndex];
+ if (last <= mainLen)
+ last = mainLen;
+
+ if (last < 2 && curByte != matchByte)
+ {
+ p->backRes = MARK_LIT;
+ return 1;
+ }
+
+ p->opt[0].state = (CState)p->state;
+
+ posState = (position & p->pbMask);
+
+ {
+ const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
+ p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) +
+ (!IsLitState(p->state) ?
+ LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) :
+ LitEnc_GetPrice(probs, curByte, p->ProbPrices));
+ }
+
+ MakeAs_Lit(&p->opt[1])
+
+ matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
+ repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);
+
+ // 18.06
+ if (matchByte == curByte && repLens[0] == 0)
+ {
+ UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, p->state, posState);
+ if (shortRepPrice < p->opt[1].price)
+ {
+ p->opt[1].price = shortRepPrice;
+ MakeAs_ShortRep(&p->opt[1])
+ }
+ if (last < 2)
+ {
+ p->backRes = p->opt[1].dist;
+ return 1;
+ }
+ }
+
+ p->opt[1].len = 1;
+
+ p->opt[0].reps[0] = reps[0];
+ p->opt[0].reps[1] = reps[1];
+ p->opt[0].reps[2] = reps[2];
+ p->opt[0].reps[3] = reps[3];
+
+ // ---------- REP ----------
+
+ for (i = 0; i < LZMA_NUM_REPS; i++)
+ {
+ unsigned repLen = repLens[i];
+ UInt32 price;
+ if (repLen < 2)
+ continue;
+ price = repMatchPrice + GetPrice_PureRep(p, i, p->state, posState);
+ do
+ {
+ UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, repLen);
+ COptimal *opt = &p->opt[repLen];
+ if (price2 < opt->price)
+ {
+ opt->price = price2;
+ opt->len = (UInt32)repLen;
+ opt->dist = (UInt32)i;
+ opt->extra = 0;
+ }
+ }
+ while (--repLen >= 2);
+ }
+
+
+ // ---------- MATCH ----------
+ {
+ unsigned len = repLens[0] + 1;
+ if (len <= mainLen)
+ {
+ unsigned offs = 0;
+ UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]);
+
+ if (len < 2)
+ len = 2;
+ else
+ while (len > MATCHES[offs])
+ offs += 2;
+
+ for (; ; len++)
+ {
+ COptimal *opt;
+ UInt32 dist = MATCHES[(size_t)offs + 1];
+ UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
+ unsigned lenToPosState = GetLenToPosState(len);
+
+ if (dist < kNumFullDistances)
+ price += p->distancesPrices[lenToPosState][dist & (kNumFullDistances - 1)];
+ else
+ {
+ unsigned slot;
+ GetPosSlot2(dist, slot)
+ price += p->alignPrices[dist & kAlignMask];
+ price += p->posSlotPrices[lenToPosState][slot];
+ }
+
+ opt = &p->opt[len];
+
+ if (price < opt->price)
+ {
+ opt->price = price;
+ opt->len = (UInt32)len;
+ opt->dist = dist + LZMA_NUM_REPS;
+ opt->extra = 0;
+ }
+
+ if (len == MATCHES[offs])
+ {
+ offs += 2;
+ if (offs == numPairs)
+ break;
+ }
+ }
+ }
+ }
+
+
+ cur = 0;
+
+ #ifdef SHOW_STAT2
+ /* if (position >= 0) */
+ {
+ unsigned i;
+ printf("\n pos = %4X", position);
+ for (i = cur; i <= last; i++)
+ printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price);
+ }
+ #endif
+ }
+
+
+
+ // ---------- Optimal Parsing ----------
+
+ for (;;)
+ {
+ unsigned numAvail;
+ UInt32 numAvailFull;
+ unsigned newLen, numPairs, prev, state, posState, startLen;
+ UInt32 litPrice, matchPrice, repMatchPrice;
+ BoolInt nextIsLit;
+ Byte curByte, matchByte;
+ const Byte *data;
+ COptimal *curOpt, *nextOpt;
+
+ if (++cur == last)
+ break;
+
+ // 18.06
+ if (cur >= kNumOpts - 64)
+ {
+ unsigned j, best;
+ UInt32 price = p->opt[cur].price;
+ best = cur;
+ for (j = cur + 1; j <= last; j++)
+ {
+ UInt32 price2 = p->opt[j].price;
+ if (price >= price2)
+ {
+ price = price2;
+ best = j;
+ }
+ }
+ {
+ unsigned delta = best - cur;
+ if (delta != 0)
+ {
+ MOVE_POS(p, delta)
+ }
+ }
+ cur = best;
+ break;
+ }
+
+ newLen = ReadMatchDistances(p, &numPairs);
+
+ if (newLen >= p->numFastBytes)
+ {
+ p->numPairs = numPairs;
+ p->longestMatchLen = newLen;
+ break;
+ }
+
+ curOpt = &p->opt[cur];
+
+ position++;
+
+ // we need that check here, if skip_items in p->opt are possible
+ /*
+ if (curOpt->price >= kInfinityPrice)
+ continue;
+ */
+
+ prev = cur - curOpt->len;
+
+ if (curOpt->len == 1)
+ {
+ state = (unsigned)p->opt[prev].state;
+ if (IsShortRep(curOpt))
+ state = kShortRepNextStates[state];
+ else
+ state = kLiteralNextStates[state];
+ }
+ else
+ {
+ const COptimal *prevOpt;
+ UInt32 b0;
+ UInt32 dist = curOpt->dist;
+
+ if (curOpt->extra)
+ {
+ prev -= (unsigned)curOpt->extra;
+ state = kState_RepAfterLit;
+ if (curOpt->extra == 1)
+ state = (dist < LZMA_NUM_REPS ? kState_RepAfterLit : kState_MatchAfterLit);
+ }
+ else
+ {
+ state = (unsigned)p->opt[prev].state;
+ if (dist < LZMA_NUM_REPS)
+ state = kRepNextStates[state];
+ else
+ state = kMatchNextStates[state];
+ }
+
+ prevOpt = &p->opt[prev];
+ b0 = prevOpt->reps[0];
+
+ if (dist < LZMA_NUM_REPS)
+ {
+ if (dist == 0)
+ {
+ reps[0] = b0;
+ reps[1] = prevOpt->reps[1];
+ reps[2] = prevOpt->reps[2];
+ reps[3] = prevOpt->reps[3];
+ }
+ else
+ {
+ reps[1] = b0;
+ b0 = prevOpt->reps[1];
+ if (dist == 1)
+ {
+ reps[0] = b0;
+ reps[2] = prevOpt->reps[2];
+ reps[3] = prevOpt->reps[3];
+ }
+ else
+ {
+ reps[2] = b0;
+ reps[0] = prevOpt->reps[dist];
+ reps[3] = prevOpt->reps[dist ^ 1];
+ }
+ }
+ }
+ else
+ {
+ reps[0] = (dist - LZMA_NUM_REPS + 1);
+ reps[1] = b0;
+ reps[2] = prevOpt->reps[1];
+ reps[3] = prevOpt->reps[2];
+ }
+ }
+
+ curOpt->state = (CState)state;
+ curOpt->reps[0] = reps[0];
+ curOpt->reps[1] = reps[1];
+ curOpt->reps[2] = reps[2];
+ curOpt->reps[3] = reps[3];
+
+ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+ curByte = *data;
+ matchByte = *(data - reps[0]);
+
+ posState = (position & p->pbMask);
+
+ /*
+ The order of Price checks:
+ < LIT
+ <= SHORT_REP
+ < LIT : REP_0
+ < REP [ : LIT : REP_0 ]
+ < MATCH [ : LIT : REP_0 ]
+ */
+
+ {
+ UInt32 curPrice = curOpt->price;
+ unsigned prob = p->isMatch[state][posState];
+ matchPrice = curPrice + GET_PRICE_1(prob);
+ litPrice = curPrice + GET_PRICE_0(prob);
+ }
+
+ nextOpt = &p->opt[(size_t)cur + 1];
+ nextIsLit = False;
+
+ // here we can allow skip_items in p->opt, if we don't check (nextOpt->price < kInfinityPrice)
+ // 18.new.06
+ if ((nextOpt->price < kInfinityPrice
+ // && !IsLitState(state)
+ && matchByte == curByte)
+ || litPrice > nextOpt->price
+ )
+ litPrice = 0;
+ else
+ {
+ const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
+ litPrice += (!IsLitState(state) ?
+ LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) :
+ LitEnc_GetPrice(probs, curByte, p->ProbPrices));
+
+ if (litPrice < nextOpt->price)
+ {
+ nextOpt->price = litPrice;
+ nextOpt->len = 1;
+ MakeAs_Lit(nextOpt)
+ nextIsLit = True;
+ }
+ }
+
+ repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]);
+
+ numAvailFull = p->numAvail;
+ {
+ unsigned temp = kNumOpts - 1 - cur;
+ if (numAvailFull > temp)
+ numAvailFull = (UInt32)temp;
+ }
+
+ // 18.06
+ // ---------- SHORT_REP ----------
+ if (IsLitState(state)) // 18.new
+ if (matchByte == curByte)
+ if (repMatchPrice < nextOpt->price) // 18.new
+ // if (numAvailFull < 2 || data[1] != *(data - reps[0] + 1))
+ if (
+ // nextOpt->price >= kInfinityPrice ||
+ nextOpt->len < 2 // we can check nextOpt->len, if skip items are not allowed in p->opt
+ || (nextOpt->dist != 0
+ // && nextOpt->extra <= 1 // 17.old
+ )
+ )
+ {
+ UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, state, posState);
+ // if (shortRepPrice <= nextOpt->price) // 17.old
+ if (shortRepPrice < nextOpt->price) // 18.new
+ {
+ nextOpt->price = shortRepPrice;
+ nextOpt->len = 1;
+ MakeAs_ShortRep(nextOpt)
+ nextIsLit = False;
+ }
+ }
+
+ if (numAvailFull < 2)
+ continue;
+ numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes);
+
+ // numAvail <= p->numFastBytes
+
+ // ---------- LIT : REP_0 ----------
+
+ if (!nextIsLit
+ && litPrice != 0 // 18.new
+ && matchByte != curByte
+ && numAvailFull > 2)
+ {
+ const Byte *data2 = data - reps[0];
+ if (data[1] == data2[1] && data[2] == data2[2])
+ {
+ unsigned len;
+ unsigned limit = p->numFastBytes + 1;
+ if (limit > numAvailFull)
+ limit = numAvailFull;
+ for (len = 3; len < limit && data[len] == data2[len]; len++)
+ {}
+
+ {
+ unsigned state2 = kLiteralNextStates[state];
+ unsigned posState2 = (position + 1) & p->pbMask;
+ UInt32 price = litPrice + GetPrice_Rep_0(p, state2, posState2);
+ {
+ unsigned offset = cur + len;
+
+ if (last < offset)
+ last = offset;
+
+ // do
+ {
+ UInt32 price2;
+ COptimal *opt;
+ len--;
+ // price2 = price + GetPrice_Len_Rep_0(p, len, state2, posState2);
+ price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len);
+
+ opt = &p->opt[offset];
+ // offset--;
+ if (price2 < opt->price)
+ {
+ opt->price = price2;
+ opt->len = (UInt32)len;
+ opt->dist = 0;
+ opt->extra = 1;
+ }
+ }
+ // while (len >= 3);
+ }
+ }
+ }
+ }
+
+ startLen = 2; /* speed optimization */
+
+ {
+ // ---------- REP ----------
+ unsigned repIndex = 0; // 17.old
+ // unsigned repIndex = IsLitState(state) ? 0 : 1; // 18.notused
+ for (; repIndex < LZMA_NUM_REPS; repIndex++)
+ {
+ unsigned len;
+ UInt32 price;
+ const Byte *data2 = data - reps[repIndex];
+ if (data[0] != data2[0] || data[1] != data2[1])
+ continue;
+
+ for (len = 2; len < numAvail && data[len] == data2[len]; len++)
+ {}
+
+ // if (len < startLen) continue; // 18.new: speed optimization
+
+ {
+ unsigned offset = cur + len;
+ if (last < offset)
+ last = offset;
+ }
+ {
+ unsigned len2 = len;
+ price = repMatchPrice + GetPrice_PureRep(p, repIndex, state, posState);
+ do
+ {
+ UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, len2);
+ COptimal *opt = &p->opt[cur + len2];
+ if (price2 < opt->price)
+ {
+ opt->price = price2;
+ opt->len = (UInt32)len2;
+ opt->dist = (UInt32)repIndex;
+ opt->extra = 0;
+ }
+ }
+ while (--len2 >= 2);
+ }
+
+ if (repIndex == 0) startLen = len + 1; // 17.old
+ // startLen = len + 1; // 18.new
+
+ /* if (_maxMode) */
+ {
+ // ---------- REP : LIT : REP_0 ----------
+ // numFastBytes + 1 + numFastBytes
+
+ unsigned len2 = len + 1;
+ unsigned limit = len2 + p->numFastBytes;
+ if (limit > numAvailFull)
+ limit = numAvailFull;
+
+ len2 += 2;
+ if (len2 <= limit)
+ if (data[len2 - 2] == data2[len2 - 2])
+ if (data[len2 - 1] == data2[len2 - 1])
+ {
+ unsigned state2 = kRepNextStates[state];
+ unsigned posState2 = (position + len) & p->pbMask;
+ price += GET_PRICE_LEN(&p->repLenEnc, posState, len)
+ + GET_PRICE_0(p->isMatch[state2][posState2])
+ + LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]),
+ data[len], data2[len], p->ProbPrices);
+
+ // state2 = kLiteralNextStates[state2];
+ state2 = kState_LitAfterRep;
+ posState2 = (posState2 + 1) & p->pbMask;
+
+
+ price += GetPrice_Rep_0(p, state2, posState2);
+
+ for (; len2 < limit && data[len2] == data2[len2]; len2++)
+ {}
+
+ len2 -= len;
+ // if (len2 >= 3)
+ {
+ {
+ unsigned offset = cur + len + len2;
+
+ if (last < offset)
+ last = offset;
+ // do
+ {
+ UInt32 price2;
+ COptimal *opt;
+ len2--;
+ // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2);
+ price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2);
+
+ opt = &p->opt[offset];
+ // offset--;
+ if (price2 < opt->price)
+ {
+ opt->price = price2;
+ opt->len = (UInt32)len2;
+ opt->extra = (CExtra)(len + 1);
+ opt->dist = (UInt32)repIndex;
+ }
+ }
+ // while (len2 >= 3);
+ }
+ }
+ }
+ }
+ }
+ }
+
+
+ // ---------- MATCH ----------
+ /* for (unsigned len = 2; len <= newLen; len++) */
+ if (newLen > numAvail)
+ {
+ newLen = numAvail;
+ for (numPairs = 0; newLen > MATCHES[numPairs]; numPairs += 2);
+ MATCHES[numPairs] = (UInt32)newLen;
+ numPairs += 2;
+ }
+
+ // startLen = 2; /* speed optimization */
+
+ if (newLen >= startLen)
+ {
+ UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]);
+ UInt32 dist;
+ unsigned offs, posSlot, len;
+
+ {
+ unsigned offset = cur + newLen;
+ if (last < offset)
+ last = offset;
+ }
+
+ offs = 0;
+ while (startLen > MATCHES[offs])
+ offs += 2;
+ dist = MATCHES[(size_t)offs + 1];
+
+ // if (dist >= kNumFullDistances)
+ GetPosSlot2(dist, posSlot)
+
+ for (len = /*2*/ startLen; ; len++)
+ {
+ UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
+ {
+ COptimal *opt;
+ unsigned lenNorm = len - 2;
+ lenNorm = GetLenToPosState2(lenNorm);
+ if (dist < kNumFullDistances)
+ price += p->distancesPrices[lenNorm][dist & (kNumFullDistances - 1)];
+ else
+ price += p->posSlotPrices[lenNorm][posSlot] + p->alignPrices[dist & kAlignMask];
+
+ opt = &p->opt[cur + len];
+ if (price < opt->price)
+ {
+ opt->price = price;
+ opt->len = (UInt32)len;
+ opt->dist = dist + LZMA_NUM_REPS;
+ opt->extra = 0;
+ }
+ }
+
+ if (len == MATCHES[offs])
+ {
+ // if (p->_maxMode) {
+ // MATCH : LIT : REP_0
+
+ const Byte *data2 = data - dist - 1;
+ unsigned len2 = len + 1;
+ unsigned limit = len2 + p->numFastBytes;
+ if (limit > numAvailFull)
+ limit = numAvailFull;
+
+ len2 += 2;
+ if (len2 <= limit)
+ if (data[len2 - 2] == data2[len2 - 2])
+ if (data[len2 - 1] == data2[len2 - 1])
+ {
+ for (; len2 < limit && data[len2] == data2[len2]; len2++)
+ {}
+
+ len2 -= len;
+
+ // if (len2 >= 3)
+ {
+ unsigned state2 = kMatchNextStates[state];
+ unsigned posState2 = (position + len) & p->pbMask;
+ unsigned offset;
+ price += GET_PRICE_0(p->isMatch[state2][posState2]);
+ price += LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]),
+ data[len], data2[len], p->ProbPrices);
+
+ // state2 = kLiteralNextStates[state2];
+ state2 = kState_LitAfterMatch;
+
+ posState2 = (posState2 + 1) & p->pbMask;
+ price += GetPrice_Rep_0(p, state2, posState2);
+
+ offset = cur + len + len2;
+
+ if (last < offset)
+ last = offset;
+ // do
+ {
+ UInt32 price2;
+ COptimal *opt;
+ len2--;
+ // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2);
+ price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2);
+ opt = &p->opt[offset];
+ // offset--;
+ if (price2 < opt->price)
+ {
+ opt->price = price2;
+ opt->len = (UInt32)len2;
+ opt->extra = (CExtra)(len + 1);
+ opt->dist = dist + LZMA_NUM_REPS;
+ }
+ }
+ // while (len2 >= 3);
+ }
+
+ }
+
+ offs += 2;
+ if (offs == numPairs)
+ break;
+ dist = MATCHES[(size_t)offs + 1];
+ // if (dist >= kNumFullDistances)
+ GetPosSlot2(dist, posSlot)
+ }
+ }
+ }
+ }
+
+ do
+ p->opt[last].price = kInfinityPrice;
+ while (--last);
+
+ return Backward(p, cur);
+}
+
+
+
+/* ChangePair(smallDist, bigDist): true when bigDist is more than 2^7 (128x)
+   larger than smallDist — heuristic for preferring a slightly shorter match
+   at a much smaller distance. */
+#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist))
+
+
+
+/* GetOptimumFast: greedy (fast-mode) parser. Chooses the next symbol:
+   returns the chosen length (1 == literal) and sets p->backRes to
+   MARK_LIT, a rep index (0..3), or dist + LZMA_NUM_REPS.
+   Prefers long rep matches, then the longest normal match, using
+   ChangePair() to trade match length against distance cost. */
+static unsigned GetOptimumFast(CLzmaEnc *p)
+{
+ UInt32 numAvail, mainDist;
+ unsigned mainLen, numPairs, repIndex, repLen, i;
+ const Byte *data;
+
+ /* Reuse the match found by the previous call when look-ahead
+    (additionalOffset) has already advanced past this position. */
+ if (p->additionalOffset == 0)
+ mainLen = ReadMatchDistances(p, &numPairs);
+ else
+ {
+ mainLen = p->longestMatchLen;
+ numPairs = p->numPairs;
+ }
+
+ numAvail = p->numAvail;
+ p->backRes = MARK_LIT;
+ if (numAvail < 2)
+ return 1;
+ // if (mainLen < 2 && p->state == 0) return 1; // 18.06.notused
+ if (numAvail > LZMA_MATCH_LEN_MAX)
+ numAvail = LZMA_MATCH_LEN_MAX;
+ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+ repLen = repIndex = 0;
+
+ /* Scan the 4 rep distances; a rep match of >= numFastBytes is taken
+    immediately, otherwise remember the longest one. */
+ for (i = 0; i < LZMA_NUM_REPS; i++)
+ {
+ unsigned len;
+ const Byte *data2 = data - p->reps[i];
+ if (data[0] != data2[0] || data[1] != data2[1])
+ continue;
+ for (len = 2; len < numAvail && data[len] == data2[len]; len++)
+ {}
+ if (len >= p->numFastBytes)
+ {
+ p->backRes = (UInt32)i;
+ MOVE_POS(p, len - 1)
+ return len;
+ }
+ if (len > repLen)
+ {
+ repIndex = i;
+ repLen = len;
+ }
+ }
+
+ if (mainLen >= p->numFastBytes)
+ {
+ p->backRes = p->matches[(size_t)numPairs - 1] + LZMA_NUM_REPS;
+ MOVE_POS(p, mainLen - 1)
+ return mainLen;
+ }
+
+ mainDist = 0; /* for GCC */
+
+ /* Try shortening the main match if a slightly shorter candidate has a
+    much smaller distance (cheaper to encode). */
+ if (mainLen >= 2)
+ {
+ mainDist = p->matches[(size_t)numPairs - 1];
+ while (numPairs > 2)
+ {
+ UInt32 dist2;
+ if (mainLen != p->matches[(size_t)numPairs - 4] + 1)
+ break;
+ dist2 = p->matches[(size_t)numPairs - 3];
+ if (!ChangePair(dist2, mainDist))
+ break;
+ numPairs -= 2;
+ mainLen--;
+ mainDist = dist2;
+ }
+ if (mainLen == 2 && mainDist >= 0x80)
+ mainLen = 1;
+ }
+
+ /* A rep match only slightly shorter than the main match wins, since rep
+    distances are far cheaper to encode than large explicit distances. */
+ if (repLen >= 2)
+ if ( repLen + 1 >= mainLen
+ || (repLen + 2 >= mainLen && mainDist >= (1 << 9))
+ || (repLen + 3 >= mainLen && mainDist >= (1 << 15)))
+ {
+ p->backRes = (UInt32)repIndex;
+ MOVE_POS(p, repLen - 1)
+ return repLen;
+ }
+
+ if (mainLen < 2 || numAvail <= 2)
+ return 1;
+
+ /* Peek at the next position: if it offers a clearly better match,
+    emit a literal now and take that match on the next call. */
+ {
+ unsigned len1 = ReadMatchDistances(p, &p->numPairs);
+ p->longestMatchLen = len1;
+
+ if (len1 >= 2)
+ {
+ UInt32 newDist = p->matches[(size_t)p->numPairs - 1];
+ if ( (len1 >= mainLen && newDist < mainDist)
+ || (len1 == mainLen + 1 && !ChangePair(mainDist, newDist))
+ || (len1 > mainLen + 1)
+ || (len1 + 1 >= mainLen && mainLen >= 3 && ChangePair(newDist, mainDist)))
+ return 1;
+ }
+ }
+
+ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+
+ /* If any rep match at the next position nearly reaches mainLen,
+    prefer emitting a literal here too. */
+ for (i = 0; i < LZMA_NUM_REPS; i++)
+ {
+ unsigned len, limit;
+ const Byte *data2 = data - p->reps[i];
+ if (data[0] != data2[0] || data[1] != data2[1])
+ continue;
+ limit = mainLen - 1;
+ for (len = 2;; len++)
+ {
+ if (len >= limit)
+ return 1;
+ if (data[len] != data2[len])
+ break;
+ }
+ }
+
+ p->backRes = mainDist + LZMA_NUM_REPS;
+ if (mainLen != 2)
+ {
+ MOVE_POS(p, mainLen - 2)
+ }
+ return mainLen;
+}
+
+
+
+
+/* WriteEndMarker: emits the LZMA end-of-stream marker at the given posState.
+   The marker is coded as a match (isMatch=1, isRep=0) with minimal length
+   and an all-ones distance: pos slot bits all 1, then direct distance bits,
+   then all-ones align bits. */
+static void WriteEndMarker(CLzmaEnc *p, unsigned posState)
+{
+ UInt32 range;
+ range = p->rc.range;
+ {
+ UInt32 ttt, newBound;
+ CLzmaProb *prob = &p->isMatch[p->state][posState];
+ RC_BIT_PRE(&p->rc, prob)
+ RC_BIT_1(&p->rc, prob)
+ prob = &p->isRep[p->state];
+ RC_BIT_PRE(&p->rc, prob)
+ RC_BIT_0(&p->rc, prob)
+ }
+ p->state = kMatchNextStates[p->state];
+
+ p->rc.range = range;
+ LenEnc_Encode(&p->lenProbs, &p->rc, 0, posState);
+ range = p->rc.range;
+
+ {
+ // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[0], (1 << kNumPosSlotBits) - 1);
+ CLzmaProb *probs = p->posSlotEncoder[0];
+ unsigned m = 1;
+ do
+ {
+ UInt32 ttt, newBound;
+ /* NOTE(review): first arg here is p, elsewhere &p->rc — presumably
+    RC_BIT_PRE ignores its first argument; confirm against the macro. */
+ RC_BIT_PRE(p, probs + m)
+ RC_BIT_1(&p->rc, probs + m)
+ m = (m << 1) + 1;
+ }
+ while (m < (1 << kNumPosSlotBits));
+ }
+ {
+ // RangeEnc_EncodeDirectBits(&p->rc, ((UInt32)1 << (30 - kNumAlignBits)) - 1, 30 - kNumAlignBits); UInt32 range = p->range;
+ unsigned numBits = 30 - kNumAlignBits;
+ do
+ {
+ range >>= 1;
+ p->rc.low += range;
+ RC_NORM(&p->rc)
+ }
+ while (--numBits);
+ }
+
+ {
+ // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask);
+ CLzmaProb *probs = p->posAlignEncoder;
+ unsigned m = 1;
+ do
+ {
+ UInt32 ttt, newBound;
+ RC_BIT_PRE(p, probs + m)
+ RC_BIT_1(&p->rc, probs + m)
+ m = (m << 1) + 1;
+ }
+ while (m < kAlignTableSize);
+ }
+ p->rc.range = range;
+}
+
+
+/* CheckErrors: folds pending failures (range-coder write error, MT match
+   finder failure, match-finder read error) into p->result; marks the
+   encoder finished on any error. Returns p->result. */
+static SRes CheckErrors(CLzmaEnc *p)
+{
+ if (p->result != SZ_OK)
+ return p->result;
+ if (p->rc.res != SZ_OK)
+ p->result = SZ_ERROR_WRITE;
+
+ #ifndef Z7_ST
+ if (
+ // p->mf_Failure ||
+ (p->mtMode &&
+ ( // p->matchFinderMt.failure_LZ_LZ ||
+ p->matchFinderMt.failure_LZ_BT))
+ )
+ {
+ p->result = MY_HRES_ERROR_INTERNAL_ERROR;
+ // printf("\nCheckErrors p->matchFinderMt.failureLZ\n");
+ }
+ #endif
+
+ /* MFB is a macro defined earlier in the file (embedded match-finder base). */
+ if (MFB.result != SZ_OK)
+ p->result = SZ_ERROR_READ;
+
+ if (p->result != SZ_OK)
+ p->finished = True;
+ return p->result;
+}
+
+
+/* Flush: finishes the stream at unpacked position nowPos — writes the
+   optional end marker, flushes the range coder, and reports any
+   accumulated error. */
+Z7_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
+{
+ /* ReleaseMFStream(); */
+ p->finished = True;
+ if (p->writeEndMark)
+ WriteEndMarker(p, nowPos & p->pbMask);
+ RangeEnc_FlushData(&p->rc);
+ RangeEnc_FlushStream(&p->rc);
+ return CheckErrors(p);
+}
+
+
+/* FillAlignPrices: recomputes p->alignPrices (prices of the low
+   kNumAlignBits distance bits, coded as a reverse bit tree). Computes the
+   16 entries two at a time: entry i and i+8 share the first 3 bit prices
+   and differ only in the last bit. */
+Z7_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
+{
+ unsigned i;
+ const CProbPrice *ProbPrices = p->ProbPrices;
+ const CLzmaProb *probs = p->posAlignEncoder;
+ // p->alignPriceCount = 0;
+ for (i = 0; i < kAlignTableSize / 2; i++)
+ {
+ UInt32 price = 0;
+ unsigned sym = i;
+ unsigned m = 1;
+ unsigned bit;
+ UInt32 prob;
+ bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
+ bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
+ bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
+ prob = probs[m];
+ p->alignPrices[i ] = price + GET_PRICEa_0(prob);
+ p->alignPrices[i + 8] = price + GET_PRICEa_1(prob);
+ // p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices);
+ }
+}
+
+
+/* FillDistancesPrices: rebuilds the distance price tables used by the
+   optimal parser from the current probability models:
+   - tempPrices: reverse-bit-tree prices for distances < kNumFullDistances,
+   - p->posSlotPrices: 6-bit pos-slot tree prices plus the cost of the
+     direct (equiprobable) bits for large slots,
+   - p->distancesPrices: combined per-length-state price per small distance.
+   Also resets p->matchPriceCount (the refresh trigger). */
+Z7_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
+{
+ // int y; for (y = 0; y < 100; y++) {
+
+ UInt32 tempPrices[kNumFullDistances];
+ unsigned i, lps;
+
+ const CProbPrice *ProbPrices = p->ProbPrices;
+ p->matchPriceCount = 0;
+
+ /* Two distances per iteration: base+i and base+i+offset share all but
+    the final reverse-tree bit. */
+ for (i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++)
+ {
+ unsigned posSlot = GetPosSlot1(i);
+ unsigned footerBits = (posSlot >> 1) - 1;
+ unsigned base = ((2 | (posSlot & 1)) << footerBits);
+ const CLzmaProb *probs = p->posEncoders + (size_t)base * 2;
+ // tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base, footerBits, i - base, p->ProbPrices);
+ UInt32 price = 0;
+ unsigned m = 1;
+ unsigned sym = i;
+ unsigned offset = (unsigned)1 << footerBits;
+ base += i;
+
+ if (footerBits)
+ do
+ {
+ unsigned bit = sym & 1;
+ sym >>= 1;
+ price += GET_PRICEa(probs[m], bit);
+ m = (m << 1) + bit;
+ }
+ while (--footerBits);
+
+ {
+ unsigned prob = probs[m];
+ tempPrices[base ] = price + GET_PRICEa_0(prob);
+ tempPrices[base + offset] = price + GET_PRICEa_1(prob);
+ }
+ }
+
+ for (lps = 0; lps < kNumLenToPosStates; lps++)
+ {
+ unsigned slot;
+ unsigned distTableSize2 = (p->distTableSize + 1) >> 1;
+ UInt32 *posSlotPrices = p->posSlotPrices[lps];
+ const CLzmaProb *probs = p->posSlotEncoder[lps];
+
+ /* Price the 6-bit pos-slot code two slots at a time (shared prefix). */
+ for (slot = 0; slot < distTableSize2; slot++)
+ {
+ // posSlotPrices[slot] = RcTree_GetPrice(encoder, kNumPosSlotBits, slot, p->ProbPrices);
+ UInt32 price;
+ unsigned bit;
+ unsigned sym = slot + (1 << (kNumPosSlotBits - 1));
+ unsigned prob;
+ bit = sym & 1; sym >>= 1; price = GET_PRICEa(probs[sym], bit);
+ bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+ bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+ bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+ bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+ prob = probs[(size_t)slot + (1 << (kNumPosSlotBits - 1))];
+ posSlotPrices[(size_t)slot * 2 ] = price + GET_PRICEa_0(prob);
+ posSlotPrices[(size_t)slot * 2 + 1] = price + GET_PRICEa_1(prob);
+ }
+
+ /* Add the fixed price of the direct bits for large slots. */
+ {
+ UInt32 delta = ((UInt32)((kEndPosModelIndex / 2 - 1) - kNumAlignBits) << kNumBitPriceShiftBits);
+ for (slot = kEndPosModelIndex / 2; slot < distTableSize2; slot++)
+ {
+ posSlotPrices[(size_t)slot * 2 ] += delta;
+ posSlotPrices[(size_t)slot * 2 + 1] += delta;
+ delta += ((UInt32)1 << kNumBitPriceShiftBits);
+ }
+ }
+
+ {
+ UInt32 *dp = p->distancesPrices[lps];
+
+ dp[0] = posSlotPrices[0];
+ dp[1] = posSlotPrices[1];
+ dp[2] = posSlotPrices[2];
+ dp[3] = posSlotPrices[3];
+
+ for (i = 4; i < kNumFullDistances; i += 2)
+ {
+ UInt32 slotPrice = posSlotPrices[GetPosSlot1(i)];
+ dp[i ] = slotPrice + tempPrices[i];
+ dp[i + 1] = slotPrice + tempPrices[i + 1];
+ }
+ }
+ }
+ // }
+}
+
+
+
+/* LzmaEnc_Construct: one-time construction of a freshly allocated CLzmaEnc:
+   range coder, match finder (plus MT variant when !Z7_ST), default
+   properties, fast-position table, and the probability-price table.
+   Literal probability arrays start NULL and are allocated later. */
+static void LzmaEnc_Construct(CLzmaEnc *p)
+{
+ RangeEnc_Construct(&p->rc);
+ MatchFinder_Construct(&MFB);
+
+ #ifndef Z7_ST
+ p->matchFinderMt.MatchFinder = &MFB;
+ MatchFinderMt_Construct(&p->matchFinderMt);
+ #endif
+
+ {
+ CLzmaEncProps props;
+ LzmaEncProps_Init(&props);
+ LzmaEnc_SetProps((CLzmaEncHandle)(void *)p, &props);
+ }
+
+ #ifndef LZMA_LOG_BSR
+ LzmaEnc_FastPosInit(p->g_FastPos);
+ #endif
+
+ LzmaEnc_InitPriceTables(p->ProbPrices);
+ p->litProbs = NULL;
+ p->saveState.litProbs = NULL;
+}
+
+/* LzmaEnc_Create: allocates and constructs an encoder.
+   Returns NULL on allocation failure. */
+CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
+{
+ void *p;
+ p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc));
+ if (p)
+ LzmaEnc_Construct((CLzmaEnc *)p);
+ return p;
+}
+
+/* LzmaEnc_FreeLits: frees both literal-probability arrays (current and
+   saved state) and NULLs the pointers so a later realloc is detectable. */
+static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->litProbs);
+ ISzAlloc_Free(alloc, p->saveState.litProbs);
+ p->litProbs = NULL;
+ p->saveState.litProbs = NULL;
+}
+
+/* LzmaEnc_Destruct: releases all encoder-owned resources (MT match finder,
+   match-finder buffers, literal probs, range-coder buffer) but not the
+   CLzmaEnc object itself. */
+static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ #ifndef Z7_ST
+ MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
+ #endif
+
+ MatchFinder_Free(&MFB, allocBig);
+ LzmaEnc_FreeLits(p, alloc);
+ RangeEnc_Free(&p->rc, alloc);
+}
+
+/* LzmaEnc_Destroy: public destructor — destructs internals, then frees
+   the handle itself. */
+void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ // GET_CLzmaEnc_p
+ LzmaEnc_Destruct(p, alloc, allocBig);
+ ISzAlloc_Free(alloc, p);
+}
+
+
+/* LzmaEnc_CodeOneBlock: main encoding loop. Repeatedly picks the next
+   symbol (GetOptimumFast in fast mode, otherwise the optimal parser via
+   GetOptimum / the cached p->opt[] decisions) and range-codes it as a
+   literal, short-rep, rep match, or normal match.
+   maxPackSize/maxUnpackSize == 0 means "no block limit" (stream mode:
+   returns to caller every ~128 KiB of input); non-zero values bound one
+   block (used by LZMA2 / CodeOneMemBlock). Returns SZ_OK or the first
+   recorded error. */
+Z7_NO_INLINE
+static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize)
+{
+ UInt32 nowPos32, startPos32;
+ if (p->needInit)
+ {
+ #ifndef Z7_ST
+ if (p->mtMode)
+ {
+ RINOK(MatchFinderMt_InitMt(&p->matchFinderMt))
+ }
+ #endif
+ p->matchFinder.Init(p->matchFinderObj);
+ p->needInit = 0;
+ }
+
+ if (p->finished)
+ return p->result;
+ RINOK(CheckErrors(p))
+
+ nowPos32 = (UInt32)p->nowPos64;
+ startPos32 = nowPos32;
+
+ /* The very first byte of the stream is always coded as a plain literal. */
+ if (p->nowPos64 == 0)
+ {
+ unsigned numPairs;
+ Byte curByte;
+ if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
+ return Flush(p, nowPos32);
+ ReadMatchDistances(p, &numPairs);
+ RangeEnc_EncodeBit_0(&p->rc, &p->isMatch[kState_Start][0]);
+ // p->state = kLiteralNextStates[p->state];
+ curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset);
+ LitEnc_Encode(&p->rc, p->litProbs, curByte);
+ p->additionalOffset--;
+ nowPos32++;
+ }
+
+ if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0)
+
+ for (;;)
+ {
+ UInt32 dist;
+ unsigned len, posState;
+ UInt32 range, ttt, newBound;
+ CLzmaProb *probs;
+
+ /* Choose the next symbol: greedy parser, or replay/extend the
+    optimal parser's buffered decisions. */
+ if (p->fastMode)
+ len = GetOptimumFast(p);
+ else
+ {
+ unsigned oci = p->optCur;
+ if (p->optEnd == oci)
+ len = GetOptimum(p, nowPos32);
+ else
+ {
+ const COptimal *opt = &p->opt[oci];
+ len = opt->len;
+ p->backRes = opt->dist;
+ p->optCur = oci + 1;
+ }
+ }
+
+ posState = (unsigned)nowPos32 & p->pbMask;
+ range = p->rc.range;
+ probs = &p->isMatch[p->state][posState];
+
+ RC_BIT_PRE(&p->rc, probs)
+
+ dist = p->backRes;
+
+ #ifdef SHOW_STAT2
+ printf("\n pos = %6X, len = %3u pos = %6u", nowPos32, len, dist);
+ #endif
+
+ /* ---- literal ---- */
+ if (dist == MARK_LIT)
+ {
+ Byte curByte;
+ const Byte *data;
+ unsigned state;
+
+ RC_BIT_0(&p->rc, probs)
+ p->rc.range = range;
+ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
+ probs = LIT_PROBS(nowPos32, *(data - 1));
+ curByte = *data;
+ state = p->state;
+ p->state = kLiteralNextStates[state];
+ if (IsLitState(state))
+ LitEnc_Encode(&p->rc, probs, curByte);
+ else
+ LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0]));
+ }
+ else
+ {
+ RC_BIT_1(&p->rc, probs)
+ probs = &p->isRep[p->state];
+ RC_BIT_PRE(&p->rc, probs)
+
+ /* ---- rep match (dist is a rep index 0..3) ---- */
+ if (dist < LZMA_NUM_REPS)
+ {
+ RC_BIT_1(&p->rc, probs)
+ probs = &p->isRepG0[p->state];
+ RC_BIT_PRE(&p->rc, probs)
+ if (dist == 0)
+ {
+ RC_BIT_0(&p->rc, probs)
+ probs = &p->isRep0Long[p->state][posState];
+ RC_BIT_PRE(&p->rc, probs)
+ if (len != 1)
+ {
+ RC_BIT_1_BASE(&p->rc, probs)
+ }
+ else
+ {
+ RC_BIT_0_BASE(&p->rc, probs)
+ p->state = kShortRepNextStates[p->state];
+ }
+ }
+ else
+ {
+ /* rep1..rep3: encode the index and rotate the rep queue
+    so the used distance moves to reps[0]. */
+ RC_BIT_1(&p->rc, probs)
+ probs = &p->isRepG1[p->state];
+ RC_BIT_PRE(&p->rc, probs)
+ if (dist == 1)
+ {
+ RC_BIT_0_BASE(&p->rc, probs)
+ dist = p->reps[1];
+ }
+ else
+ {
+ RC_BIT_1(&p->rc, probs)
+ probs = &p->isRepG2[p->state];
+ RC_BIT_PRE(&p->rc, probs)
+ if (dist == 2)
+ {
+ RC_BIT_0_BASE(&p->rc, probs)
+ dist = p->reps[2];
+ }
+ else
+ {
+ RC_BIT_1_BASE(&p->rc, probs)
+ dist = p->reps[3];
+ p->reps[3] = p->reps[2];
+ }
+ p->reps[2] = p->reps[1];
+ }
+ p->reps[1] = p->reps[0];
+ p->reps[0] = dist;
+ }
+
+ RC_NORM(&p->rc)
+
+ p->rc.range = range;
+
+ if (len != 1)
+ {
+ LenEnc_Encode(&p->repLenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState);
+ --p->repLenEncCounter;
+ p->state = kRepNextStates[p->state];
+ }
+ }
+ /* ---- normal match: length, pos slot, footer bits, align bits ---- */
+ else
+ {
+ unsigned posSlot;
+ RC_BIT_0(&p->rc, probs)
+ p->rc.range = range;
+ p->state = kMatchNextStates[p->state];
+
+ LenEnc_Encode(&p->lenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState);
+ // --p->lenEnc.counter;
+
+ dist -= LZMA_NUM_REPS;
+ p->reps[3] = p->reps[2];
+ p->reps[2] = p->reps[1];
+ p->reps[1] = p->reps[0];
+ p->reps[0] = dist + 1;
+
+ p->matchPriceCount++;
+ GetPosSlot(dist, posSlot)
+ // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot);
+ {
+ UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits);
+ range = p->rc.range;
+ probs = p->posSlotEncoder[GetLenToPosState(len)];
+ do
+ {
+ CLzmaProb *prob = probs + (sym >> kNumPosSlotBits);
+ UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1;
+ sym <<= 1;
+ RC_BIT(&p->rc, prob, bit)
+ }
+ while (sym < (1 << kNumPosSlotBits * 2));
+ p->rc.range = range;
+ }
+
+ if (dist >= kStartPosModelIndex)
+ {
+ unsigned footerBits = ((posSlot >> 1) - 1);
+
+ if (dist < kNumFullDistances)
+ {
+ unsigned base = ((2 | (posSlot & 1)) << footerBits);
+ RcTree_ReverseEncode(&p->rc, p->posEncoders + base, footerBits, (unsigned)(dist /* - base */));
+ }
+ else
+ {
+ UInt32 pos2 = (dist | 0xF) << (32 - footerBits);
+ range = p->rc.range;
+ // RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits);
+ /*
+ do
+ {
+ range >>= 1;
+ p->rc.low += range & (0 - ((dist >> --footerBits) & 1));
+ RC_NORM(&p->rc)
+ }
+ while (footerBits > kNumAlignBits);
+ */
+ do
+ {
+ range >>= 1;
+ p->rc.low += range & (0 - (pos2 >> 31));
+ pos2 += pos2;
+ RC_NORM(&p->rc)
+ }
+ while (pos2 != 0xF0000000);
+
+
+ // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask);
+
+ {
+ unsigned m = 1;
+ unsigned bit;
+ bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
+ bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
+ bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
+ bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit)
+ p->rc.range = range;
+ // p->alignPriceCount++;
+ }
+ }
+ }
+ }
+ }
+
+ nowPos32 += (UInt32)len;
+ p->additionalOffset -= len;
+
+ /* Only at parse-boundary positions (no pending look-ahead): refresh
+    price tables and check block/stream termination conditions. */
+ if (p->additionalOffset == 0)
+ {
+ UInt32 processed;
+
+ if (!p->fastMode)
+ {
+ /*
+ if (p->alignPriceCount >= 16) // kAlignTableSize
+ FillAlignPrices(p);
+ if (p->matchPriceCount >= 128)
+ FillDistancesPrices(p);
+ if (p->lenEnc.counter <= 0)
+ LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
+ */
+ if (p->matchPriceCount >= 64)
+ {
+ FillAlignPrices(p);
+ // { int y; for (y = 0; y < 100; y++) {
+ FillDistancesPrices(p);
+ // }}
+ LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
+ }
+ if (p->repLenEncCounter <= 0)
+ {
+ p->repLenEncCounter = REP_LEN_COUNT;
+ LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
+ }
+ }
+
+ if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
+ break;
+ processed = nowPos32 - startPos32;
+
+ if (maxPackSize)
+ {
+ if (processed + kNumOpts + 300 >= maxUnpackSize
+ || RangeEnc_GetProcessed_sizet(&p->rc) + kPackReserve >= maxPackSize)
+ break;
+ }
+ else if (processed >= (1 << 17))
+ {
+ p->nowPos64 += nowPos32 - startPos32;
+ return CheckErrors(p);
+ }
+ }
+ }
+
+ p->nowPos64 += nowPos32 - startPos32;
+ return Flush(p, nowPos32);
+}
+
+
+
+/* Dictionaries above this size enable the match finder's "big hash" mode. */
+#define kBigHashDicLimit ((UInt32)1 << 24)
+
+/* LzmaEnc_Alloc: (re)allocates everything that depends on the current
+   properties: range-coder buffer, literal probability arrays (sized by
+   lc+lp), and the single- or multi-threaded match finder with its window.
+   keepWindowSize lets LZMA2 demand a larger kept window. Returns SZ_OK or
+   SZ_ERROR_MEM. */
+static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ UInt32 beforeSize = kNumOpts;
+ UInt32 dictSize;
+
+ if (!RangeEnc_Alloc(&p->rc, alloc))
+ return SZ_ERROR_MEM;
+
+ #ifndef Z7_ST
+ p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0));
+ #endif
+
+ /* Reallocate literal probs only when lc+lp actually changed. */
+ {
+ const unsigned lclp = p->lc + p->lp;
+ if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp)
+ {
+ LzmaEnc_FreeLits(p, alloc);
+ p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp);
+ p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp);
+ if (!p->litProbs || !p->saveState.litProbs)
+ {
+ LzmaEnc_FreeLits(p, alloc);
+ return SZ_ERROR_MEM;
+ }
+ p->lclp = lclp;
+ }
+ }
+
+ MFB.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
+
+
+ dictSize = p->dictSize;
+ if (dictSize == ((UInt32)2 << 30) ||
+ dictSize == ((UInt32)3 << 30))
+ {
+ /* 21.03 : here we reduce the dictionary for 2 reasons:
+ 1) we don't want 32-bit back_distance matches in decoder for 2 GB dictionary.
+ 2) we want to elimate useless last MatchFinder_Normalize3() for corner cases,
+ where data size is aligned for 1 GB: 5/6/8 GB.
+ That reducing must be >= 1 for such corner cases. */
+ dictSize -= 1;
+ }
+
+ if (beforeSize + dictSize < keepWindowSize)
+ beforeSize = keepWindowSize - dictSize;
+
+ /* in worst case we can look ahead for
+ max(LZMA_MATCH_LEN_MAX, numFastBytes + 1 + numFastBytes) bytes.
+ we send larger value for (keepAfter) to MantchFinder_Create():
+ (numFastBytes + LZMA_MATCH_LEN_MAX + 1)
+ */
+
+ #ifndef Z7_ST
+ if (p->mtMode)
+ {
+ RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize,
+ p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */
+ , allocBig))
+ p->matchFinderObj = &p->matchFinderMt;
+ MFB.bigHash = (Byte)(MFB.hashMask >= 0xFFFFFF ? 1 : 0);
+ MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
+ }
+ else
+ #endif
+ {
+ if (!MatchFinder_Create(&MFB, dictSize, beforeSize,
+ p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 21.03 */
+ , allocBig))
+ return SZ_ERROR_MEM;
+ p->matchFinderObj = &MFB;
+ MatchFinder_CreateVTable(&MFB, &p->matchFinder);
+ }
+
+ return SZ_OK;
+}
+
+/* LzmaEnc_Init: resets the coding state for a new stream — state machine,
+   rep distances, range coder, and every probability model to
+   kProbInitValue; clears the optimal-parser buffer and recomputes the
+   pb/lp-derived masks. Does not touch allocations or properties. */
+static void LzmaEnc_Init(CLzmaEnc *p)
+{
+ unsigned i;
+ p->state = 0;
+ p->reps[0] =
+ p->reps[1] =
+ p->reps[2] =
+ p->reps[3] = 1;
+
+ RangeEnc_Init(&p->rc);
+
+ for (i = 0; i < (1 << kNumAlignBits); i++)
+ p->posAlignEncoder[i] = kProbInitValue;
+
+ for (i = 0; i < kNumStates; i++)
+ {
+ unsigned j;
+ for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++)
+ {
+ p->isMatch[i][j] = kProbInitValue;
+ p->isRep0Long[i][j] = kProbInitValue;
+ }
+ p->isRep[i] = kProbInitValue;
+ p->isRepG0[i] = kProbInitValue;
+ p->isRepG1[i] = kProbInitValue;
+ p->isRepG2[i] = kProbInitValue;
+ }
+
+ {
+ for (i = 0; i < kNumLenToPosStates; i++)
+ {
+ CLzmaProb *probs = p->posSlotEncoder[i];
+ unsigned j;
+ for (j = 0; j < (1 << kNumPosSlotBits); j++)
+ probs[j] = kProbInitValue;
+ }
+ }
+ {
+ for (i = 0; i < kNumFullDistances; i++)
+ p->posEncoders[i] = kProbInitValue;
+ }
+
+ {
+ const size_t num = (size_t)0x300 << (p->lp + p->lc);
+ size_t k;
+ CLzmaProb *probs = p->litProbs;
+ for (k = 0; k < num; k++)
+ probs[k] = kProbInitValue;
+ }
+
+
+ LenEnc_Init(&p->lenProbs);
+ LenEnc_Init(&p->repLenProbs);
+
+ p->optEnd = 0;
+ p->optCur = 0;
+
+ {
+ for (i = 0; i < kNumOpts; i++)
+ p->opt[i].price = kInfinityPrice;
+ }
+
+ p->additionalOffset = 0;
+
+ p->pbMask = ((unsigned)1 << p->pb) - 1;
+ p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);
+
+ // p->mf_Failure = False;
+}
+
+
+/* LzmaEnc_InitPrices: builds the initial price tables (distance/align
+   prices only in optimal mode; length price tables always). */
+static void LzmaEnc_InitPrices(CLzmaEnc *p)
+{
+ if (!p->fastMode)
+ {
+ FillDistancesPrices(p);
+ FillAlignPrices(p);
+ }
+
+ p->lenEnc.tableSize =
+ p->repLenEnc.tableSize =
+ p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN;
+
+ p->repLenEncCounter = REP_LEN_COUNT;
+
+ LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
+ LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
+}
+
+/* LzmaEnc_AllocAndInit: computes distTableSize from dictSize, then
+   allocates buffers and resets coding state and prices for a new stream. */
+static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ unsigned i;
+ /* distTableSize = 2 * ceil(log2(dictSize)), bounded by kDicLogSizeMax. */
+ for (i = kEndPosModelIndex / 2; i < kDicLogSizeMax; i++)
+ if (p->dictSize <= ((UInt32)1 << i))
+ break;
+ p->distTableSize = i * 2;
+
+ p->finished = False;
+ p->result = SZ_OK;
+ p->nowPos64 = 0;
+ p->needInit = 1;
+ RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig))
+ LzmaEnc_Init(p);
+ LzmaEnc_InitPrices(p);
+ return SZ_OK;
+}
+
+/* LzmaEnc_Prepare: binds input/output streams and performs full
+   allocation + state init for streamed encoding. */
+static SRes LzmaEnc_Prepare(CLzmaEncHandle p,
+ ISeqOutStreamPtr outStream,
+ ISeqInStreamPtr inStream,
+ ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ // GET_CLzmaEnc_p
+ MatchFinder_SET_STREAM(&MFB, inStream)
+ p->rc.outStream = outStream;
+ return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
+}
+
+/* LzmaEnc_PrepareForLzma2: like LzmaEnc_Prepare but without an output
+   stream (LZMA2 supplies per-chunk buffers) and with a caller-specified
+   kept-window size. */
+SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p,
+ ISeqInStreamPtr inStream, UInt32 keepWindowSize,
+ ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ // GET_CLzmaEnc_p
+ MatchFinder_SET_STREAM(&MFB, inStream)
+ return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
+}
+
+/* LzmaEnc_MemPrepare: prepares encoding directly from an in-memory buffer
+   (direct-input match finder), recording the exact data size. */
+SRes LzmaEnc_MemPrepare(CLzmaEncHandle p,
+ const Byte *src, SizeT srcLen,
+ UInt32 keepWindowSize,
+ ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ // GET_CLzmaEnc_p
+ MatchFinder_SET_DIRECT_INPUT_BUF(&MFB, src, srcLen)
+ LzmaEnc_SetDataSize(p, srcLen);
+ return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
+}
+
+/* LzmaEnc_Finish: releases the MT match finder's input stream (no-op in
+   single-threaded builds or ST mode). */
+void LzmaEnc_Finish(CLzmaEncHandle p)
+{
+ #ifndef Z7_ST
+ // GET_CLzmaEnc_p
+ if (p->mtMode)
+ MatchFinderMt_ReleaseStream(&p->matchFinderMt);
+ #else
+ UNUSED_VAR(p)
+ #endif
+}
+
+
+/* ISeqOutStream adapter that writes into a fixed caller-supplied buffer.
+   Instead of failing on exhaustion it truncates and sets 'overflow'. */
+typedef struct
+{
+ ISeqOutStream vt;
+ /* 'data' is the next write position, 'rem' the remaining capacity. */
+ Byte *data;
+ size_t rem;
+ BoolInt overflow;
+} CLzmaEnc_SeqOutStreamBuf;
+
+/* SeqOutStreamBuf_Write: ISeqOutStream.Write for the buffer adapter.
+   Copies at most 'rem' bytes; a short write sets 'overflow'. Returns the
+   number of bytes actually stored. */
+static size_t SeqOutStreamBuf_Write(ISeqOutStreamPtr pp, const void *data, size_t size)
+{
+ Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLzmaEnc_SeqOutStreamBuf)
+ if (p->rem < size)
+ {
+ size = p->rem;
+ p->overflow = True;
+ }
+ if (size != 0)
+ {
+ memcpy(p->data, data, size);
+ p->rem -= size;
+ p->data += size;
+ }
+ return size;
+}
+
+
+/*
+UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p)
+{
+ GET_const_CLzmaEnc_p
+ return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+}
+*/
+
+/* LzmaEnc_GetCurBuf: pointer to the next unencoded input byte (match
+   finder position rewound by the pending look-ahead offset). */
+const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p)
+{
+ // GET_const_CLzmaEnc_p
+ return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
+}
+
+
+// (desiredPackSize == 0) is not allowed
+/* LzmaEnc_CodeOneMemBlock: encodes one bounded block into dest[] (used by
+   LZMA2). On input *destLen is the buffer size and *unpackSize the input
+   budget; on output they hold bytes written and input consumed. Disables
+   the end marker and re-inits the range coder per block; optionally
+   re-inits all coding state when reInit is set. Returns
+   SZ_ERROR_OUTPUT_EOF if dest[] was too small. */
+SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
+ Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
+{
+ // GET_CLzmaEnc_p
+ UInt64 nowPos64;
+ SRes res;
+ CLzmaEnc_SeqOutStreamBuf outStream;
+
+ outStream.vt.Write = SeqOutStreamBuf_Write;
+ outStream.data = dest;
+ outStream.rem = *destLen;
+ outStream.overflow = False;
+
+ p->writeEndMark = False;
+ p->finished = False;
+ p->result = SZ_OK;
+
+ if (reInit)
+ LzmaEnc_Init(p);
+ LzmaEnc_InitPrices(p);
+ RangeEnc_Init(&p->rc);
+ p->rc.outStream = &outStream.vt;
+ nowPos64 = p->nowPos64;
+
+ res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize);
+
+ *unpackSize = (UInt32)(p->nowPos64 - nowPos64);
+ *destLen -= outStream.rem;
+ if (outStream.overflow)
+ return SZ_ERROR_OUTPUT_EOF;
+
+ return res;
+}
+
+
+/* LzmaEnc_Encode2: drives LzmaEnc_CodeOneBlock until the stream is
+   finished or an error occurs, reporting progress between blocks.
+   Converts a progress-callback abort into SZ_ERROR_PROGRESS. */
+Z7_NO_INLINE
+static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgressPtr progress)
+{
+ SRes res = SZ_OK;
+
+ #ifndef Z7_ST
+ /* NOTE(review): presumably reserves stack space / defeats an optimizer
+    assumption in MT builds — confirm against upstream LzmaEnc.c notes. */
+ Byte allocaDummy[0x300];
+ allocaDummy[0] = 0;
+ allocaDummy[1] = allocaDummy[0];
+ #endif
+
+ for (;;)
+ {
+ res = LzmaEnc_CodeOneBlock(p, 0, 0);
+ if (res != SZ_OK || p->finished)
+ break;
+ if (progress)
+ {
+ res = ICompressProgress_Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc));
+ if (res != SZ_OK)
+ {
+ res = SZ_ERROR_PROGRESS;
+ break;
+ }
+ }
+ }
+
+ LzmaEnc_Finish((CLzmaEncHandle)(void *)p);
+
+ /*
+ if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB))
+ res = SZ_ERROR_FAIL;
+ }
+ */
+
+ return res;
+}
+
+
+/* LzmaEnc_Encode: public stream-to-stream encode — prepare, then run the
+   block loop to completion. */
+SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress,
+ ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ // GET_CLzmaEnc_p
+ RINOK(LzmaEnc_Prepare(p, outStream, inStream, alloc, allocBig))
+ return LzmaEnc_Encode2(p, progress);
+}
+
+
+/* LzmaEnc_WriteProperties: serializes the 5-byte LZMA properties header:
+   props[0] = (pb*5 + lp)*9 + lc, props[1..4] = little-endian dictionary
+   size, rounded to a value the decoder accepts. *size must be >=
+   LZMA_PROPS_SIZE and is set to exactly LZMA_PROPS_SIZE. */
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *props, SizeT *size)
+{
+ if (*size < LZMA_PROPS_SIZE)
+ return SZ_ERROR_PARAM;
+ *size = LZMA_PROPS_SIZE;
+ {
+ // GET_CLzmaEnc_p
+ const UInt32 dictSize = p->dictSize;
+ UInt32 v;
+ props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
+
+ // we write aligned dictionary value to properties for lzma decoder
+ if (dictSize >= ((UInt32)1 << 21))
+ {
+ /* round up to a 1 MiB multiple (guarding against overflow wrap) */
+ const UInt32 kDictMask = ((UInt32)1 << 20) - 1;
+ v = (dictSize + kDictMask) & ~kDictMask;
+ if (v < dictSize)
+ v = dictSize;
+ }
+ else
+ {
+ /* smallest 2^i or 3*2^(i-1) value >= dictSize */
+ unsigned i = 11 * 2;
+ do
+ {
+ v = (UInt32)(2 + (i & 1)) << (i >> 1);
+ i++;
+ }
+ while (v < dictSize);
+ }
+
+ SetUi32(props + 1, v)
+ return SZ_OK;
+ }
+}
+
+
+/* LzmaEnc_IsWriteEndMark: returns the current writeEndMark flag (nonzero
+   when an end-of-stream marker will be emitted). */
+unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p)
+{
+ // GET_CLzmaEnc_p
+ return (unsigned)p->writeEndMark;
+}
+
+
+/* LzmaEnc_MemEncode: buffer-to-buffer encode on an existing handle.
+   On output *destLen is the number of bytes written; returns
+   SZ_ERROR_OUTPUT_EOF if dest was too small, SZ_ERROR_FAIL if the whole
+   input was not consumed. */
+SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+ int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ SRes res;
+ // GET_CLzmaEnc_p
+
+ CLzmaEnc_SeqOutStreamBuf outStream;
+
+ outStream.vt.Write = SeqOutStreamBuf_Write;
+ outStream.data = dest;
+ outStream.rem = *destLen;
+ outStream.overflow = False;
+
+ p->writeEndMark = writeEndMark;
+ p->rc.outStream = &outStream.vt;
+
+ res = LzmaEnc_MemPrepare(p, src, srcLen, 0, alloc, allocBig);
+
+ if (res == SZ_OK)
+ {
+ res = LzmaEnc_Encode2(p, progress);
+ if (res == SZ_OK && p->nowPos64 != srcLen)
+ res = SZ_ERROR_FAIL;
+ }
+
+ *destLen -= (SizeT)outStream.rem;
+ if (outStream.overflow)
+ return SZ_ERROR_OUTPUT_EOF;
+ return res;
+}
+
+
+/* LzmaEncode: one-shot convenience API — creates an encoder, applies
+   props, writes the 5-byte properties header to propsEncoded, compresses
+   src into dest, and destroys the encoder. */
+SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+ const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
+ ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ CLzmaEncHandle p = LzmaEnc_Create(alloc);
+ SRes res;
+ if (!p)
+ return SZ_ERROR_MEM;
+
+ res = LzmaEnc_SetProps(p, props);
+ if (res == SZ_OK)
+ {
+ res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize);
+ if (res == SZ_OK)
+ res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen,
+ writeEndMark, progress, alloc, allocBig);
+ }
+
+ LzmaEnc_Destroy(p, alloc, allocBig);
+ return res;
+}
+
+
+/*
+#ifndef Z7_ST
+void LzmaEnc_GetLzThreads(CLzmaEncHandle p, HANDLE lz_threads[2])
+{
+ GET_const_CLzmaEnc_p
+ lz_threads[0] = p->matchFinderMt.hashSync.thread;
+ lz_threads[1] = p->matchFinderMt.btSync.thread;
+}
+#endif
+*/
+++ /dev/null
-# Travis CI is used to test platforms that github-actions currently doesn't support
-# without either self-hosting or some finnicky work-around. Also, some tests
-# are troublesome to migrate since GH Actions runs tests not in a tty.
-language: c
-
-git:
- depth: 1
-
-branches:
- only:
- - dev
- - release
- - master
- - travisTest
-
-addons:
- apt:
- update: true
-
-env:
- global:
- - FUZZERTEST=-T1mn
- ZSTREAM_TESTTIME=-T1mn
- DECODECORPUS_TESTTIME=-T1mn
-
-matrix:
- fast_finish: true
- include:
- - name: S390X (big endian) + Fuzz test
- dist: trusty
- arch: s390x
- script:
- - FUZZER_FLAGS=--no-big-tests make -C tests fuzztest
-
- - name: S390X (big endian) + Fuzz test + no intrinsics
- dist: trusty
- arch: s390x
- script:
- - MOREFLAGS="-DZSTD_NO_INTRINSICS" FUZZER_FLAGS=--no-big-tests make -C tests fuzztest
-
- - name: arm64 # ~2.5 mn
- os: linux
- arch: arm64
- script:
- - make check
-
- - name: arm64fuzz
- os: linux
- arch: arm64
- script:
- - make -C tests fuzztest
-
- # TODO: migrate to GH Actions once newest clang staticanalyze warnings are fixed
- - name: static analyzer scanbuild # ~8mn
- dist: trusty # note : it's important to pin down a version of static analyzer, since different versions report different false positives
- script:
- - make staticAnalyze
-
- # GH actions can't run this command on OS-X, non-tty issues
- - name: OS-X make all lib
- os: osx
- script:
- - make -C lib all
-
- # Introduced to check compat with old toolchains, to prevent e.g. #1872
- - name: ARM Build Test (on Trusty)
- dist: trusty
- script:
- - make arminstall
- - make armbuild
-
- # check release number (release/new tag only)
- - name: Tag-Specific Test
- if: tag =~ ^v[0-9]\.[0-9]
- script:
- - make -C tests checkTag
- - tests/checkTag "$TRAVIS_BRANCH"
-
- - name: PPC64LE + Fuzz test # ~13mn
- arch: ppc64le
- env:
- - FUZZER_FLAGS=--no-big-tests
- - MOREFLAGS="-static"
- script:
- - cat /proc/cpuinfo
- - make -C tests fuzztest
-
- # This test currently fails on GA specifically, for no obvious reason
- # (it works fine on travisCI, and on local test platforms).
- - name: Versions Compatibility Test # ~6mn
- script:
- - make -C tests versionsTest
-
- # meson dedicated test
- - name: Focal (Meson + clang) # ~15mn
- dist: focal
- language: cpp
- compiler: clang
- install:
- - sudo apt-get install -qq liblz4-dev valgrind tree
- - |
- travis_retry curl -o ~/ninja.zip -L 'https://github.com/ninja-build/ninja/releases/download/v1.9.0/ninja-linux.zip' &&
- unzip ~/ninja.zip -d ~/.local/bin
- - |
- travis_retry curl -o ~/get-pip.py -L 'https://bootstrap.pypa.io/pip/3.6/get-pip.py' &&
- python3 ~/get-pip.py --user &&
- pip3 install --user meson
- script:
- - |
- meson setup \
- --buildtype=debugoptimized \
- -Db_lundef=false \
- -Dauto_features=enabled \
- -Dbin_programs=true \
- -Dbin_tests=true \
- -Dbin_contrib=true \
- -Ddefault_library=both \
- build/meson builddir
- - pushd builddir
- - ninja
- - meson test --verbose --no-rebuild
- - DESTDIR=./staging ninja install
- - tree ./staging
- after_failure:
- - cat "$TRAVIS_BUILD_DIR"/builddir/meson-logs/testlog.txt
-
- allow_failures:
- - env: ALLOW_FAILURES=true
+++ /dev/null
-add_library(zstd STATIC
- lib/zstd.h
- lib/zstd_errors.h
- lib/zdict.h
- lib/common/debug.c
- lib/common/entropy_common.c
- lib/common/error_private.c
- lib/common/fse_decompress.c
- lib/common/pool.c
- lib/common/threading.c
- lib/common/xxhash.c
- lib/common/zstd_common.c
- lib/decompress/huf_decompress.c
- lib/decompress/zstd_ddict.c
- lib/decompress/zstd_decompress_block.c
- lib/decompress/zstd_decompress.c
-)
-
-
-target_include_directories(zstd PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/lib")
-target_include_directories(zstd INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/lib")
-target_compile_definitions(zstd PRIVATE ZSTD_DISABLE_ASM)
-set_target_properties(zstd PROPERTIES POSITION_INDEPENDENT_CODE ON)
-
+++ /dev/null
-# Following tests are run _only_ on `release` branch
-# and on selected feature branch named `appveyorTest` or `visual*`
-
--
- version: 1.0.{build}
- branches:
- only:
- - release
- - master
- - /appveyor*/
- - /visual*/
- environment:
- matrix:
- - COMPILER: "gcc"
- HOST: "mingw"
- PLATFORM: "x64"
- SCRIPT: "make allzstd MOREFLAGS=-static"
- ARTIFACT: "true"
- BUILD: "true"
- - COMPILER: "gcc"
- HOST: "mingw"
- PLATFORM: "x86"
- SCRIPT: "make allzstd MOREFLAGS=-static"
- ARTIFACT: "true"
- BUILD: "true"
-
- - COMPILER: "clang-cl"
- HOST: "cmake-visual"
- PLATFORM: "x64"
- CONFIGURATION: "Release"
- CMAKE_GENERATOR: "Visual Studio 15 2017"
- CMAKE_GENERATOR_PLATFORM: "x64"
- CMAKE_GENERATOR_TOOLSET: "LLVM"
- APPVEYOR_BUILD_WORKER_IMAGE: "Visual Studio 2017"
-
- install:
- - ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION%
- - SET PATH_ORIGINAL=%PATH%
- - if [%HOST%]==[mingw] (
- SET "PATH_MINGW32=C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin" &&
- SET "PATH_MINGW64=C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin" &&
- COPY C:\msys64\usr\bin\make.exe C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin\make.exe &&
- COPY C:\msys64\usr\bin\make.exe C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin\make.exe
- )
- - IF [%HOST%]==[visual] IF [%PLATFORM%]==[x64] (
- SET ADDITIONALPARAM=/p:LibraryPath="C:\Program Files\Microsoft SDKs\Windows\v7.1\lib\x64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;C:\Program Files (x86)\Microsoft Visual Studio 10.0\;C:\Program Files (x86)\Microsoft Visual Studio 10.0\lib\amd64;"
- )
-
- build_script:
- - if [%HOST%]==[mingw] (
- ( if [%PLATFORM%]==[x64] (
- SET "PATH=%PATH_MINGW64%;%PATH_ORIGINAL%"
- ) else if [%PLATFORM%]==[x86] (
- SET "PATH=%PATH_MINGW32%;%PATH_ORIGINAL%"
- ) )
- )
- - if [%HOST%]==[mingw] if [%BUILD%]==[true] (
- make -v &&
- sh -c "%COMPILER% -v" &&
- ECHO Building zlib to static link &&
- SET "CC=%COMPILER%" &&
- sh -c "cd .. && git clone --depth 1 --branch v1.2.11 https://github.com/madler/zlib" &&
- sh -c "cd ../zlib && make -f win32/Makefile.gcc libz.a"
- ECHO Building zstd &&
- SET "CPPFLAGS=-I../../zlib" &&
- SET "LDFLAGS=../../zlib/libz.a" &&
- sh -c "%SCRIPT%" &&
- ( if [%COMPILER%]==[gcc] if [%ARTIFACT%]==[true]
- ECHO Creating artifacts &&
- ECHO %cd% &&
- lib\dll\example\build_package.bat &&
- make -C programs DEBUGFLAGS= clean zstd &&
- cd programs\ && 7z a -tzip -mx9 zstd-win-binary-%PLATFORM%.zip zstd.exe &&
- appveyor PushArtifact zstd-win-binary-%PLATFORM%.zip &&
- cp zstd.exe ..\bin\zstd.exe &&
- git clone --depth 1 --branch release https://github.com/facebook/zstd &&
- cd zstd &&
- git archive --format=tar release -o zstd-src.tar &&
- ..\zstd -19 zstd-src.tar &&
- appveyor PushArtifact zstd-src.tar.zst &&
- certUtil -hashfile zstd-src.tar.zst SHA256 > zstd-src.tar.zst.sha256.sig &&
- appveyor PushArtifact zstd-src.tar.zst.sha256.sig &&
- cd ..\..\bin\ &&
- 7z a -tzip -mx9 zstd-win-release-%PLATFORM%.zip * &&
- appveyor PushArtifact zstd-win-release-%PLATFORM%.zip
- )
- )
- - if [%HOST%]==[cmake-visual] (
- ECHO *** &&
- ECHO *** Building %CMAKE_GENERATOR% ^(%CMAKE_GENERATOR_TOOLSET%^) %PLATFORM%\%CONFIGURATION% &&
- PUSHD build\cmake &&
- cmake -DBUILD_TESTING=ON . &&
- cmake --build . --config %CONFIGURATION% -j4 &&
- POPD &&
- ECHO ***
- )
-
- test_script:
- - ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION%
- - SET "CC=gcc"
- - SET "CXX=g++"
- - if [%TEST%]==[cmake] (
- mkdir build\cmake\build &&
- cd build\cmake\build &&
- SET FUZZERTEST=-T2mn &&
- SET ZSTREAM_TESTTIME=-T2mn &&
- cmake -G "Visual Studio 14 2015 Win64" .. &&
- cd ..\..\.. &&
- make clean
- )
-
-
-# The following tests are for regular pushes
-# into `dev` or some feature branch
-# There run less tests, for shorter feedback loop
-
--
- version: 1.0.{build}
- environment:
- matrix:
- - COMPILER: "visual"
- HOST: "visual"
- PLATFORM: "x64"
- CONFIGURATION: "Debug"
- - COMPILER: "visual"
- HOST: "visual"
- PLATFORM: "Win32"
- CONFIGURATION: "Debug"
- - COMPILER: "visual"
- HOST: "visual"
- PLATFORM: "x64"
- CONFIGURATION: "Release"
- - COMPILER: "visual"
- HOST: "visual"
- PLATFORM: "Win32"
- CONFIGURATION: "Release"
-
- - COMPILER: "gcc"
- HOST: "cygwin"
- PLATFORM: "x64"
-
- - COMPILER: "clang-cl"
- HOST: "cmake-visual"
- PLATFORM: "x64"
- CONFIGURATION: "Release"
- CMAKE_GENERATOR: "Visual Studio 15 2017"
- CMAKE_GENERATOR_PLATFORM: "x64"
- CMAKE_GENERATOR_TOOLSET: "LLVM"
- APPVEYOR_BUILD_WORKER_IMAGE: "Visual Studio 2017"
-
- install:
- - ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION%
- - SET PATH_ORIGINAL=%PATH%
- - if [%HOST%]==[cygwin] (
- ECHO Installing Cygwin Packages &&
- C:\cygwin64\setup-x86_64.exe -qnNdO -R "C:\cygwin64" -g -P ^
- gcc,^
- cmake,^
- make
- )
- - IF [%HOST%]==[visual] IF [%PLATFORM%]==[x64] (
- SET ADDITIONALPARAM=/p:LibraryPath="C:\Program Files\Microsoft SDKs\Windows\v7.1\lib\x64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;C:\Program Files (x86)\Microsoft Visual Studio 10.0\;C:\Program Files (x86)\Microsoft Visual Studio 10.0\lib\amd64;"
- )
-
- build_script:
- - ECHO Building %COMPILER% %PLATFORM% %CONFIGURATION%
- - if [%HOST%]==[cygwin] (
- set CHERE_INVOKING=yes &&
- set CC=%COMPILER% &&
- C:\cygwin64\bin\bash --login -c "
- set -e;
- cd build/cmake;
- CFLAGS='-Werror' cmake -G 'Unix Makefiles' -DCMAKE_BUILD_TYPE=Debug -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_FUZZER_FLAGS=-T20s -DZSTD_ZSTREAM_FLAGS=-T20s -DZSTD_FULLBENCH_FLAGS=-i0 .;
- make VERBOSE=1 -j;
- ctest -V -L Medium;
- "
- )
- - if [%HOST%]==[cmake-visual] (
- ECHO *** &&
- ECHO *** Building %CMAKE_GENERATOR% ^(%CMAKE_GENERATOR_TOOLSET%^) %PLATFORM%\%CONFIGURATION% &&
- PUSHD build\cmake &&
- cmake -DBUILD_TESTING=ON . &&
- cmake --build . --config %CONFIGURATION% -j4 &&
- POPD &&
- ECHO ***
- )
- - if [%HOST%]==[visual] (
- ECHO *** &&
- ECHO *** Building Visual Studio 2012 %PLATFORM%\%CONFIGURATION% &&
- ECHO *** &&
- msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /p:ForceImportBeforeCppTargets=%APPVEYOR_BUILD_FOLDER%\build\VS2010\CompileAsCpp.props /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
- DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
- msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
- DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe
- )
-
-
- test_script:
- - ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION%
- - SET "FUZZERTEST=-T10s"
- - if [%HOST%]==[mingw] (
- set "CC=%COMPILER%" &&
- make clean &&
- make check
- )
\ No newline at end of file
+++ /dev/null
-include("${CMAKE_CURRENT_LIST_DIR}/zstdTargets.cmake")
+++ /dev/null
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-
-/* **************************************
-* Tuning parameters
-****************************************/
-#ifndef BMK_TIMETEST_DEFAULT_S /* default minimum time per test */
-# define BMK_TIMETEST_DEFAULT_S 3
-#endif
-
-
-/* *************************************
-* Includes
-***************************************/
-#include "platform.h" /* Large Files support */
-#include "util.h" /* UTIL_getFileSize, UTIL_sleep */
-#include <stdlib.h> /* malloc, free */
-#include <string.h> /* memset, strerror */
-#include <stdio.h> /* fprintf, fopen */
-#include <errno.h>
-#include <assert.h> /* assert */
-
-#include "timefn.h" /* UTIL_time_t */
-#include "benchfn.h"
-#include "../lib/common/mem.h"
-#ifndef ZSTD_STATIC_LINKING_ONLY
-#define ZSTD_STATIC_LINKING_ONLY
-#endif
-#include "../lib/zstd.h"
-#include "datagen.h" /* RDG_genBuffer */
-#ifndef XXH_INLINE_ALL
-#define XXH_INLINE_ALL
-#endif
-#include "../lib/common/xxhash.h"
-#include "benchzstd.h"
-#include "../lib/zstd_errors.h"
-
-
-/* *************************************
-* Constants
-***************************************/
-#ifndef ZSTD_GIT_COMMIT
-# define ZSTD_GIT_COMMIT_STRING ""
-#else
-# define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT)
-#endif
-
-#define TIMELOOP_MICROSEC (1*1000000ULL) /* 1 second */
-#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
-#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */
-#define COOLPERIOD_SEC 10
-
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
-
-#define BMK_RUNTEST_DEFAULT_MS 1000
-
-static const size_t maxMemory = (sizeof(size_t)==4) ?
- /* 32-bit */ (2 GB - 64 MB) :
- /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t)*8)-31));
-
-
-/* *************************************
-* console display
-***************************************/
-#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush(NULL); }
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-/* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */
-#define OUTPUT(...) { fprintf(stdout, __VA_ARGS__); fflush(NULL); }
-#define OUTPUTLEVEL(l, ...) if (displayLevel>=l) { OUTPUT(__VA_ARGS__); }
-
-
-/* *************************************
-* Exceptions
-***************************************/
-#ifndef DEBUG
-# define DEBUG 0
-#endif
-#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
-
-#define RETURN_ERROR_INT(errorNum, ...) { \
- DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
- DISPLAYLEVEL(1, "Error %i : ", errorNum); \
- DISPLAYLEVEL(1, __VA_ARGS__); \
- DISPLAYLEVEL(1, " \n"); \
- return errorNum; \
-}
-
-#define CHECK_Z(zf) { \
- size_t const zerr = zf; \
- if (ZSTD_isError(zerr)) { \
- DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
- DISPLAY("Error : "); \
- DISPLAY("%s failed : %s", \
- #zf, ZSTD_getErrorName(zerr)); \
- DISPLAY(" \n"); \
- exit(1); \
- } \
-}
-
-#define RETURN_ERROR(errorNum, retType, ...) { \
- retType r; \
- memset(&r, 0, sizeof(retType)); \
- DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
- DISPLAYLEVEL(1, "Error %i : ", errorNum); \
- DISPLAYLEVEL(1, __VA_ARGS__); \
- DISPLAYLEVEL(1, " \n"); \
- r.tag = errorNum; \
- return r; \
-}
-
-
-/* *************************************
-* Benchmark Parameters
-***************************************/
-
-BMK_advancedParams_t BMK_initAdvancedParams(void) {
- BMK_advancedParams_t const res = {
- BMK_both, /* mode */
- BMK_TIMETEST_DEFAULT_S, /* nbSeconds */
- 0, /* blockSize */
- 0, /* nbWorkers */
- 0, /* realTime */
- 0, /* additionalParam */
- 0, /* ldmFlag */
- 0, /* ldmMinMatch */
- 0, /* ldmHashLog */
- 0, /* ldmBuckSizeLog */
- 0, /* ldmHashRateLog */
- ZSTD_ps_auto, /* literalCompressionMode */
- 0 /* useRowMatchFinder */
- };
- return res;
-}
-
-
-/* ********************************************************
-* Bench functions
-**********************************************************/
-typedef struct {
- const void* srcPtr;
- size_t srcSize;
- void* cPtr;
- size_t cRoom;
- size_t cSize;
- void* resPtr;
- size_t resSize;
-} blockParam_t;
-
-#undef MIN
-#undef MAX
-#define MIN(a,b) ((a) < (b) ? (a) : (b))
-#define MAX(a,b) ((a) > (b) ? (a) : (b))
-
-static void
-BMK_initCCtx(ZSTD_CCtx* ctx,
- const void* dictBuffer, size_t dictBufferSize,
- int cLevel,
- const ZSTD_compressionParameters* comprParams,
- const BMK_advancedParams_t* adv)
-{
- ZSTD_CCtx_reset(ctx, ZSTD_reset_session_and_parameters);
- if (adv->nbWorkers==1) {
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, 0));
- } else {
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers));
- }
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel));
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder));
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag));
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch));
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog));
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog));
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog));
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, (int)comprParams->windowLog));
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, (int)comprParams->hashLog));
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, (int)comprParams->chainLog));
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, (int)comprParams->searchLog));
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, (int)comprParams->minMatch));
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, (int)comprParams->targetLength));
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_literalCompressionMode, (int)adv->literalCompressionMode));
- CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_strategy, (int)comprParams->strategy));
- CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize));
-}
-
-static void BMK_initDCtx(ZSTD_DCtx* dctx,
- const void* dictBuffer, size_t dictBufferSize) {
- CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
- CHECK_Z(ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize));
-}
-
-
-typedef struct {
- ZSTD_CCtx* cctx;
- const void* dictBuffer;
- size_t dictBufferSize;
- int cLevel;
- const ZSTD_compressionParameters* comprParams;
- const BMK_advancedParams_t* adv;
-} BMK_initCCtxArgs;
-
-static size_t local_initCCtx(void* payload) {
- BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload;
- BMK_initCCtx(ag->cctx, ag->dictBuffer, ag->dictBufferSize, ag->cLevel, ag->comprParams, ag->adv);
- return 0;
-}
-
-typedef struct {
- ZSTD_DCtx* dctx;
- const void* dictBuffer;
- size_t dictBufferSize;
-} BMK_initDCtxArgs;
-
-static size_t local_initDCtx(void* payload) {
- BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload;
- BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize);
- return 0;
-}
-
-
-/* `addArgs` is the context */
-static size_t local_defaultCompress(
- const void* srcBuffer, size_t srcSize,
- void* dstBuffer, size_t dstSize,
- void* addArgs)
-{
- ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs;
- return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize);
-}
-
-/* `addArgs` is the context */
-static size_t local_defaultDecompress(
- const void* srcBuffer, size_t srcSize,
- void* dstBuffer, size_t dstCapacity,
- void* addArgs)
-{
- size_t moreToFlush = 1;
- ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs;
- ZSTD_inBuffer in;
- ZSTD_outBuffer out;
- in.src = srcBuffer; in.size = srcSize; in.pos = 0;
- out.dst = dstBuffer; out.size = dstCapacity; out.pos = 0;
- while (moreToFlush) {
- if(out.pos == out.size) {
- return (size_t)-ZSTD_error_dstSize_tooSmall;
- }
- moreToFlush = ZSTD_decompressStream(dctx, &out, &in);
- if (ZSTD_isError(moreToFlush)) {
- return moreToFlush;
- }
- }
- return out.pos;
-
-}
-
-
-/* ================================================================= */
-/* Benchmark Zstandard, mem-to-mem scenarios */
-/* ================================================================= */
-
-int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome)
-{
- return outcome.tag == 0;
-}
-
-BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome)
-{
- assert(outcome.tag == 0);
- return outcome.internal_never_use_directly;
-}
-
-static BMK_benchOutcome_t BMK_benchOutcome_error(void)
-{
- BMK_benchOutcome_t b;
- memset(&b, 0, sizeof(b));
- b.tag = 1;
- return b;
-}
-
-static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(BMK_benchResult_t result)
-{
- BMK_benchOutcome_t b;
- b.tag = 0;
- b.internal_never_use_directly = result;
- return b;
-}
-
-
-/* benchMem with no allocation */
-static BMK_benchOutcome_t
-BMK_benchMemAdvancedNoAlloc(
- const void** srcPtrs, size_t* srcSizes,
- void** cPtrs, size_t* cCapacities, size_t* cSizes,
- void** resPtrs, size_t* resSizes,
- void** resultBufferPtr, void* compressedBuffer,
- size_t maxCompressedSize,
- BMK_timedFnState_t* timeStateCompress,
- BMK_timedFnState_t* timeStateDecompress,
-
- const void* srcBuffer, size_t srcSize,
- const size_t* fileSizes, unsigned nbFiles,
- const int cLevel,
- const ZSTD_compressionParameters* comprParams,
- const void* dictBuffer, size_t dictBufferSize,
- ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
- int displayLevel, const char* displayName,
- const BMK_advancedParams_t* adv)
-{
- size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */
- BMK_benchResult_t benchResult;
- size_t const loadedCompressedSize = srcSize;
- size_t cSize = 0;
- double ratio = 0.;
- U32 nbBlocks;
-
- assert(cctx != NULL); assert(dctx != NULL);
-
- /* init */
- memset(&benchResult, 0, sizeof(benchResult));
- if (strlen(displayName)>17) displayName += strlen(displayName) - 17; /* display last 17 characters */
- if (adv->mode == BMK_decodeOnly) {
- /* benchmark only decompression : source must be already compressed */
- const char* srcPtr = (const char*)srcBuffer;
- U64 totalDSize64 = 0;
- U32 fileNb;
- for (fileNb=0; fileNb<nbFiles; fileNb++) {
- U64 const fSize64 = ZSTD_findDecompressedSize(srcPtr, fileSizes[fileNb]);
- if (fSize64 == ZSTD_CONTENTSIZE_UNKNOWN) {
- RETURN_ERROR(32, BMK_benchOutcome_t, "Decompressed size cannot be determined: cannot benchmark");
- }
- if (fSize64 == ZSTD_CONTENTSIZE_ERROR) {
- RETURN_ERROR(32, BMK_benchOutcome_t, "Error while trying to assess decompressed size: data may be invalid");
- }
- totalDSize64 += fSize64;
- srcPtr += fileSizes[fileNb];
- }
- { size_t const decodedSize = (size_t)totalDSize64;
- assert((U64)decodedSize == totalDSize64); /* check overflow */
- free(*resultBufferPtr);
- if (totalDSize64 > decodedSize) { /* size_t overflow */
- RETURN_ERROR(32, BMK_benchOutcome_t, "decompressed size is too large for local system");
- }
- *resultBufferPtr = malloc(decodedSize);
- if (!(*resultBufferPtr)) {
- RETURN_ERROR(33, BMK_benchOutcome_t, "allocation error: not enough memory");
- }
- cSize = srcSize;
- srcSize = decodedSize;
- ratio = (double)srcSize / (double)cSize;
- }
- }
-
- /* Init data blocks */
- { const char* srcPtr = (const char*)srcBuffer;
- char* cPtr = (char*)compressedBuffer;
- char* resPtr = (char*)(*resultBufferPtr);
- U32 fileNb;
- for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) {
- size_t remaining = fileSizes[fileNb];
- U32 const nbBlocksforThisFile = (adv->mode == BMK_decodeOnly) ? 1 : (U32)((remaining + (blockSize-1)) / blockSize);
- U32 const blockEnd = nbBlocks + nbBlocksforThisFile;
- for ( ; nbBlocks<blockEnd; nbBlocks++) {
- size_t const thisBlockSize = MIN(remaining, blockSize);
- srcPtrs[nbBlocks] = srcPtr;
- srcSizes[nbBlocks] = thisBlockSize;
- cPtrs[nbBlocks] = cPtr;
- cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize);
- resPtrs[nbBlocks] = resPtr;
- resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize;
- srcPtr += thisBlockSize;
- cPtr += cCapacities[nbBlocks];
- resPtr += thisBlockSize;
- remaining -= thisBlockSize;
- if (adv->mode == BMK_decodeOnly) {
- cSizes[nbBlocks] = thisBlockSize;
- benchResult.cSize = thisBlockSize;
- } } } }
-
- /* warming up `compressedBuffer` */
- if (adv->mode == BMK_decodeOnly) {
- memcpy(compressedBuffer, srcBuffer, loadedCompressedSize);
- } else {
- RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);
- }
-
- if (!UTIL_support_MT_measurements() && adv->nbWorkers > 1) {
- OUTPUTLEVEL(2, "Warning : time measurements may be incorrect in multithreading mode... \n")
- }
-
- /* Bench */
- { U64 const crcOrig = (adv->mode == BMK_decodeOnly) ? 0 : XXH64(srcBuffer, srcSize, 0);
-# define NB_MARKS 4
- const char* marks[NB_MARKS] = { " |", " /", " =", " \\" };
- U32 markNb = 0;
- int compressionCompleted = (adv->mode == BMK_decodeOnly);
- int decompressionCompleted = (adv->mode == BMK_compressOnly);
- BMK_benchParams_t cbp, dbp;
- BMK_initCCtxArgs cctxprep;
- BMK_initDCtxArgs dctxprep;
-
- cbp.benchFn = local_defaultCompress; /* ZSTD_compress2 */
- cbp.benchPayload = cctx;
- cbp.initFn = local_initCCtx; /* BMK_initCCtx */
- cbp.initPayload = &cctxprep;
- cbp.errorFn = ZSTD_isError;
- cbp.blockCount = nbBlocks;
- cbp.srcBuffers = srcPtrs;
- cbp.srcSizes = srcSizes;
- cbp.dstBuffers = cPtrs;
- cbp.dstCapacities = cCapacities;
- cbp.blockResults = cSizes;
-
- cctxprep.cctx = cctx;
- cctxprep.dictBuffer = dictBuffer;
- cctxprep.dictBufferSize = dictBufferSize;
- cctxprep.cLevel = cLevel;
- cctxprep.comprParams = comprParams;
- cctxprep.adv = adv;
-
- dbp.benchFn = local_defaultDecompress;
- dbp.benchPayload = dctx;
- dbp.initFn = local_initDCtx;
- dbp.initPayload = &dctxprep;
- dbp.errorFn = ZSTD_isError;
- dbp.blockCount = nbBlocks;
- dbp.srcBuffers = (const void* const *) cPtrs;
- dbp.srcSizes = cSizes;
- dbp.dstBuffers = resPtrs;
- dbp.dstCapacities = resSizes;
- dbp.blockResults = NULL;
-
- dctxprep.dctx = dctx;
- dctxprep.dictBuffer = dictBuffer;
- dctxprep.dictBufferSize = dictBufferSize;
-
- OUTPUTLEVEL(2, "\r%70s\r", ""); /* blank line */
- assert(srcSize < UINT_MAX);
- OUTPUTLEVEL(2, "%2s-%-17.17s :%10u -> \r", marks[markNb], displayName, (unsigned)srcSize);
-
- while (!(compressionCompleted && decompressionCompleted)) {
- if (!compressionCompleted) {
- BMK_runOutcome_t const cOutcome = BMK_benchTimedFn( timeStateCompress, cbp);
-
- if (!BMK_isSuccessful_runOutcome(cOutcome)) {
- RETURN_ERROR(30, BMK_benchOutcome_t, "compression error");
- }
-
- { BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome);
- cSize = cResult.sumOfReturn;
- ratio = (double)srcSize / (double)cSize;
- { BMK_benchResult_t newResult;
- newResult.cSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun);
- benchResult.cSize = cSize;
- if (newResult.cSpeed > benchResult.cSpeed)
- benchResult.cSpeed = newResult.cSpeed;
- } }
-
- { int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
- assert(cSize < UINT_MAX);
- OUTPUTLEVEL(2, "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s \r",
- marks[markNb], displayName,
- (unsigned)srcSize, (unsigned)cSize,
- ratioAccuracy, ratio,
- benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT);
- }
- compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress);
- }
-
- if(!decompressionCompleted) {
- BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp);
-
- if(!BMK_isSuccessful_runOutcome(dOutcome)) {
- RETURN_ERROR(30, BMK_benchOutcome_t, "decompression error");
- }
-
- { BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome);
- U64 const newDSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun);
- if (newDSpeed > benchResult.dSpeed)
- benchResult.dSpeed = newDSpeed;
- }
-
- { int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
- OUTPUTLEVEL(2, "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s, %6.1f MB/s\r",
- marks[markNb], displayName,
- (unsigned)srcSize, (unsigned)cSize,
- ratioAccuracy, ratio,
- benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT,
- (double)benchResult.dSpeed / MB_UNIT);
- }
- decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress);
- }
- markNb = (markNb+1) % NB_MARKS;
- } /* while (!(compressionCompleted && decompressionCompleted)) */
-
- /* CRC Checking */
- { const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr);
- U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
- if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) {
- size_t u;
- DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n",
- displayName, (unsigned)crcOrig, (unsigned)crcCheck);
- for (u=0; u<srcSize; u++) {
- if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) {
- unsigned segNb, bNb, pos;
- size_t bacc = 0;
- DISPLAY("Decoding error at pos %u ", (unsigned)u);
- for (segNb = 0; segNb < nbBlocks; segNb++) {
- if (bacc + srcSizes[segNb] > u) break;
- bacc += srcSizes[segNb];
- }
- pos = (U32)(u - bacc);
- bNb = pos / (128 KB);
- DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos);
- { size_t const lowest = (u>5) ? 5 : u;
- size_t n;
- DISPLAY("origin: ");
- for (n=lowest; n>0; n--)
- DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u-n]);
- DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]);
- for (n=1; n<3; n++)
- DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
- DISPLAY(" \n");
- DISPLAY("decode: ");
- for (n=lowest; n>0; n--)
- DISPLAY("%02X ", resultBuffer[u-n]);
- DISPLAY(" :%02X: ", resultBuffer[u]);
- for (n=1; n<3; n++)
- DISPLAY("%02X ", resultBuffer[u+n]);
- DISPLAY(" \n");
- }
- break;
- }
- if (u==srcSize-1) { /* should never happen */
- DISPLAY("no difference detected\n");
- }
- } /* for (u=0; u<srcSize; u++) */
- } /* if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) */
- } /* CRC Checking */
-
- if (displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */
- double const cSpeed = (double)benchResult.cSpeed / MB_UNIT;
- double const dSpeed = (double)benchResult.dSpeed / MB_UNIT;
- if (adv->additionalParam) {
- OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam);
- } else {
- OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName);
- }
- }
-
- OUTPUTLEVEL(2, "%2i#\n", cLevel);
- } /* Bench */
-
- benchResult.cMem = (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx);
- return BMK_benchOutcome_setValidResult(benchResult);
-}
-
-BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
- void* dstBuffer, size_t dstCapacity,
- const size_t* fileSizes, unsigned nbFiles,
- int cLevel, const ZSTD_compressionParameters* comprParams,
- const void* dictBuffer, size_t dictBufferSize,
- int displayLevel, const char* displayName, const BMK_advancedParams_t* adv)
-
-{
- int const dstParamsError = !dstBuffer ^ !dstCapacity; /* must be both NULL or none */
-
- size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
- U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
-
- /* these are the blockTable parameters, just split up */
- const void ** const srcPtrs = (const void**)malloc(maxNbBlocks * sizeof(void*));
- size_t* const srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
-
-
- void ** const cPtrs = (void**)malloc(maxNbBlocks * sizeof(void*));
- size_t* const cSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
- size_t* const cCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
-
- void ** const resPtrs = (void**)malloc(maxNbBlocks * sizeof(void*));
- size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
-
- BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
- BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
-
- ZSTD_CCtx* const cctx = ZSTD_createCCtx();
- ZSTD_DCtx* const dctx = ZSTD_createDCtx();
-
- const size_t maxCompressedSize = dstCapacity ? dstCapacity : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);
-
- void* const internalDstBuffer = dstBuffer ? NULL : malloc(maxCompressedSize);
- void* const compressedBuffer = dstBuffer ? dstBuffer : internalDstBuffer;
-
- BMK_benchOutcome_t outcome = BMK_benchOutcome_error(); /* error by default */
-
- void* resultBuffer = srcSize ? malloc(srcSize) : NULL;
-
- int const allocationincomplete = !srcPtrs || !srcSizes || !cPtrs ||
- !cSizes || !cCapacities || !resPtrs || !resSizes ||
- !timeStateCompress || !timeStateDecompress ||
- !cctx || !dctx ||
- !compressedBuffer || !resultBuffer;
-
-
- if (!allocationincomplete && !dstParamsError) {
- outcome = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes,
- cPtrs, cCapacities, cSizes,
- resPtrs, resSizes,
- &resultBuffer,
- compressedBuffer, maxCompressedSize,
- timeStateCompress, timeStateDecompress,
- srcBuffer, srcSize,
- fileSizes, nbFiles,
- cLevel, comprParams,
- dictBuffer, dictBufferSize,
- cctx, dctx,
- displayLevel, displayName, adv);
- }
-
- /* clean up */
- BMK_freeTimedFnState(timeStateCompress);
- BMK_freeTimedFnState(timeStateDecompress);
-
- ZSTD_freeCCtx(cctx);
- ZSTD_freeDCtx(dctx);
-
- free(internalDstBuffer);
- free(resultBuffer);
-
- free((void*)srcPtrs);
- free(srcSizes);
- free(cPtrs);
- free(cSizes);
- free(cCapacities);
- free(resPtrs);
- free(resSizes);
-
- if(allocationincomplete) {
- RETURN_ERROR(31, BMK_benchOutcome_t, "allocation error : not enough memory");
- }
-
- if(dstParamsError) {
- RETURN_ERROR(32, BMK_benchOutcome_t, "Dst parameters not coherent");
- }
- return outcome;
-}
-
-BMK_benchOutcome_t BMK_benchMem(const void* srcBuffer, size_t srcSize,
- const size_t* fileSizes, unsigned nbFiles,
- int cLevel, const ZSTD_compressionParameters* comprParams,
- const void* dictBuffer, size_t dictBufferSize,
- int displayLevel, const char* displayName) {
-
- BMK_advancedParams_t const adv = BMK_initAdvancedParams();
- return BMK_benchMemAdvanced(srcBuffer, srcSize,
- NULL, 0,
- fileSizes, nbFiles,
- cLevel, comprParams,
- dictBuffer, dictBufferSize,
- displayLevel, displayName, &adv);
-}
-
-static BMK_benchOutcome_t BMK_benchCLevel(const void* srcBuffer, size_t benchedSize,
- const size_t* fileSizes, unsigned nbFiles,
- int cLevel, const ZSTD_compressionParameters* comprParams,
- const void* dictBuffer, size_t dictBufferSize,
- int displayLevel, const char* displayName,
- BMK_advancedParams_t const * const adv)
-{
- const char* pch = strrchr(displayName, '\\'); /* Windows */
- if (!pch) pch = strrchr(displayName, '/'); /* Linux */
- if (pch) displayName = pch+1;
-
- if (adv->realTime) {
- DISPLAYLEVEL(2, "Note : switching to real-time priority \n");
- SET_REALTIME_PRIORITY;
- }
-
- if (displayLevel == 1 && !adv->additionalParam) /* --quiet mode */
- OUTPUT("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n",
- ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING,
- (unsigned)benchedSize, adv->nbSeconds, (unsigned)(adv->blockSize>>10));
-
- return BMK_benchMemAdvanced(srcBuffer, benchedSize,
- NULL, 0,
- fileSizes, nbFiles,
- cLevel, comprParams,
- dictBuffer, dictBufferSize,
- displayLevel, displayName, adv);
-}
-
-int BMK_syntheticTest(int cLevel, double compressibility,
- const ZSTD_compressionParameters* compressionParams,
- int displayLevel, const BMK_advancedParams_t* adv)
-{
- char name[20] = {0};
- size_t const benchedSize = 10000000;
- void* srcBuffer;
- BMK_benchOutcome_t res;
-
- if (cLevel > ZSTD_maxCLevel()) {
- DISPLAYLEVEL(1, "Invalid Compression Level");
- return 15;
- }
-
- /* Memory allocation */
- srcBuffer = malloc(benchedSize);
- if (!srcBuffer) {
- DISPLAYLEVEL(1, "allocation error : not enough memory");
- return 16;
- }
-
- /* Fill input buffer */
- RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
-
- /* Bench */
- snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100));
- res = BMK_benchCLevel(srcBuffer, benchedSize,
- &benchedSize /* ? */, 1 /* ? */,
- cLevel, compressionParams,
- NULL, 0, /* dictionary */
- displayLevel, name, adv);
-
- /* clean up */
- free(srcBuffer);
-
- return !BMK_isSuccessful_benchOutcome(res);
-}
-
-
-
-static size_t BMK_findMaxMem(U64 requiredMem)
-{
- size_t const step = 64 MB;
- BYTE* testmem = NULL;
-
- requiredMem = (((requiredMem >> 26) + 1) << 26);
- requiredMem += step;
- if (requiredMem > maxMemory) requiredMem = maxMemory;
-
- do {
- testmem = (BYTE*)malloc((size_t)requiredMem);
- requiredMem -= step;
- } while (!testmem && requiredMem > 0);
-
- free(testmem);
- return (size_t)(requiredMem);
-}
-
-/*! BMK_loadFiles() :
- * Loads `buffer` with content of files listed within `fileNamesTable`.
- * At most, fills `buffer` entirely. */
-static int BMK_loadFiles(void* buffer, size_t bufferSize,
- size_t* fileSizes,
- const char* const * fileNamesTable, unsigned nbFiles,
- int displayLevel)
-{
- size_t pos = 0, totalSize = 0;
- unsigned n;
- for (n=0; n<nbFiles; n++) {
- U64 fileSize = UTIL_getFileSize(fileNamesTable[n]); /* last file may be shortened */
- if (UTIL_isDirectory(fileNamesTable[n])) {
- DISPLAYLEVEL(2, "Ignoring %s directory... \n", fileNamesTable[n]);
- fileSizes[n] = 0;
- continue;
- }
- if (fileSize == UTIL_FILESIZE_UNKNOWN) {
- DISPLAYLEVEL(2, "Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]);
- fileSizes[n] = 0;
- continue;
- }
- { FILE* const f = fopen(fileNamesTable[n], "rb");
- if (f==NULL) RETURN_ERROR_INT(10, "impossible to open file %s", fileNamesTable[n]);
- OUTPUTLEVEL(2, "Loading %s... \r", fileNamesTable[n]);
- if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */
- { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
- if (readSize != (size_t)fileSize) RETURN_ERROR_INT(11, "could not read %s", fileNamesTable[n]);
- pos += readSize;
- }
- fileSizes[n] = (size_t)fileSize;
- totalSize += (size_t)fileSize;
- fclose(f);
- } }
-
- if (totalSize == 0) RETURN_ERROR_INT(12, "no data to bench");
- return 0;
-}
-
-int BMK_benchFilesAdvanced(
- const char* const * fileNamesTable, unsigned nbFiles,
- const char* dictFileName, int cLevel,
- const ZSTD_compressionParameters* compressionParams,
- int displayLevel, const BMK_advancedParams_t* adv)
-{
- void* srcBuffer = NULL;
- size_t benchedSize;
- void* dictBuffer = NULL;
- size_t dictBufferSize = 0;
- size_t* fileSizes = NULL;
- BMK_benchOutcome_t res;
- U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
-
- if (!nbFiles) {
- DISPLAYLEVEL(1, "No Files to Benchmark");
- return 13;
- }
-
- if (cLevel > ZSTD_maxCLevel()) {
- DISPLAYLEVEL(1, "Invalid Compression Level");
- return 14;
- }
-
- if (totalSizeToLoad == UTIL_FILESIZE_UNKNOWN) {
- DISPLAYLEVEL(1, "Error loading files");
- return 15;
- }
-
- fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t));
- if (!fileSizes) {
- DISPLAYLEVEL(1, "not enough memory for fileSizes");
- return 16;
- }
-
- /* Load dictionary */
- if (dictFileName != NULL) {
- U64 const dictFileSize = UTIL_getFileSize(dictFileName);
- if (dictFileSize == UTIL_FILESIZE_UNKNOWN) {
- DISPLAYLEVEL(1, "error loading %s : %s \n", dictFileName, strerror(errno));
- free(fileSizes);
- DISPLAYLEVEL(1, "benchmark aborted");
- return 17;
- }
- if (dictFileSize > 64 MB) {
- free(fileSizes);
- DISPLAYLEVEL(1, "dictionary file %s too large", dictFileName);
- return 18;
- }
- dictBufferSize = (size_t)dictFileSize;
- dictBuffer = malloc(dictBufferSize);
- if (dictBuffer==NULL) {
- free(fileSizes);
- DISPLAYLEVEL(1, "not enough memory for dictionary (%u bytes)",
- (unsigned)dictBufferSize);
- return 19;
- }
-
- { int const errorCode = BMK_loadFiles(dictBuffer, dictBufferSize,
- fileSizes, &dictFileName /*?*/,
- 1 /*?*/, displayLevel);
- if (errorCode) {
- res = BMK_benchOutcome_error();
- goto _cleanUp;
- } }
- }
-
- /* Memory allocation & restrictions */
- benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
- if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
- if (benchedSize < totalSizeToLoad)
- DISPLAY("Not enough memory; testing %u MB only...\n", (unsigned)(benchedSize >> 20));
-
- srcBuffer = benchedSize ? malloc(benchedSize) : NULL;
- if (!srcBuffer) {
- free(dictBuffer);
- free(fileSizes);
- DISPLAYLEVEL(1, "not enough memory for srcBuffer");
- return 20;
- }
-
- /* Load input buffer */
- { int const errorCode = BMK_loadFiles(srcBuffer, benchedSize,
- fileSizes, fileNamesTable, nbFiles,
- displayLevel);
- if (errorCode) {
- res = BMK_benchOutcome_error();
- goto _cleanUp;
- } }
-
- /* Bench */
- { char mfName[20] = {0};
- snprintf (mfName, sizeof(mfName), " %u files", nbFiles);
- { const char* const displayName = (nbFiles > 1) ? mfName : fileNamesTable[0];
- res = BMK_benchCLevel(srcBuffer, benchedSize,
- fileSizes, nbFiles,
- cLevel, compressionParams,
- dictBuffer, dictBufferSize,
- displayLevel, displayName,
- adv);
- } }
-
-_cleanUp:
- free(srcBuffer);
- free(dictBuffer);
- free(fileSizes);
- return !BMK_isSuccessful_benchOutcome(res);
-}
-
-
-int BMK_benchFiles(const char* const * fileNamesTable, unsigned nbFiles,
- const char* dictFileName,
- int cLevel, const ZSTD_compressionParameters* compressionParams,
- int displayLevel)
-{
- BMK_advancedParams_t const adv = BMK_initAdvancedParams();
- return BMK_benchFilesAdvanced(fileNamesTable, nbFiles, dictFileName, cLevel, compressionParams, displayLevel, &adv);
-}
+++ /dev/null
-zstd
\ No newline at end of file
+++ /dev/null
-zstd
\ No newline at end of file
+++ /dev/null
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-
-/*-************************************
-* Dependencies
-**************************************/
-#include "util.h" /* Compiler options */
-#include <stdio.h> /* fprintf, stderr */
-#include "datagen.h" /* RDG_generate */
-
-
-/*-************************************
-* Constants
-**************************************/
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
-
-#define SIZE_DEFAULT ((64 KB) + 1)
-#define SEED_DEFAULT 0
-#define COMPRESSIBILITY_DEFAULT 50
-
-
-/*-************************************
-* Macros
-**************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-static unsigned displayLevel = 2;
-
-
-/*-*******************************************************
-* Command line
-*********************************************************/
-static int usage(const char* programName)
-{
- DISPLAY( "Compressible data generator\n");
- DISPLAY( "Usage :\n");
- DISPLAY( " %s [args]\n", programName);
- DISPLAY( "\n");
- DISPLAY( "Arguments :\n");
- DISPLAY( " -g# : generate # data (default:%i)\n", SIZE_DEFAULT);
- DISPLAY( " -s# : Select seed (default:%i)\n", SEED_DEFAULT);
- DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n",
- COMPRESSIBILITY_DEFAULT);
- DISPLAY( " -h : display help and exit\n");
- return 0;
-}
-
-
-int main(int argc, const char** argv)
-{
- unsigned probaU32 = COMPRESSIBILITY_DEFAULT;
- double litProba = 0.0;
- U64 size = SIZE_DEFAULT;
- U32 seed = SEED_DEFAULT;
- const char* const programName = argv[0];
-
- int argNb;
- for(argNb=1; argNb<argc; argNb++) {
- const char* argument = argv[argNb];
-
- if(!argument) continue; /* Protection if argument empty */
-
- /* Handle commands. Aggregated commands are allowed */
- if (*argument=='-') {
- argument++;
- while (*argument!=0) {
- switch(*argument)
- {
- case 'h':
- return usage(programName);
- case 'g':
- argument++;
- size=0;
- while ((*argument>='0') && (*argument<='9'))
- size *= 10, size += *argument++ - '0';
- if (*argument=='K') { size <<= 10; argument++; }
- if (*argument=='M') { size <<= 20; argument++; }
- if (*argument=='G') { size <<= 30; argument++; }
- if (*argument=='B') { argument++; }
- break;
- case 's':
- argument++;
- seed=0;
- while ((*argument>='0') && (*argument<='9'))
- seed *= 10, seed += *argument++ - '0';
- break;
- case 'P':
- argument++;
- probaU32 = 0;
- while ((*argument>='0') && (*argument<='9'))
- probaU32 *= 10, probaU32 += *argument++ - '0';
- if (probaU32>100) probaU32 = 100;
- break;
- case 'L': /* hidden argument : Literal distribution probability */
- argument++;
- litProba=0.;
- while ((*argument>='0') && (*argument<='9'))
- litProba *= 10, litProba += *argument++ - '0';
- if (litProba>100.) litProba=100.;
- litProba /= 100.;
- break;
- case 'v':
- displayLevel = 4;
- argument++;
- break;
- default:
- return usage(programName);
- }
- } } } /* for(argNb=1; argNb<argc; argNb++) */
-
- DISPLAYLEVEL(4, "Compressible data Generator \n");
- if (probaU32!=COMPRESSIBILITY_DEFAULT)
- DISPLAYLEVEL(3, "Compressibility : %i%%\n", probaU32);
- DISPLAYLEVEL(3, "Seed = %u \n", (unsigned)seed);
-
- RDG_genStdout(size, (double)probaU32/100, litProba, seed);
- DISPLAYLEVEL(3, "\n");
-
- return 0;
-}
name: FreeBSD (shortest)
freebsd_instance:
matrix:
- image_family: freebsd-13-0
- image_family: freebsd-12-2
+ image_family: freebsd-14-0
+ image_family: freebsd-13-2
install_script: pkg install -y gmake coreutils
script: |
MOREFLAGS="-Werror" gmake -j all
--- /dev/null
+name: facebook/zstd/commit
+on:
+ push:
+ branches:
+ - dev
+permissions: read-all
+jobs:
+ short-tests-0:
+ runs-on: ubuntu-latest
+ services:
+ docker:
+ image: fbopensource/zstd-circleci-primary:0.0.1
+ options: --entrypoint /bin/bash
+ steps:
+ - uses: actions/checkout@v4
+ - name: Install Dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install libcurl4-gnutls-dev
+ - name: Test
+ run: |
+ ./tests/test-license.py
+ cc -v
+ CFLAGS="-O0 -Werror -pedantic" make allmost; make clean
+ make c99build; make clean
+ make c11build; make clean
+ make -j regressiontest; make clean
+ make shortest; make clean
+ make cxxtest; make clean
+ short-tests-1:
+ runs-on: ubuntu-latest
+ services:
+ docker:
+ image: fbopensource/zstd-circleci-primary:0.0.1
+ options: --entrypoint /bin/bash
+ steps:
+ - uses: actions/checkout@v4
+ - name: Install Dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install gcc-powerpc-linux-gnu gcc-arm-linux-gnueabi gcc-aarch64-linux-gnu libc6-dev-ppc64-powerpc-cross libcurl4-gnutls-dev lib64gcc-11-dev-powerpc-cross
+ - name: Test
+ run: |-
+ make gnu90build; make clean
+ make gnu99build; make clean
+ make ppc64build V=1; make clean
+ make ppcbuild V=1; make clean
+ make armbuild V=1; make clean
+ make aarch64build V=1; make clean
+ make -C tests test-legacy test-longmatch; make clean
+ make -C lib libzstd-nomt; make clean
+ regression-test:
+ runs-on: ubuntu-latest
+ services:
+ docker:
+ image: fbopensource/zstd-circleci-primary:0.0.1
+ options: --entrypoint /bin/bash
+ env:
+ CIRCLE_ARTIFACTS: "/tmp/circleci-artifacts"
+ steps:
+ - uses: actions/checkout@v4
+ - name: restore_cache
+ uses: actions/cache@v4
+ with:
+ key: regression-cache-{{ checksum "tests/regression/data.c" }}-v0
+ path: tests/regression/cache
+ restore-keys: regression-cache-{{ checksum "tests/regression/data.c" }}-v0
+ - name: Install Dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install libcurl4-gnutls-dev
+ - name: Regression Test
+ run: |
+ make -C programs zstd
+ make -C tests/regression test
+ mkdir -p $CIRCLE_ARTIFACTS
+ ./tests/regression/test \
+ --cache tests/regression/cache \
+ --output $CIRCLE_ARTIFACTS/results.csv \
+ --zstd programs/zstd
+ echo "NOTE: The new results.csv is uploaded as an artifact to this job"
+ echo " If this fails, go to the Artifacts pane in CircleCI, "
+ echo " download /tmp/circleci-artifacts/results.csv, and if they "
+ echo " are still good, copy it into the repo and commit it."
+ echo "> diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv"
+ diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv
+ - uses: actions/upload-artifact@v4
+ with:
+ path: "/tmp/circleci-artifacts"
make-all:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make all
run: make all
DEVNULLRIGHTS: 1
READFROMBLOCKDEVICE: 1
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make test
run: make test
make-test-osx:
runs-on: macos-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: OS-X test
run: make test # make -c lib all doesn't work because of the fact that it's not a tty
DEVNULLRIGHTS: 1
READFROMBLOCKDEVICE: 1
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make test
run: |
sudo apt-get -qqq update
no-intrinsics-fuzztest:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: no intrinsics fuzztest
run: MOREFLAGS="-DZSTD_NO_INTRINSICS" make -C tests fuzztest
tsan-zstreamtest:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: thread sanitizer zstreamtest
run: CC=clang ZSTREAM_TESTTIME=-T3mn make tsan-test-zstream
ubsan-zstreamtest:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: undefined behavior sanitizer zstreamtest
run: CC=clang make uasan-test-zstream
# lasts ~15mn
tsan-fuzztest:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: thread sanitizer fuzztest
run: CC=clang make tsan-fuzztest
big-tests-zstreamtest32:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: zstream tests in 32bit mode, with big tests
run: |
sudo apt-get -qqq update
# lasts ~23mn
gcc-8-asan-ubsan-testzstd:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: gcc-8 + ASan + UBSan + Test Zstd
# See https://askubuntu.com/a/1428822
run: |
CC=gcc-8 make -j uasan-test-zstd </dev/null V=1
clang-asan-ubsan-testzstd:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: clang + ASan + UBSan + Test Zstd
run: CC=clang make -j uasan-test-zstd </dev/null V=1
gcc-asan-ubsan-testzstd-32bit:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: ASan + UBSan + Test Zstd, 32bit mode
run: |
sudo apt-get -qqq update
# so any data coming from these libraries is always considered "uninitialized"
gcc-8-asan-ubsan-fuzz:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: gcc-8 + ASan + UBSan + Fuzz Test
# See https://askubuntu.com/a/1428822
run: |
CC=gcc-8 FUZZER_FLAGS="--long-tests" make clean uasan-fuzztest
clang-asan-ubsan-fuzz:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: clang + ASan + UBSan + Fuzz Test
run: CC=clang FUZZER_FLAGS="--long-tests" make clean uasan-fuzztest
gcc-asan-ubsan-fuzz32:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: ASan + UBSan + Fuzz Test 32bit
run: |
sudo apt-get -qqq update
CFLAGS="-O3 -m32" FUZZER_FLAGS="--long-tests" make uasan-fuzztest
clang-asan-ubsan-fuzz32:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: clang + ASan + UBSan + Fuzz Test 32bit
run: |
sudo apt-get -qqq update
CC=clang CFLAGS="-O3 -m32" FUZZER_FLAGS="--long-tests" make uasan-fuzztest
asan-ubsan-regression:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: ASan + UBSan + Regression Test
run: make -j uasanregressiontest
clang-ubsan-regression:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: clang + ASan + UBSan + Regression Test
run: CC=clang make -j uasanregressiontest
msan-regression:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: MSan + Regression Test
run: make -j msanregressiontest
clang-msan-fuzz-unoptimized:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: clang + MSan + Fuzz Test
run: |
sudo apt-get -qqq update
CC=clang MOREFLAGS="-O0" make clean msan-fuzztest
clang-msan-fuzz:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: clang + MSan + Fuzz Test
run: |
sudo apt-get -qqq update
# lasts ~24mn
clang-msan-testzstd:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: clang + MSan + Test Zstd
run: |
sudo apt-get update
armfuzz:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Qemu ARM emulation + Fuzz Test
run: |
sudo apt-get -qqq update
valgrind-fuzz-test:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: valgrind + fuzz test stack mode # ~ 7mn
shell: 'script -q -e -c "bash {0}"'
run: |
run:
shell: msys2 {0}
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
- - uses: msys2/setup-msys2@5beef6d11f48bba68b9eb503e3adc60b23c0cc36 # tag=v2
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
+ - uses: msys2/setup-msys2@cc11e9188b693c2b100158c3322424c4cc1dadea # tag=v2.22.0
with:
msystem: MINGW64
install: make
# lasts ~20mn
oss-fuzz:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
dry-run: false
sanitizer: ${{ matrix.sanitizer }}
- name: Upload Crash
- uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # tag=v3.1.2
+ uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # tag=v4.3.1
if: failure() && steps.build.outcome == 'success'
with:
name: ${{ matrix.sanitizer }}-artifacts
linux-kernel:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: linux kernel, library + build + test
run: make -C contrib/linux-kernel test CFLAGS="-Werror -Wunused-const-variable -Wunused-but-set-variable"
benchmarking:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make benchmarking
run: make benchmarking
check-32bit: # designed to catch https://github.com/facebook/zstd/issues/2428
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make check on 32-bit
run: |
sudo apt update
CFLAGS="-m32 -O1 -fstack-protector" make check V=1
check-x32:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04 # ubuntu-latest == ubuntu-22.04 have issues currently with x32
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make check on x32 ABI # https://en.wikipedia.org/wiki/X32_ABI
env:
CHECK_CONSTRAINED_MEM: true
build-c89:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: ensure zstd can be build with c89/c90 compilers (+ long long support + variadic macros)
run: |
make c89build V=1
build-zstd-dll:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: build zstd bin against a dynamic lib (debuglevel for more dependencies)
run: |
- make -C lib lib-mt-release
+ make -C lib lib-mt-release
DEBUGLEVEL=2 make -C programs zstd-dll
gcc-7-libzstd:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: gcc-7 + libzstdmt compilation
# See https://askubuntu.com/a/1428822
run: |
make clean
LDFLAGS=-Wl,--no-undefined make -C lib libzstd-mt
- # candidate test (to check) : underlink test
+ # candidate test (for discussion) : underlink test
# LDFLAGS=-Wl,--no-undefined : will make the linker fail if dll is underlinked
cmake-build-and-test-check:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
- - name: cmake build and test check
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
+ - name: cmake build and test
run: |
- FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild
- cp -r ./ "../zstd source"
- cd "../zstd source"
- FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild
+ FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild V=1
cpp-gnu90-c99-compatibility:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: C++, gnu90 and c99 compatibility
run: |
make cxxtest
mingw-cross-compilation:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: mingw cross-compilation
run: |
# sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix; (doesn't work)
armbuild:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: ARM Build Test
run: |
sudo apt-get -qqq update
bourne-shell:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Bourne shell compatibility (shellcheck)
run: |
wget https://github.com/koalaman/shellcheck/releases/download/v0.7.1/shellcheck-v0.7.1.linux.x86_64.tar.xz
zlib-wrapper:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: zlib wrapper test
run: |
sudo apt-get -qqq update
lz4-threadpool-libs:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: LZ4, thread pool, and libs build testslib wrapper test
run: |
make lz4install
gcc-make-tests-32bit:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Make all, 32bit mode
run: |
sudo apt-get -qqq update
gcc-8-make:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: gcc-8 build
# See https://askubuntu.com/a/1428822
run: |
flags: "HAVE_ZLIB=0 HAVE_LZ4=0 HAVE_LZMA=1"
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Build with ${{matrix.name}}
run: ${{matrix.flags}} make zstd
implicit-fall-through:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: -Wimplicit-fallthrough build
run: |
make clean
meson-linux:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Install packages
run: |
sudo apt-get update
meson-windows:
runs-on: windows-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Install packages
run: pip install --pre meson
- - name: Initialize the MSVC dev command prompt
- uses: ilammy/msvc-dev-cmd@cec98b9d092141f74527d0afa6feb2af698cfe89
- name: Configure with Meson
run: |
- meson setup build/meson/ builddir -Dbin_tests=true -Dbin_programs=true -Dbin_contrib=true
+ meson setup --vsenv build/meson/ builddir -Dbin_tests=true -Dbin_programs=true -Dbin_contrib=true
- name: Build with Meson
run: |
- ninja -C builddir/
+ meson compile -C builddir/
- name: Test with Meson
run: |
meson test -C builddir/ --print-errorlogs
flags: "-T ClangCL"
runs-on: windows-2022
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Add MSBuild to PATH
- uses: microsoft/setup-msbuild@1ff57057b5cfdc39105cd07a01d78e9b0ea0c14c # tag=v1.3
- - name: Build
+ uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v2.0.0
+ - name: Build & Test
working-directory: ${{env.GITHUB_WORKSPACE}}
run: |
cd build\cmake
mkdir build
cd build
- cmake.exe -G "${{matrix.generator}}" ${{matrix.flags}} ..
+ cmake.exe -G "${{matrix.generator}}" ${{matrix.flags}} -DCMAKE_BUILD_TYPE=Debug -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZSTREAM_FLAGS=-T30s -DZSTD_FUZZER_FLAGS=-T30s -DZSTD_FULLBENCH_FLAGS=-i0 ..
cmake.exe --build .
+ ctest.exe -V -C Debug
msbuild-visual-studio:
strategy:
]
runs-on: ${{matrix.runner}}
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Add MSBuild to PATH
- uses: microsoft/setup-msbuild@1ff57057b5cfdc39105cd07a01d78e9b0ea0c14c # tag=v1.3
+ uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v2.0.0
- name: Build ${{matrix.name}}
working-directory: ${{env.GITHUB_WORKSPACE}}
# See https://docs.microsoft.com/visualstudio/msbuild/msbuild-command-line-reference
libzstd-size:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: libzstd size test
run: |
make clean && make -j -C lib libzstd && ./tests/check_size.py lib/libzstd.so 1100000
minimal-decompressor-macros:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: minimal decompressor macros
run: |
make clean && make -j all ZSTD_LIB_MINIFY=1 MOREFLAGS="-Werror"
make clean && make check MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG"
make clean && make -j all MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"
make clean && make check MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"
+ make clean && make check ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP=1 MOREFLAGS="-Werror"
+ make clean && make check ZSTD_LIB_EXCLUDE_COMPRESSORS_GREEDY_AND_UP=1 MOREFLAGS="-Werror"
dynamic-bmi2:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: dynamic bmi2 tests
run: |
make clean && make -j check MOREFLAGS="-O0 -Werror -mbmi2"
test-variants:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make all variants & validate
run: |
make -j -C programs allVariants MOREFLAGS=-O0
{ name: PPC64LE, xcc_pkg: gcc-powerpc64le-linux-gnu, xcc: powerpc64le-linux-gnu-gcc, xemu_pkg: qemu-system-ppc, xemu: qemu-ppc64le-static },
{ name: S390X, xcc_pkg: gcc-s390x-linux-gnu, xcc: s390x-linux-gnu-gcc, xemu_pkg: qemu-system-s390x, xemu: qemu-s390x-static },
{ name: MIPS, xcc_pkg: gcc-mips-linux-gnu, xcc: mips-linux-gnu-gcc, xemu_pkg: qemu-system-mips, xemu: qemu-mips-static },
+ { name: RISC-V, xcc_pkg: gcc-riscv64-linux-gnu, xcc: riscv64-linux-gnu-gcc, xemu_pkg: qemu-system-riscv64, xemu: qemu-riscv64-static },
{ name: M68K, xcc_pkg: gcc-m68k-linux-gnu, xcc: m68k-linux-gnu-gcc, xemu_pkg: qemu-system-m68k, xemu: qemu-m68k-static },
+ { name: SPARC, xcc_pkg: gcc-sparc64-linux-gnu, xcc: sparc64-linux-gnu-gcc, xemu_pkg: qemu-system-sparc, xemu: qemu-sparc64-static },
]
env: # Set environment variables
XCC: ${{ matrix.xcc }}
XEMU: ${{ matrix.xemu }}
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: apt update & install
run: |
sudo apt-get update
- name: ARM64
if: ${{ matrix.name == 'ARM64' }}
run: |
+ LDFLAGS="-static -z force-bti" MOREFLAGS="-mbranch-protection=standard" CC=$XCC QEMU_SYS=$XEMU make clean check
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
- name: PPC
if: ${{ matrix.name == 'PPC' }}
if: ${{ matrix.name == 'MIPS' }}
run: |
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
+ - name: RISC-V
+ if: ${{ matrix.name == 'RISC-V' }}
+ run: |
+ LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
- name: M68K
if: ${{ matrix.name == 'M68K' }}
run: |
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
+ - name: SPARC
+ if: ${{ matrix.name == 'SPARC' }}
+ run: |
+ LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
mingw-short-test:
runs-on: windows-latest
run:
shell: msys2 {0}
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
- - uses: msys2/setup-msys2@5beef6d11f48bba68b9eb503e3adc60b23c0cc36 # tag=v2
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
+ - uses: msys2/setup-msys2@cc11e9188b693c2b100158c3322424c4cc1dadea # tag=v2.22.0
with:
msystem: ${{ matrix.msystem }}
install: make diffutils
platform: [x64, Win32]
configuration: [Release]
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Add MSBuild to PATH
- uses: microsoft/setup-msbuild@1ff57057b5cfdc39105cd07a01d78e9b0ea0c14c # tag=v1.3
+ uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v2.0.0
- name: Build and run tests
working-directory: ${{env.GITHUB_WORKSPACE}}
env:
runs-on: windows-latest
steps:
- run: git config --global core.autocrlf input
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
- - uses: cygwin/cygwin-install-action@f5e0f048310c425e84bc789f493a828c6dc80a25 # tag=master
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
+ - uses: cygwin/cygwin-install-action@006ad0b0946ca6d0a3ea2d4437677fa767392401 # tag=master
with:
platform: x86_64
packages: >-
make -C tests fuzzer &&
./tests/fuzzer.exe -v -T1m
- intel-cet-compatibility:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
- - name: Build Zstd
- run: |
- make -j zstd V=1
- readelf -n zstd
- - name: Get Intel SDE
- run: |
- curl -LO https://downloadmirror.intel.com/684899/sde-external-9.0.0-2021-11-07-lin.tar.xz
- tar xJvf sde-external-9.0.0-2021-11-07-lin.tar.xz
- - name: Configure Permissions
- run: |
- echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
- - name: Run Under SDE
- run: |
- sde-external-9.0.0-2021-11-07-lin/sde -cet -cet-raise 0 -cet-endbr-exe -cet-stderr -cet-abort -- ./zstd -b3
-
pkg-config:
runs-on: ubuntu-latest
container:
image: debian:testing
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Install dependencies
run: |
apt -y update
versions-compatibility:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Versions Compatibility Test
run: |
make -C tests versionsTest
clangbuild:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make clangbuild
run: |
make clangbuild
clang-pgo:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Build PGO Zstd with Clang
env:
CC: clang-14
gcc-pgo:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Build PGO Zstd with GCC
env:
CC: gcc
make -C programs zstd-pgo
./programs/zstd -b
+ intel-cet-compatibility:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
+ - name: Build Zstd
+ run: |
+ make -j zstd V=1
+ readelf -n zstd
+ - name: Get Intel SDE
+ run: |
+ curl -LO https://downloadmirror.intel.com/813591/sde-external-9.33.0-2024-01-07-lin.tar.xz
+ tar xJvf sde-external-9.33.0-2024-01-07-lin.tar.xz
+ - name: Configure Permissions
+ run: |
+ echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
+ - name: Run Under SDE
+ run: |
+ sde-external-9.33.0-2024-01-07-lin/sde -cet -cet-raise 0 -cet-endbr-exe -cet-stderr -cet-abort -- ./zstd -b3
+
+
+# Failing tests, for reference
-# For reference : icc tests
# icc tests are currently failing on Github Actions, likely to issues during installation stage
-# To be fixed later
#
# icc:
# name: icc-check
# sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
# sudo apt-get update
# sudo apt-get install -y intel-basekit intel-hpckit
-# - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+# - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
# - name: make check
# run: |
# make CC=/opt/intel/oneapi/compiler/latest/linux/bin/intel64/icc check
--- /dev/null
+name: facebook/zstd/nightly
+on:
+ schedule:
+ - cron: 0 0 * * *
+ push:
+ branches:
+ - release
+ - dev
+ - master
+permissions: read-all
+jobs:
+ regression-test:
+ runs-on: ubuntu-latest
+ services:
+ docker:
+ image: fbopensource/zstd-circleci-primary:0.0.1
+ options: --entrypoint /bin/bash
+ env:
+ CIRCLE_ARTIFACTS: "/tmp/circleci-artifacts"
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/cache@v4
+ with:
+ key: regression-cache-{{ checksum "tests/regression/data.c" }}-v0
+ path: tests/regression/cache
+ restore-keys: regression-cache-{{ checksum "tests/regression/data.c" }}-v0
+ - uses: actions/upload-artifact@v4
+ with:
+ path: "/tmp/circleci-artifacts"
+ - name: Install Dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install libcurl4-gnutls-dev
+ - name: Regression Test
+ run: |
+ make -C programs zstd
+ make -C tests/regression test
+ mkdir -p $CIRCLE_ARTIFACTS
+ ./tests/regression/test \
+ --cache tests/regression/cache \
+ --output $CIRCLE_ARTIFACTS/results.csv \
+ --zstd programs/zstd
+ echo "NOTE: The new results.csv is uploaded as an artifact to this job"
+ echo " If this fails, go to the Artifacts pane in CircleCI, "
+ echo " download /tmp/circleci-artifacts/results.csv, and if they "
+ echo " are still good, copy it into the repo and commit it."
+ echo "> diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv"
+ diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv
+
+# Longer tests
+ #- make -C tests test-zstd-nolegacy && make clean
+ #- pyenv global 3.4.4; make -C tests versionsTest && make clean
+ #- make zlibwrapper && make clean
+ #- gcc -v; make -C tests test32 MOREFLAGS="-I/usr/include/x86_64-linux-gnu" && make clean
+ #- make uasan && make clean
+ #- make asan32 && make clean
+ #- make -C tests test32 CC=clang MOREFLAGS="-g -fsanitize=address -I/usr/include/x86_64-linux-gnu"
+# Valgrind tests
+ #- CFLAGS="-O1 -g" make -C zlibWrapper valgrindTest && make clean
+ #- make -C tests valgrindTest && make clean
+# ARM, AArch64, PowerPC, PowerPC64 tests
+ #- make ppctest && make clean
+ #- make ppc64test && make clean
+ #- make armtest && make clean
+ #- make aarch64test && make clean
steps:
- name: Checkout
- uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Archive
env:
steps:
- name: "Checkout code"
- uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
with:
persist-credentials: false
- name: "Run analysis"
- uses: ossf/scorecard-action@e38b1902ae4f44df626f11ba0734b14fb91f8f86 # tag=v2.1.2
+ uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # tag=v2.3.1
with:
results_file: results.sarif
results_format: sarif
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
- uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # tag=v3.1.2
+ uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # tag=v4.3.1
with:
name: SARIF file
path: results.sarif
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
- uses: github/codeql-action/upload-sarif@67a35a08586135a9573f4327e904ecbf517a882d # tag=v2.2.8
+ uses: github/codeql-action/upload-sarif@3ab4101902695724f9365a384f86c1074d94e18c # tag=v3.24.7
with:
sarif_file: results.sarif
permissions: read-all
jobs:
- windows-64-artifacts:
+ windows-artifacts:
# see https://ariya.io/2020/07/on-github-actions-with-msys2
runs-on: windows-latest
+ # see https://github.com/msys2/setup-msys2
+ strategy:
+ matrix:
+ include:
+ - { msystem: mingw64, env: x86_64, ziparch: win64 }
+ - { msystem: mingw32, env: i686, ziparch: win32 }
defaults:
run:
shell: msys2 {0}
steps:
- - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- uses: msys2/setup-msys2@5beef6d11f48bba68b9eb503e3adc60b23c0cc36 # tag=v2
with:
- msystem: MINGW64
- install: make zlib git p7zip mingw-w64-x86_64-gcc
+ msystem: ${{ matrix.msystem }}
+ install: make zlib git p7zip mingw-w64-${{matrix.env}}-gcc
update: true
+
- name: display versions
run: |
make -v
git clone --depth 1 --branch v1.2.11 https://github.com/madler/zlib
make -C zlib -f win32/Makefile.gcc libz.a
- - name: Building zstd programs in 64-bit mode
+ - name: Building zstd programs
run: |
CPPFLAGS=-I../zlib LDFLAGS=../zlib/libz.a make -j allzstd MOREFLAGS=-static V=1
- name: Create artifacts
run: |
./lib/dll/example/build_package.bat
- mv bin/ zstd-${{ github.ref_name }}-win64/
- 7z a -tzip -mx9 zstd-${{ github.ref_name }}-win64.zip zstd-${{ github.ref_name }}-win64/
+ mv bin/ zstd-${{ github.ref_name }}-${{matrix.ziparch}}/
+ 7z a -tzip -mx9 zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip zstd-${{ github.ref_name }}-${{matrix.ziparch}}/
cd ..
- - name: Publish zstd-$VERSION-win64.zip
- uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # tag=v3
+ - name: Publish zstd-$VERSION-${{matrix.ziparch}}.zip
+ uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # tag=v4.3.1
with:
- path: ${{ github.workspace }}/zstd-${{ github.ref_name }}-win64.zip
- name: zstd-${{ github.ref_name }}-win64.zip
+ path: ${{ github.workspace }}/zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip
+ name: zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip
dictionary.
dictionary
NUL
+cmakebuild/
+install/
# Build artefacts
contrib/linux-kernel/linux/
build-*
*.gcda
+# IDE
+.clang_complete
+compile_flags.txt
+.clang-format
+
# Other files
.directory
_codelite/
_zstdbench/
-.clang_complete
*.idea
*.swp
.DS_Store
+v1.5.6 (Mar 2024)
+api: Promote `ZSTD_c_targetCBlockSize` to Stable API by @felixhandte
+api: new `ZSTD_d_maxBlockSize` experimental parameter, to reduce streaming decompression memory, by @terrelln
+perf: improve performance of param `ZSTD_c_targetCBlockSize`, by @Cyan4973
+perf: improved compression of arrays of integers at high compression, by @Cyan4973
+lib: reduce binary size with selective built-time exclusion, by @felixhandte
+lib: improved huffman speed on small data and linux kernel, by @terrelln
+lib: accept dictionaries with partial literal tables, by @terrelln
+lib: fix CCtx size estimation with external sequence producer, by @embg
+lib: fix corner case decoder behaviors, by @Cyan4973 and @aimuz
+lib: fix zdict prototype mismatch in static_only mode, by @ldv-alt
+lib: fix several bugs in magicless-format decoding, by @embg
+cli: add common compressed file types to `--exclude-compressed` by @daniellerozenblit
+cli: fix mixing `-c` and `-o` commands with `--rm`, by @Cyan4973
+cli: fix erroneous exclusion of hidden files with `--output-dir-mirror` by @felixhandte
+cli: improved time accuracy on BSD, by @felixhandte
+cli: better errors on argument parsing, by @KapJI
+tests: better compatibility with older versions of `grep`, by @Cyan4973
+tests: lorem ipsum generator as default backup content, by @Cyan4973
+build: cmake improvements by @terrelln, @sighingnow, @gjasny, @JohanMabille, @Saverio976, @gruenich, @teo-tsirpanis
+build: bazel support, by @jondo2010
+build: fix cross-compiling for AArch64 with lld by @jcelerier
+build: fix Apple platform compatibility, by @nidhijaju
+build: fix Visual 2012 and lower compatibility, by @Cyan4973
+build: improve win32 support, by @DimitriPapadopoulos
+build: better C90 compliance for zlibWrapper, by @emaste
+port: make: fat binaries on macos, by @mredig
+port: ARM64EC compatibility for Windows, by @dunhor
+port: QNX support by @klausholstjacobsen
+port: MSYS2 and Cygwin makefile installation and test support, by @QBos07
+port: risc-v support validation in CI, by @Cyan4973
+port: sparc64 support validation in CI, by @Cyan4973
+port: AIX compatibility, by @likema
+port: HP-UX compatibility, by @likema
+doc: Improved specification accuracy, by @elasota
+bug: Fix and deprecate ZSTD_generateSequences (#3981)
+
v1.5.5 (Apr 2023)
fix: fix rare corruption bug affecting the high compression mode, reported by @danlark1 (#3517, @terrelln)
perf: improve mid-level compression speed (#3529, #3533, #3543, @yoniko and #3552, @terrelln)
build: improved AIX support, by @Helflym
build: improved meson unofficial build, by @eli-schwartz
cli : custom memory limit when training dictionary (#2925), by @embg
-cli : report advanced parameters information when compressing in very verbose mode (``-vv`), by @Svetlitski-FB
+cli : report advanced parameters information when compressing in very verbose mode (`-vv`), by @Svetlitski-FB
v1.5.0 (May 11, 2021)
api: Various functions promoted from experimental to stable API: (#2579-2581, @senhuang42)
perf: New Algorithms for the Long Distance Matcher (#2483, @mpu)
perf: Performance Improvements for Long Distance Matcher (#2464, @mpu)
perf: Don't Shrink Window Log when Streaming with a Dictionary (#2451, @terrelln)
-cli: Fix `--output-dir-mirror`'s Rejection of `..`-Containing Paths (#2512, @felixhandte)
+cli: Fix `--output-dir-mirror` rejection of `..` -containing paths (#2512, @felixhandte)
cli: Allow Input From Console When `-f`/`--force` is Passed (#2466, @felixhandte)
cli: Improve Help Message (#2500, @senhuang42)
tests: Remove Flaky Tests (#2455, #2486, #2445, @Cyan4973)
| Cirrus CI | Used for testing on FreeBSD | https://github.com/marketplace/cirrus-ci/ | `.cirrus.yml` |
| Circle CI | Historically was used to provide faster signal,<br/> but we may be able to migrate these to Github Actions | https://circleci.com/docs/2.0/getting-started/#setting-up-circleci <br> https://youtu.be/Js3hMUsSZ2c <br> https://circleci.com/docs/2.0/enable-checks/ | `.circleci/config.yml` |
-Note: the instructions linked above mostly cover how to set up a repository with CI from scratch.
-The general idea should be the same for setting up CI on your fork of zstd, but you may have to
+Note: the instructions linked above mostly cover how to set up a repository with CI from scratch.
+The general idea should be the same for setting up CI on your fork of zstd, but you may have to
follow slightly different steps. In particular, please ignore any instructions related to setting up
config files (since zstd already has configs for each of these services).
hands on a desktop, this is usually a better scenario.
Of course, benchmarking can be done on non-hyper-stable machines as well. You will just have to
-do a little more work to ensure that you are in fact measuring the changes you've made not and
+do a little more work to ensure that you are in fact measuring the changes you've made and not
noise. Here are some things you can do to make your benchmarks more stable:
1. The most simple thing you can do to drastically improve the stability of your benchmark is
$(Q)$(MAKE) -C contrib/largeNbDicts $@ > $(VOID)
$(Q)$(MAKE) -C contrib/externalSequenceProducer $@ > $(VOID)
$(Q)$(RM) zstd$(EXT) zstdmt$(EXT) tmp*
- $(Q)$(RM) -r lz4
+ $(Q)$(RM) -r lz4 cmakebuild install
@echo Cleaning completed
#------------------------------------------------------------------------------
# make install is validated only for Linux, macOS, Hurd and some BSD targets
#------------------------------------------------------------------------------
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD DragonFly NetBSD MSYS_NT Haiku AIX))
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD DragonFly NetBSD MSYS_NT CYGWIN_NT Haiku AIX))
HOST_OS = POSIX
travis-install:
$(MAKE) install PREFIX=~/install_test_dir
+.PHONY: clangbuild-darwin-fat
+clangbuild-darwin-fat: clean
+ clang -v
+ CXX=clang++ CC=clang CFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation -arch arm64" $(MAKE) zstd-release
+ mv programs/zstd programs/zstd_arm64
+ CXX=clang++ CC=clang CFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation -arch x86_64" $(MAKE) zstd-release
+ mv programs/zstd programs/zstd_x64
+ lipo -create programs/zstd_x64 programs/zstd_arm64 -output programs/zstd
+
.PHONY: gcc5build gcc6build gcc7build clangbuild m32build armbuild aarch64build ppcbuild ppc64build
gcc5build: clean
gcc-5 -v
# run UBsan with -fsanitize-recover=pointer-overflow
# this only works with recent compilers such as gcc 8+
usan: clean
- $(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=undefined -Werror $(MOREFLAGS)"
+ $(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=undefined -Werror $(MOREFLAGS)"
asan: clean
$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=address -Werror $(MOREFLAGS)"
msan: clean
$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" HAVE_LZMA=0 # datagen.c fails this test for no obvious reason
-msan-%: clean
- LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -C $(TESTDIR) HAVE_LZMA=0 $*
+msan-%:
+ $(MAKE) clean
+ LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -j -C $(TESTDIR) HAVE_LZMA=0 $*
asan32: clean
$(MAKE) -C $(TESTDIR) test32 CC=clang MOREFLAGS="-g -fsanitize=address $(MOREFLAGS)"
uasan: clean
- $(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=address,undefined -Werror $(MOREFLAGS)"
+ $(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=address,undefined -Werror $(MOREFLAGS)"
uasan-%: clean
- LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=address,undefined -Werror $(MOREFLAGS)" $(MAKE) -C $(TESTDIR) $*
+ LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=address,undefined -Werror $(MOREFLAGS)" $(MAKE) -C $(TESTDIR) $*
tsan-%: clean
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=thread -Werror $(MOREFLAGS)" $(MAKE) -C $(TESTDIR) $* FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)"
endif
-CMAKE_PARAMS = -DZSTD_BUILD_CONTRIB:BOOL=ON -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZLIB_SUPPORT:BOOL=ON -DZSTD_LZMA_SUPPORT:BOOL=ON -DCMAKE_BUILD_TYPE=Release
-
ifneq (,$(filter MSYS%,$(shell uname)))
HOST_OS = MSYS
-CMAKE_PARAMS = -G"MSYS Makefiles" -DCMAKE_BUILD_TYPE=Debug -DZSTD_MULTITHREAD_SUPPORT:BOOL=OFF -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON
endif
#------------------------------------------------------------------------
# target specific tests
#------------------------------------------------------------------------
ifneq (,$(filter $(HOST_OS),MSYS POSIX))
-.PHONY: cmakebuild c89build gnu90build c99build gnu99build c11build bmix64build bmix32build bmi32build staticAnalyze
-cmakebuild:
- cmake --version
- $(RM) -r $(BUILDIR)/cmake/build
- $(MKDIR) $(BUILDIR)/cmake/build
- cd $(BUILDIR)/cmake/build; cmake -DCMAKE_INSTALL_PREFIX:PATH=~/install_test_dir $(CMAKE_PARAMS) ..
- $(MAKE) -C $(BUILDIR)/cmake/build -j4;
- $(MAKE) -C $(BUILDIR)/cmake/build install;
- $(MAKE) -C $(BUILDIR)/cmake/build uninstall;
- cd $(BUILDIR)/cmake/build; ctest -V -L Medium
+CMAKE ?= cmake
+CMAKE_PARAMS = -DZSTD_BUILD_CONTRIB:BOOL=ON -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZLIB_SUPPORT:BOOL=ON -DZSTD_LZMA_SUPPORT:BOOL=ON
+
+ifneq (,$(filter MSYS%,$(shell uname)))
+CMAKE_PARAMS = -G"MSYS Makefiles" -DZSTD_MULTITHREAD_SUPPORT:BOOL=OFF -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON
+endif
+
+.PHONY: cmakebuild
+cmakebuild:
+ $(CMAKE) --version
+ $(RM) -r cmakebuild install
+ $(MKDIR) cmakebuild install
+ cd cmakebuild; $(CMAKE) -Wdev -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-Werror -O0" -DCMAKE_INSTALL_PREFIX=install $(CMAKE_PARAMS) ../build/cmake
+ $(CMAKE) --build cmakebuild --target install -- -j V=1
+ cd cmakebuild; ctest -V -L Medium
+
+.PHONY: c89build gnu90build c99build gnu99build c11build bmix64build bmix32build bmi32build staticAnalyze
c89build: clean
$(CC) -v
CFLAGS="-std=c89 -Werror -Wno-attributes -Wpedantic -Wno-long-long -Wno-variadic-macros -O0" $(MAKE) lib zstd
It's backed by a very fast entropy stage, provided by [Huff0 and FSE library](https://github.com/Cyan4973/FiniteStateEntropy).
Zstandard's format is stable and documented in [RFC8878](https://datatracker.ietf.org/doc/html/rfc8878). Multiple independent implementations are already available.
-This repository represents the reference implementation, provided as an open-source dual [BSD](LICENSE) and [GPLv2](COPYING) licensed **C** library,
+This repository represents the reference implementation, provided as an open-source dual [BSD](LICENSE) OR [GPLv2](COPYING) licensed **C** library,
and a command line utility producing and decoding `.zst`, `.gz`, `.xz` and `.lz4` files.
Should your project require another programming language,
a list of known ports and bindings is provided on [Zstandard homepage](https://facebook.github.io/zstd/#other-languages).
You can build the zstd binary via buck by executing: `buck build programs:zstd` from the root of the repo.
The output binary will be in `buck-out/gen/programs/`.
+### Bazel
+
+You easily can integrate zstd into your Bazel project by using the module hosted on the [Bazel Central Repository](https://registry.bazel.build/modules/zstd).
+
## Testing
You can run quick local smoke tests by running `make check`.
## License
-Zstandard is dual-licensed under [BSD](LICENSE) and [GPLv2](COPYING).
+Zstandard is dual-licensed under [BSD](LICENSE) OR [GPLv2](COPYING).
## Contributing
--- /dev/null
+# Reporting and Fixing Security Issues
+
+Please do not open GitHub issues or pull requests - this makes the problem immediately visible to everyone, including malicious actors. Security issues in this open source project can be safely reported via the Meta Bug Bounty program:
+
+https://www.facebook.com/whitehat
+
+Meta's security team will triage your report and determine whether or not it is eligible for a bounty under our program.
+
+# Receiving Vulnerability Notifications
+
+In the case that a significant security vulnerability is reported to us or discovered by us---without being publicly known---we will, at our discretion, notify high-profile, high-exposure users of Zstandard ahead of our public disclosure of the issue and associated fix.
+
+If you believe your project would benefit from inclusion in this list, please reach out to one of the maintainers.
+
+<!-- Note to maintainers: this list is kept [here](https://fburl.com/wiki/cgc1l62x). -->
RelativePath="..\..\..\programs\dibio.c"
>
</File>
+ <File
+ RelativePath="..\..\..\programs\lorem.c"
+ >
+ </File>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
<ItemGroup>
<ClCompile Include="..\..\..\programs\util.c" />
<ClCompile Include="..\..\..\programs\datagen.c" />
+ <ClCompile Include="..\..\..\programs\lorem.c" />
+ <ClCompile Include="..\..\..\tests\loremOut.c" />
<ClCompile Include="..\..\..\tests\datagencli.c" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\..\programs\dibio.c" />
<ClCompile Include="..\..\..\programs\fileio.c" />
<ClCompile Include="..\..\..\programs\fileio_asyncio.c" />
+ <ClCompile Include="..\..\..\programs\lorem.c" />
<ClCompile Include="..\..\..\programs\zstdcli.c" />
<ClCompile Include="..\..\..\programs\zstdcli_trace.c" />
</ItemGroup>
# in the COPYING file in the root directory of this source tree).
# ################################################################
-cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
# As of 2018-12-26 ZSTD has been validated to build with cmake version 3.13.2 new policies.
# Set and use the newest cmake policies that are validated to work
set(ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION "3")
set(ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION "13") #Policies never changed at PATCH level
-if("${CMAKE_MAJOR_VERSION}" LESS 3)
- set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}")
-elseif( "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}" EQUAL "${CMAKE_MAJOR_VERSION}" AND
- "${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}" GREATER "${CMAKE_MINOR_VERSION}")
+if("${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}" EQUAL "${CMAKE_MAJOR_VERSION}" AND
+ "${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}" GREATER "${CMAKE_MINOR_VERSION}")
set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}")
else()
set(ZSTD_CMAKE_POLICY_VERSION "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}.${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}.0")
include(GetZstdLibraryVersion)
GetZstdLibraryVersion(${LIBRARY_DIR}/zstd.h zstd_VERSION_MAJOR zstd_VERSION_MINOR zstd_VERSION_PATCH)
-if( CMAKE_MAJOR_VERSION LESS 3 )
- ## Provide cmake 3+ behavior for older versions of cmake
- project(zstd)
- set(PROJECT_VERSION_MAJOR ${zstd_VERSION_MAJOR})
- set(PROJECT_VERSION_MINOR ${zstd_VERSION_MINOR})
- set(PROJECT_VERSION_PATCH ${zstd_VERSION_PATCH})
- set(PROJECT_VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}")
- enable_language(C) # Main library is in C
- enable_language(ASM) # And ASM
- enable_language(CXX) # Testing contributed code also utilizes CXX
-else()
- project(zstd
- VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}"
- LANGUAGES C # Main library is in C
- ASM # And ASM
- CXX # Testing contributed code also utilizes CXX
- )
-endif()
+project(zstd
+ VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}"
+ LANGUAGES C # Main library is in C
+ ASM # And ASM
+ CXX # Testing contributed code also utilizes CXX
+ )
+
message(STATUS "ZSTD VERSION: ${zstd_VERSION}")
set(zstd_HOMEPAGE_URL "https://facebook.github.io/zstd")
set(zstd_DESCRIPTION "Zstandard is a real-time compression algorithm, providing high compression ratios.")
#-----------------------------------------------------------------------------
# External dependencies
#-----------------------------------------------------------------------------
+# Define a function to handle special thread settings for HP-UX
+# See https://github.com/facebook/zstd/pull/3862 for details.
+function(setup_hpux_threads)
+ find_package(Threads)
+ if (NOT Threads_FOUND)
+ set(CMAKE_USE_PTHREADS_INIT 1 PARENT_SCOPE)
+ set(CMAKE_THREAD_LIBS_INIT -lpthread PARENT_SCOPE)
+ set(CMAKE_HAVE_THREADS_LIBRARY 1 PARENT_SCOPE)
+ set(Threads_FOUND TRUE PARENT_SCOPE)
+ endif()
+endfunction()
+
if (ZSTD_MULTITHREAD_SUPPORT AND UNIX)
- set(THREADS_PREFER_PTHREAD_FLAG ON)
- find_package(Threads REQUIRED)
- if(CMAKE_USE_PTHREADS_INIT)
+ if (CMAKE_SYSTEM_NAME MATCHES "HP-UX")
+ setup_hpux_threads()
+ else()
+ set(THREADS_PREFER_PTHREAD_FLAG ON)
+ find_package(Threads REQUIRED)
+ endif()
+ if (CMAKE_USE_PTHREADS_INIT)
set(THREADS_LIBS "${CMAKE_THREAD_LIBS_INIT}")
else()
message(SEND_ERROR "ZSTD currently does not support thread libraries other than pthreads")
FILE "${CMAKE_CURRENT_BINARY_DIR}/zstdTargets.cmake"
NAMESPACE zstd::
)
-configure_file(zstdConfig.cmake
- "${CMAKE_CURRENT_BINARY_DIR}/zstdConfig.cmake"
- COPYONLY
- )
# A Package Config file that works from the installation directory
set(ConfigPackageLocation ${CMAKE_INSTALL_LIBDIR}/cmake/zstd)
NAMESPACE zstd::
DESTINATION ${ConfigPackageLocation}
)
+configure_package_config_file(
+ zstdConfig.cmake.in
+ "${CMAKE_CURRENT_BINARY_DIR}/zstdConfig.cmake"
+ INSTALL_DESTINATION ${ConfigPackageLocation}
+)
install(FILES
- zstdConfig.cmake
+ "${CMAKE_CURRENT_BINARY_DIR}/zstdConfig.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/zstdConfigVersion.cmake"
DESTINATION ${ConfigPackageLocation}
)
make
```
+### how to use it with CMake FetchContent
+
+For all options available, you can see it on <https://github.com/facebook/zstd/blob/dev/build/cmake/lib/CMakeLists.txt>
+```cmake
+include(FetchContent)
+
+set(ZSTD_BUILD_STATIC ON)
+set(ZSTD_BUILD_SHARED OFF)
+
+FetchContent_Declare(
+ zstd
+ URL "https://github.com/facebook/zstd/releases/download/v1.5.5/zstd-1.5.5.tar.gz"
+ DOWNLOAD_EXTRACT_TIMESTAMP TRUE
+ SOURCE_SUBDIR build/cmake
+)
+
+FetchContent_MakeAvailable(zstd)
+
+target_link_libraries(
+ ${PROJECT_NAME}
+ PRIVATE
+ libzstd_static
+)
+
+# On windows and macos this is needed
+target_include_directories(
+ ${PROJECT_NAME}
+ PRIVATE
+ ${zstd_SOURCE_DIR}/lib
+)
+```
+
### referring
[Looking for a 'cmake clean' command to clear up CMake output](https://stackoverflow.com/questions/9680420/looking-for-a-cmake-clean-command-to-clear-up-cmake-output)
include_directories(${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${PZSTD_DIR})
add_executable(pzstd ${PROGRAMS_DIR}/util.c ${PZSTD_DIR}/main.cpp ${PZSTD_DIR}/Options.cpp ${PZSTD_DIR}/Pzstd.cpp ${PZSTD_DIR}/SkippableFrame.cpp)
+target_compile_features(pzstd PRIVATE cxx_std_11)
set_property(TARGET pzstd APPEND PROPERTY COMPILE_DEFINITIONS "NDEBUG")
set_property(TARGET pzstd APPEND PROPERTY COMPILE_OPTIONS "-Wno-shadow")
set(CMAKE_INCLUDE_CURRENT_DIR TRUE)
option(ZSTD_BUILD_STATIC "BUILD STATIC LIBRARIES" ON)
option(ZSTD_BUILD_SHARED "BUILD SHARED LIBRARIES" ON)
+option(ZSTD_BUILD_COMPRESSION "BUILD COMPRESSION MODULE" ON)
+option(ZSTD_BUILD_DECOMPRESSION "BUILD DECOMPRESSION MODULE" ON)
+option(ZSTD_BUILD_DICTBUILDER "BUILD DICTBUILDER MODULE" ON)
+option(ZSTD_BUILD_DEPRECATED "BUILD DEPRECATED MODULE" OFF)
+
+set(ZSTDLIB_VISIBLE "" CACHE STRING "Visibility for ZSTDLIB API")
+set(ZSTDERRORLIB_VISIBLE "" CACHE STRING "Visibility for ZSTDERRORLIB_VISIBLE API")
+set(ZDICTLIB_VISIBLE "" CACHE STRING "Visibility for ZDICTLIB_VISIBLE API")
+set(ZSTDLIB_STATIC_API "" CACHE STRING "Visibility for ZSTDLIB_STATIC_API API")
+set(ZDICTLIB_STATIC_API "" CACHE STRING "Visibility for ZDICTLIB_STATIC_API API")
+
+set_property(CACHE ZSTDLIB_VISIBLE PROPERTY STRINGS "" "hidden" "default" "protected" "internal")
+set_property(CACHE ZSTDERRORLIB_VISIBLE PROPERTY STRINGS "" "hidden" "default" "protected" "internal")
+set_property(CACHE ZDICTLIB_VISIBLE PROPERTY STRINGS "" "hidden" "default" "protected" "internal")
+set_property(CACHE ZSTDLIB_STATIC_API PROPERTY STRINGS "" "hidden" "default" "protected" "internal")
+set_property(CACHE ZDICTLIB_STATIC_API PROPERTY STRINGS "" "hidden" "default" "protected" "internal")
if(NOT ZSTD_BUILD_SHARED AND NOT ZSTD_BUILD_STATIC)
message(SEND_ERROR "You need to build at least one flavor of libzstd")
endif()
-# Define library directory, where sources and header files are located
-include_directories(${LIBRARY_DIR} ${LIBRARY_DIR}/common)
-
file(GLOB CommonSources ${LIBRARY_DIR}/common/*.c)
file(GLOB CompressSources ${LIBRARY_DIR}/compress/*.c)
+file(GLOB DecompressSources ${LIBRARY_DIR}/decompress/*.c)
if (MSVC)
- file(GLOB DecompressSources ${LIBRARY_DIR}/decompress/*.c)
add_compile_options(-DZSTD_DISABLE_ASM)
else ()
- file(GLOB DecompressSources ${LIBRARY_DIR}/decompress/*.c ${LIBRARY_DIR}/decompress/*.S)
+ if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|AMD64.*|x86_64.*|X86_64.*")
+ set(DecompressSources ${DecompressSources} ${LIBRARY_DIR}/decompress/huf_decompress_amd64.S)
+ else()
+ add_compile_options(-DZSTD_DISABLE_ASM)
+ endif()
endif ()
file(GLOB DictBuilderSources ${LIBRARY_DIR}/dictBuilder/*.c)
+file(GLOB DeprecatedSources ${LIBRARY_DIR}/deprecated/*.c)
-set(Sources
- ${CommonSources}
- ${CompressSources}
- ${DecompressSources}
- ${DictBuilderSources})
-
+file(GLOB PublicHeaders ${LIBRARY_DIR}/*.h)
file(GLOB CommonHeaders ${LIBRARY_DIR}/common/*.h)
file(GLOB CompressHeaders ${LIBRARY_DIR}/compress/*.h)
file(GLOB DecompressHeaders ${LIBRARY_DIR}/decompress/*.h)
file(GLOB DictBuilderHeaders ${LIBRARY_DIR}/dictBuilder/*.h)
+file(GLOB DeprecatedHeaders ${LIBRARY_DIR}/deprecated/*.h)
-set(Headers
- ${LIBRARY_DIR}/zstd.h
- ${CommonHeaders}
- ${CompressHeaders}
- ${DecompressHeaders}
- ${DictBuilderHeaders})
+set(Sources ${CommonSources})
+set(Headers ${PublicHeaders} ${CommonHeaders})
+if (ZSTD_BUILD_COMPRESSION)
+ set(Sources ${Sources} ${CompressSources})
+ set(Headers ${Headers} ${CompressHeaders})
+endif()
+if (ZSTD_BUILD_DECOMPRESSION)
+ set(Sources ${Sources} ${DecompressSources})
+ set(Headers ${Headers} ${DecompressHeaders})
+endif()
+if (ZSTD_BUILD_DICTBUILDER)
+ set(Sources ${Sources} ${DictBuilderSources})
+ set(Headers ${Headers} ${DictBuilderHeaders})
+endif()
+if (ZSTD_BUILD_DEPRECATED)
+ set(Sources ${Sources} ${DeprecatedSources})
+ set(Headers ${Headers} ${DeprecatedHeaders})
+endif()
if (ZSTD_LEGACY_SUPPORT)
set(LIBRARY_LEGACY_DIR ${LIBRARY_DIR}/legacy)
- include_directories(${LIBRARY_LEGACY_DIR})
set(Sources ${Sources}
${LIBRARY_LEGACY_DIR}/zstd_v01.c
# Our assembly expects to be compiled by a C compiler, and is only enabled for
# __GNUC__ compatible compilers. Otherwise all the ASM code is disabled by
# macros.
-set_source_files_properties(${Sources} PROPERTIES LANGUAGE C)
+if(NOT CMAKE_ASM_COMPILER STREQUAL CMAKE_C_COMPILER)
+ set_source_files_properties(${Sources} PROPERTIES LANGUAGE C)
+endif()
+
+macro (add_definition target var)
+ if (NOT ("${${var}}" STREQUAL ""))
+ set_property(TARGET ${target} APPEND PROPERTY COMPILE_DEFINITIONS "${var}=__attribute__((visibility(\"${${var}}\")))")
+ endif ()
+endmacro ()
+
+# Define directories containing the library's public headers
+set(PUBLIC_INCLUDE_DIRS ${LIBRARY_DIR})
# Split project to static and shared libraries build
set(library_targets)
if (ZSTD_BUILD_SHARED)
add_library(libzstd_shared SHARED ${Sources} ${Headers} ${PlatformDependResources})
+ target_include_directories(libzstd_shared INTERFACE $<BUILD_INTERFACE:${PUBLIC_INCLUDE_DIRS}>)
list(APPEND library_targets libzstd_shared)
if (ZSTD_MULTITHREAD_SUPPORT)
set_property(TARGET libzstd_shared APPEND PROPERTY COMPILE_DEFINITIONS "ZSTD_MULTITHREAD")
if (UNIX)
target_link_libraries(libzstd_shared ${THREADS_LIBS})
endif ()
- endif()
+ endif ()
+ add_definition(libzstd_shared ZSTDLIB_VISIBLE)
+ add_definition(libzstd_shared ZSTDERRORLIB_VISIBLE)
+ add_definition(libzstd_shared ZDICTLIB_VISIBLE)
endif ()
if (ZSTD_BUILD_STATIC)
add_library(libzstd_static STATIC ${Sources} ${Headers})
+ target_include_directories(libzstd_static INTERFACE $<BUILD_INTERFACE:${PUBLIC_INCLUDE_DIRS}>)
list(APPEND library_targets libzstd_static)
if (ZSTD_MULTITHREAD_SUPPORT)
set_property(TARGET libzstd_static APPEND PROPERTY COMPILE_DEFINITIONS "ZSTD_MULTITHREAD")
target_link_libraries(libzstd_static ${THREADS_LIBS})
endif ()
endif ()
+ add_definition(libzstd_static ZSTDLIB_VISIBLE)
+ add_definition(libzstd_static ZSTDERRORLIB_VISIBLE)
+ add_definition(libzstd_static ZDICTLIB_VISIBLE)
+ add_definition(libzstd_static ZSTDLIB_STATIC_API)
+ add_definition(libzstd_static ZDICTLIB_STATIC_API)
+endif ()
+if (ZSTD_BUILD_SHARED AND NOT ZSTD_BUILD_STATIC)
+ if (NOT BUILD_SHARED_LIBS)
+ message(WARNING "BUILD_SHARED_LIBS is OFF, but ZSTD_BUILD_SHARED is ON and ZSTD_BUILD_STATIC is OFF, which takes precedence, so libzstd is a shared library")
+ endif ()
+ add_library(libzstd INTERFACE)
+ target_link_libraries(libzstd INTERFACE libzstd_shared)
+ list(APPEND library_targets libzstd)
+endif ()
+if (ZSTD_BUILD_STATIC AND NOT ZSTD_BUILD_SHARED)
+ if (BUILD_SHARED_LIBS)
+ message(WARNING "BUILD_SHARED_LIBS is ON, but ZSTD_BUILD_SHARED is OFF and ZSTD_BUILD_STATIC is ON, which takes precedence, is set so libzstd is a static library")
+ endif ()
+ add_library(libzstd INTERFACE)
+ target_link_libraries(libzstd INTERFACE libzstd_static)
+ list(APPEND library_targets libzstd)
+endif ()
+if (ZSTD_BUILD_SHARED AND ZSTD_BUILD_STATIC)
+ # If both ZSTD_BUILD_SHARED and ZSTD_BUILD_STATIC are set, which is the
+ # default, fallback to using BUILD_SHARED_LIBS to determine whether to
+ # set libzstd to static or shared.
+ if (BUILD_SHARED_LIBS)
+ add_library(libzstd INTERFACE)
+ target_link_libraries(libzstd INTERFACE libzstd_shared)
+ list(APPEND library_targets libzstd)
+ else ()
+ add_library(libzstd INTERFACE)
+ target_link_libraries(libzstd INTERFACE libzstd_static)
+ list(APPEND library_targets libzstd)
+ endif ()
endif ()
# Add specific compile definitions for MSVC project
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libzstd.pc" DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
# install target
-install(FILES
- "${LIBRARY_DIR}/zstd.h"
- "${LIBRARY_DIR}/zdict.h"
- "${LIBRARY_DIR}/zstd_errors.h"
- DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
+install(FILES ${PublicHeaders} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
install(TARGETS ${library_targets}
EXPORT zstdExports
set(PlatformDependResources ${MSVC_RESOURCE_DIR}/zstd.rc)
endif ()
-add_executable(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/fileio_asyncio.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/dibio.c ${PROGRAMS_DIR}/zstdcli_trace.c ${PlatformDependResources})
+file(GLOB ZSTD_PROGRAM_SRCS "${PROGRAMS_DIR}/*.c")
+if (MSVC AND ZSTD_PROGRAMS_LINK_SHARED)
+ list(APPEND ZSTD_PROGRAM_SRCS ${LIBRARY_DIR}/common/pool.c ${LIBRARY_DIR}/common/threading.c)
+endif ()
+
+add_executable(zstd ${ZSTD_PROGRAM_SRCS})
target_link_libraries(zstd ${PROGRAMS_ZSTD_LINK_TARGET})
if (CMAKE_SYSTEM_NAME MATCHES "(Solaris|SunOS)")
target_link_libraries(zstd rt)
${CMAKE_CURRENT_BINARY_DIR}/zstdless.1
DESTINATION "${MAN_INSTALL_DIR}")
- add_executable(zstd-frugal ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/fileio_asyncio.c)
+ add_executable(zstd-frugal ${PROGRAMS_DIR}/zstdcli.c
+ ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c
+ ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/fileio_asyncio.c)
target_link_libraries(zstd-frugal ${PROGRAMS_ZSTD_LINK_TARGET})
set_property(TARGET zstd-frugal APPEND PROPERTY COMPILE_DEFINITIONS "ZSTD_NOBENCH;ZSTD_NODICT;ZSTD_NOTRACE")
endif ()
set(TESTS_DIR ${ZSTD_SOURCE_DIR}/tests)
include_directories(${TESTS_DIR} ${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/compress ${LIBRARY_DIR}/dictBuilder)
-add_executable(datagen ${PROGRAMS_DIR}/datagen.c ${TESTS_DIR}/datagencli.c)
+add_executable(datagen ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/lorem.c ${TESTS_DIR}/loremOut.c ${TESTS_DIR}/datagencli.c)
target_link_libraries(datagen libzstd_static)
#
# fullbench
#
-add_executable(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${TESTS_DIR}/fullbench.c)
+add_executable(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/lorem.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${TESTS_DIR}/fullbench.c)
if (NOT MSVC)
target_compile_options(fullbench PRIVATE "-Wno-deprecated-declarations")
endif()
target_link_libraries(fullbench libzstd_static)
-add_test(NAME fullbench COMMAND fullbench ${ZSTD_FULLBENCH_FLAGS})
+add_test(NAME fullbench COMMAND "$<TARGET_FILE:fullbench>" ${ZSTD_FULLBENCH_FLAGS})
#
# fuzzer
target_link_libraries(fuzzer libzstd_static)
AddTestFlagsOption(ZSTD_FUZZER_FLAGS "$ENV{FUZZERTEST} $ENV{FUZZER_FLAGS}"
"Semicolon-separated list of flags to pass to the fuzzer test (see `fuzzer -h` for usage)")
-add_test(NAME fuzzer COMMAND fuzzer ${ZSTD_FUZZER_FLAGS})
+add_test(NAME fuzzer COMMAND "$<TARGET_FILE:fuzzer>" ${ZSTD_FUZZER_FLAGS})
# Disable the timeout since the run time is too long for the default timeout of
# 1500 seconds and varies considerably between low-end and high-end CPUs.
# set_tests_properties(fuzzer PROPERTIES TIMEOUT 0)
target_link_libraries(zstreamtest libzstd_static)
AddTestFlagsOption(ZSTD_ZSTREAM_FLAGS "$ENV{ZSTREAM_TESTTIME} $ENV{FUZZER_FLAGS}"
"Semicolon-separated list of flags to pass to the zstreamtest test (see `zstreamtest -h` for usage)")
-add_test(NAME zstreamtest COMMAND zstreamtest ${ZSTD_ZSTREAM_FLAGS})
+add_test(NAME zstreamtest COMMAND "$<TARGET_FILE:zstreamtest>" ${ZSTD_ZSTREAM_FLAGS})
#
# playTests.sh
# Label the "Medium" set of tests (see TESTING.md)
set_property(TEST fuzzer zstreamtest playTests APPEND PROPERTY LABELS Medium)
-add_executable(paramgrill ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/paramgrill.c)
+add_executable(paramgrill ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/lorem.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/paramgrill.c)
if (UNIX)
target_link_libraries(paramgrill libzstd_static m) #m is math library
else()
--- /dev/null
+@PACKAGE_INIT@
+
+include(CMakeFindDependencyMacro)
+if(@ZSTD_MULTITHREAD_SUPPORT@ AND "@UNIX@")
+ find_dependency(Threads)
+endif()
+
+include("${CMAKE_CURRENT_LIST_DIR}/zstdTargets.cmake")
+
+check_required_components("zstd")
join_paths(zstd_rootdir, 'programs/benchfn.c'),
join_paths(zstd_rootdir, 'programs/benchzstd.c'),
join_paths(zstd_rootdir, 'programs/datagen.c'),
+ join_paths(zstd_rootdir, 'programs/lorem.c'),
join_paths(zstd_rootdir, 'programs/dibio.c'),
join_paths(zstd_rootdir, 'programs/zstdcli_trace.c')]
test_includes = [ include_directories(join_paths(zstd_rootdir, 'programs')) ]
testcommon_sources = [join_paths(zstd_rootdir, 'programs/datagen.c'),
+ join_paths(zstd_rootdir, 'programs/lorem.c'),
join_paths(zstd_rootdir, 'programs/util.c'),
join_paths(zstd_rootdir, 'programs/timefn.c'),
join_paths(zstd_rootdir, 'programs/benchfn.c'),
dependencies: libzstd_deps,
include_directories: libzstd_includes)
-datagen_sources = [join_paths(zstd_rootdir, 'tests/datagencli.c')]
+datagen_sources = [join_paths(zstd_rootdir, 'tests/datagencli.c'),
+ join_paths(zstd_rootdir, 'tests/loremOut.c')]
datagen = executable('datagen',
datagen_sources,
c_args: [ '-DNDEBUG' ],
/*-****************************************
* Compiler specifics
******************************************/
+#undef MEM_STATIC /* may be already defined from common/compiler.h */
#define MEM_STATIC static inline
/*-**************************************************************
size_t zstd_reset_dstream(zstd_dstream *dstream)
{
- return ZSTD_resetDStream(dstream);
+ return ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only);
}
EXPORT_SYMBOL(zstd_reset_dstream);
#ifndef ZSTD_DEPS_STDINT
#define ZSTD_DEPS_STDINT
-/*
- * The Linux Kernel doesn't provide intptr_t, only uintptr_t, which
- * is an unsigned long.
- */
-typedef long intptr_t;
+/* intptr_t already provided by ZSTD_DEPS_COMMON */
#endif /* ZSTD_DEPS_STDINT */
#endif /* ZSTD_DEPS_NEED_STDINT */
# Standard variables for installation
DESTDIR ?=
PREFIX ?= /usr/local
-BINDIR := $(DESTDIR)$(PREFIX)/bin
+BINDIR := $(PREFIX)/bin
ZSTDDIR = ../../lib
PROGDIR = ../../programs
PZSTD_INC = -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(PROGDIR) -I.
GTEST_INC = -isystem googletest/googletest/include
-# If default C++ version is older than C++11, explicitly set C++11, which is the
-# minimum required by the code.
-ifeq ($(shell echo "\043if __cplusplus < 201103L\n\043error\n\043endif" | $(CXX) -x c++ -Werror -c - -o /dev/null 2>/dev/null && echo 1 || echo 0),0)
-PZSTD_CXX_STD := -std=c++11
-endif
+# Set the minimum required by gtest
+PZSTD_CXX_STD := -std=c++14
PZSTD_CPPFLAGS = $(PZSTD_INC)
PZSTD_CCXXFLAGS =
.PHONY: install
install: PZSTD_CPPFLAGS += -DNDEBUG
install: pzstd$(EXT)
- install -d -m 755 $(BINDIR)/
- install -m 755 pzstd$(EXT) $(BINDIR)/pzstd$(EXT)
+ install -d -m 755 $(DESTDIR)$(BINDIR)/
+ install -m 755 pzstd$(EXT) $(DESTDIR)$(BINDIR)/pzstd$(EXT)
.PHONY: uninstall
uninstall:
- $(RM) $(BINDIR)/pzstd$(EXT)
+ $(RM) $(DESTDIR)$(BINDIR)/pzstd$(EXT)
# Targets for many different builds
.PHONY: all
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h> // presumes zstd library is installed
#include <zstd_errors.h>
-#if defined(WIN32) || defined(_WIN32)
+#if defined(_WIN32)
# include <windows.h>
# define SLEEP(x) Sleep(x)
#else
1. The last affected decompressor versions.
2. The decompressor components affected.
2. Whether the compressed frame could ever be produced by the reference compressor.
-3. An example frame.
+3. An example frame (hexadecimal string when it is short enough, link to golden file otherwise)
4. A description of the bug.
The document is in reverse chronological order, with the bugs that affect the most recent zstd decompressor versions listed first.
+No sequence using the 2-bytes format
+------------------------------------------------
+
+**Last affected version**: v1.5.5
+
+**Affected decompressor component(s)**: Library & CLI
+
+**Produced by the reference compressor**: No
+
+**Example Frame**: see zstd/tests/golden-decompression/zeroSeq_2B.zst
+
+The zstd decoder incorrectly expects FSE tables when there are 0 sequences present in the block
+if the value 0 is encoded using the 2-bytes format.
+Instead, it should immediately end the sequence section, and move on to the next block.
+
+This situation was never generated by the reference compressor,
+because representing 0 sequences with the 2-bytes format is inefficient
+(the 1-byte format is always used in this case).
+
+
+Compressed block with a size of exactly 128 KB
+------------------------------------------------
+
+**Last affected version**: v1.5.2
+
+**Affected decompressor component(s)**: Library & CLI
+
+**Produced by the reference compressor**: No
+
+**Example Frame**: see zstd/tests/golden-decompression/block-128k.zst
+
+The zstd decoder incorrectly rejected blocks of type `Compressed_Block` when their size was exactly 128 KB.
+Note that `128 KB - 1` was accepted, and `128 KB + 1` is forbidden by the spec.
+
+This type of block was never generated by the reference compressor.
+
+These blocks used to be disallowed by the spec up until spec version 0.3.2 when the restriction was lifted by [PR#1689](https://github.com/facebook/zstd/pull/1689).
+
+> A Compressed_Block has the extra restriction that Block_Size is always strictly less than the decompressed size. If this condition cannot be respected, the block must be sent uncompressed instead (Raw_Block).
+
+
Compressed block with 0 literals and 0 sequences
------------------------------------------------
> A Compressed_Block has the extra restriction that Block_Size is always strictly less than the decompressed size. If this condition cannot be respected, the block must be sent uncompressed instead (Raw_Block).
+
First block is RLE block
------------------------
https://github.com/facebook/zstd/blob/8814aa5bfa74f05a86e55e9d508da177a893ceeb/lib/compress/zstd_compress.c#L3527-L3535
+
Tiny FSE Table & Block
----------------------
See the compressor workaround code:
https://github.com/facebook/zstd/blob/8814aa5bfa74f05a86e55e9d508da177a893ceeb/lib/compress/zstd_compress.c#L2667-L2682
+
+Magicless format
+----------------------
+
+**Last affected version**: v1.5.5
+
+**Affected decompressor component(s)**: Library
+
+**Produced by the reference compressor**: Yes (example: https://gist.github.com/embg/9940726094f4cf2cef162cffe9319232)
+
+**Example Frame**: `27 b5 2f fd 00 03 19 00 00 66 6f 6f 3f ba c4 59`
+
+v1.5.6 fixes several bugs in which the magicless-format decoder rejects valid frames.
+These include but are not limited to:
+* Valid frames that happen to begin with a legacy magic number (little-endian)
+* Valid frames that happen to begin with a skippable magic number (little-endian)
+
+If you are affected by this issue and cannot update to v1.5.6 or later, there is a
+workaround to recover affected data. Simply prepend the ZSTD magic number
+`0xFD2FB528` (little-endian) to your data and decompress using the standard-format
+decoder.
--- /dev/null
+Decompressor Permissiveness to Invalid Data
+===========================================
+
+This document describes the behavior of the reference decompressor in cases
+where it accepts formally invalid data instead of reporting an error.
+
+While the reference decompressor *must* decode any compliant frame following
+the specification, its ability to detect erroneous data is on a best effort
+basis: the decoder may accept input data that would be formally invalid,
+when doing so poses no risk to the decoder, and when detecting it would cost too
+much complexity or cause a speed regression.
+
+In practice, the vast majority of invalid data are detected, if only because
+many corruption events are dangerous for the decoder process (such as
+requesting an out-of-bound memory access) and many more are easy to check.
+
+This document lists a few known cases where invalid data was formerly accepted
+by the decoder, and what has changed since.
+
+
+Offset == 0
+-----------
+
+**Last affected version**: v1.5.5
+
+**Produced by the reference compressor**: No
+
+**Example Frame**: `28b5 2ffd 0000 4500 0008 0002 002f 430b ae`
+
+If a sequence is decoded with `literals_length = 0` and `offset_value = 3`
+while `Repeated_Offset_1 = 1`, the computed offset will be `0`, which is
+invalid.
+
+The reference decompressor up to v1.5.5 processes this case as if the computed
+offset was `1`, including inserting `1` into the repeated offset list.
+This prevents the output buffer from remaining uninitialized, thus denying a
+potential attack vector from an untrusted source.
+However, in the rare case where this scenario would be the outcome of a
+transmission or storage error, the decoder relies on the checksum to detect
+the error.
+
+In newer versions, this case is always detected and reported as a corruption error.
+
+
+Non-zeroes reserved bits
+------------------------
+
+**Last affected version**: v1.5.5
+
+**Produced by the reference compressor**: No
+
+The Sequences section of each block has a header, and one of its elements is a
+byte, which describes the compression mode of each symbol.
+This byte contains 2 reserved bits which must be set to zero.
+
+The reference decompressor up to v1.5.5 just ignores these 2 bits.
+This behavior has no consequence for the rest of the frame decoding process.
+
+In newer versions, the 2 reserved bits are actively checked for value zero,
+and the decoder reports a corruption error if they are not.
const size_t num_sequences);
static sequence_command_t decode_sequence(sequence_states_t *const state,
const u8 *const src,
- i64 *const offset);
+ i64 *const offset,
+ int lastSequence);
static void decode_seq_table(FSE_dtable *const table, istream_t *const in,
const seq_part_t type, const seq_mode_t mode);
// This is a variable size field using between 1 and 3 bytes. Let's call its
// first byte byte0."
u8 header = IO_read_bits(in, 8);
- if (header == 0) {
- // "There are no sequences. The sequence section stops there.
- // Regenerated content is defined entirely by literals section."
- *sequences = NULL;
- return 0;
- } else if (header < 128) {
+ if (header < 128) {
// "Number_of_Sequences = byte0 . Uses 1 byte."
num_sequences = header;
} else if (header < 255) {
num_sequences = IO_read_bits(in, 16) + 0x7F00;
}
+ if (num_sequences == 0) {
+ // "There are no sequences. The sequence section stops there."
+ *sequences = NULL;
+ return 0;
+ }
+
*sequences = malloc(num_sequences * sizeof(sequence_command_t));
if (!*sequences) {
BAD_ALLOC();
for (size_t i = 0; i < num_sequences; i++) {
// Decode sequences one by one
- sequences[i] = decode_sequence(&states, src, &bit_offset);
+ sequences[i] = decode_sequence(&states, src, &bit_offset, i==num_sequences-1);
}
if (bit_offset != 0) {
// Decode a single sequence and update the state
static sequence_command_t decode_sequence(sequence_states_t *const states,
const u8 *const src,
- i64 *const offset) {
+ i64 *const offset,
+ int lastSequence) {
// "Each symbol is a code in its own context, which specifies Baseline and
// Number_of_Bits to add. Codes are FSE compressed, and interleaved with raw
// additional bits in the same bitstream."
// Literals_Length_State is updated, followed by Match_Length_State, and
// then Offset_State."
// If the stream is complete don't read bits to update state
- if (*offset != 0) {
+ if (!lastSequence) {
FSE_update_state(&states->ll_table, &states->ll_state, src, offset);
FSE_update_state(&states->ml_table, &states->ml_state, src, offset);
FSE_update_state(&states->of_table, &states->of_state, src, offset);
break;
}
case seq_repeat:
- // "Repeat_Mode : re-use distribution table from previous compressed
+ // "Repeat_Mode : reuse distribution table from previous compressed
// block."
// Nothing to do here, table will be unchanged
if (!table->symbols) {
/******* END OUTPUT SIZE COUNTING *********************************************/
/******* DICTIONARY PARSING ***************************************************/
-dictionary_t* create_dictionary() {
+dictionary_t* create_dictionary(void) {
dictionary_t* const dict = calloc(1, sizeof(dictionary_t));
if (!dict) {
BAD_ALLOC();
### Version
-0.3.9 (2023-03-08)
+0.4.0 (2023-06-05)
Introduction
This is a variable size field using between 1 and 3 bytes.
Let's call its first byte `byte0`.
-- `if (byte0 == 0)` : there are no sequences.
- The sequence section stops there.
- Decompressed content is defined entirely as Literals Section content.
- The FSE tables used in `Repeat_Mode` aren't updated.
- `if (byte0 < 128)` : `Number_of_Sequences = byte0` . Uses 1 byte.
-- `if (byte0 < 255)` : `Number_of_Sequences = ((byte0-128) << 8) + byte1` . Uses 2 bytes.
-- `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00` . Uses 3 bytes.
+- `if (byte0 < 255)` : `Number_of_Sequences = ((byte0 - 0x80) << 8) + byte1`. Uses 2 bytes.
+ Note that the 2 bytes format fully overlaps the 1 byte format.
+- `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00`. Uses 3 bytes.
+
+`if (Number_of_Sequences == 0)` : there are no sequences.
+ The sequence section stops immediately,
+ FSE tables used in `Repeat_Mode` aren't updated.
+ Block's decompressed content is defined solely by the Literals Section content.
__Symbol compression modes__
In this case, repeated offsets are shifted by one,
so an `offset_value` of 1 means `Repeated_Offset2`,
an `offset_value` of 2 means `Repeated_Offset3`,
-and an `offset_value` of 3 means `Repeated_Offset1 - 1_byte`.
+and an `offset_value` of 3 means `Repeated_Offset1 - 1`.
+
+In the final case, if `Repeated_Offset1 - 1` evaluates to 0, then the
+data is considered corrupted.
For the first block, the starting offset history is populated with following values :
`Repeated_Offset1`=1, `Repeated_Offset2`=4, `Repeated_Offset3`=8,
Presuming an `Accuracy_Log` of 8,
and presuming 100 probabilities points have already been distributed,
the decoder may read any value from `0` to `256 - 100 + 1 == 157` (inclusive).
- Therefore, it must read `log2sup(157) == 8` bits.
+ Therefore, it may read up to `log2sup(157) == 8` bits, where `log2sup(N)`
+ is the smallest integer `T` that satisfies `(1 << T) > N`.
- Value decoded : small values use 1 less bit :
__example__ :
decoding is complete.
If the last symbol makes cumulated total go above `1 << Accuracy_Log`,
distribution is considered corrupted.
+If this process results in a non-zero probability for a value outside of the
+valid range of values that the FSE table is defined for, even if that value is
+not used, then the data is considered corrupted.
Then the decoder can tell how many bytes were used in this process,
and how many symbols are present.
increasing at each state, then resuming at the first state,
each state takes its allocated width from Baseline.
-| state value | 1 | 39 | 77 | 84 | 122 |
| state order | 0 | 1 | 2 | 3 | 4 |
| ---------------- | ----- | ----- | ------ | ---- | ------ |
+| state value | 1 | 39 | 77 | 84 | 122 |
| width | 32 | 32 | 32 | 16 | 16 |
| `Number_of_Bits` | 5 | 5 | 5 | 4 | 4 |
| range number | 2 | 4 | 6 | 0 | 1 |
```
When a literal value is not present, it receives a `Weight` of 0.
The least frequent symbol receives a `Weight` of 1.
-Consequently, the `Weight` 1 is necessarily present.
+If no literal has a `Weight` of 1, then the data is considered corrupted.
+If there are not at least two literals with non-zero `Weight`, then the data
+is considered corrupted.
The most frequent symbol receives a `Weight` anywhere between 1 and 11 (max).
The last symbol's `Weight` is deduced from previously retrieved Weights,
by completing to the nearest power of 2. It's necessarily non 0.
remain in the stream, it is assumed that extra bits are 0. Then,
symbols for each of the final states are decoded and the process is complete.
+If this process would produce more weights than the maximum number of decoded
+weights (255), then the data is considered corrupted.
+
#### Conversion from weights to Huffman prefix codes
All present symbols shall now have a `Weight` value.
Version changes
---------------
+- 0.4.0 : fixed imprecise behavior for nbSeq==0, detected by Igor Pavlov
- 0.3.9 : clarifications for Huffman-compressed literal sizes.
- 0.3.8 : clarifications for Huffman Blocks and Huffman Tree descriptions.
- 0.3.7 : clarifications for Repeat_Offsets, matching RFC8878
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>zstd 1.5.5 Manual</title>
+<title>zstd 1.5.6 Manual</title>
</head>
<body>
-<h1>zstd 1.5.5 Manual</h1>
+<h1>zstd 1.5.6 Manual</h1>
<hr>
<a name="Contents"></a><h2>Contents</h2>
<ol>
* for example to size a static array on stack.
* Will produce constant value 0 if srcSize too large.
*/
-#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00LLU : 0xFF00FF00U)
+#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U)
#define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) </b>/* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */<b>
size_t ZSTD_compressBound(size_t srcSize); </b>/*!< maximum compressed size in worst case single-pass scenario */<b>
</b>/* ZSTD_isError() :<b>
<h3>Compression context</h3><pre> When compressing many times,
it is recommended to allocate a context just once,
- and re-use it for each successive compression operation.
+ and reuse it for each successive compression operation.
This will make workload friendlier for system's memory.
Note : re-using context is just a speed / resource optimization.
It doesn't change the compression ratio, which remains identical.
const void* src, size_t srcSize,
int compressionLevel);
</b><p> Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
- Important : in order to behave similarly to `ZSTD_compress()`,
- this function compresses at requested compression level,
- __ignoring any other parameter__ .
+ Important : in order to mirror `ZSTD_compress()` behavior,
+ this function compresses at the requested compression level,
+ __ignoring any other advanced parameter__ .
If any advanced parameter was set using the advanced API,
they will all be reset. Only `compressionLevel` remains.
<h3>Decompression context</h3><pre> When decompressing many times,
it is recommended to allocate a context only once,
- and re-use it for each successive compression operation.
+ and reuse it for each successive compression operation.
This will make workload friendlier for system's memory.
Use one context per thread for parallel execution.
</pre><b><pre>typedef struct ZSTD_DCtx_s ZSTD_DCtx;
const void* src, size_t srcSize);
</b><p> Same as ZSTD_decompress(),
requires an allocated ZSTD_DCtx.
- Compatible with sticky parameters.
+ Compatible with sticky parameters (see below).
</p></pre><BR>
* The higher the value of selected strategy, the more complex it is,
* resulting in stronger and slower compression.
* Special: value 0 means "use default strategy". */
+
+ ZSTD_c_targetCBlockSize=130, </b>/* v1.5.6+<b>
+ * Attempts to fit compressed block size into approximately targetCBlockSize.
+ * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX.
+ * Note that it's not a guarantee, just a convergence target (default:0).
+ * No target when targetCBlockSize == 0.
+ * This is helpful in low bandwidth streaming environments to improve end-to-end latency,
+ * when a client can make use of partial documents (a prominent example being Chrome).
+ * Note: this parameter is stable since v1.5.6.
+ * It was present as an experimental parameter in earlier versions,
+ * but it's not recommended using it with earlier library versions
+ * due to massive performance regressions.
+ */
</b>/* LDM mode parameters */<b>
ZSTD_c_enableLongDistanceMatching=160, </b>/* Enable long distance matching.<b>
* This parameter is designed to improve compression ratio
* ZSTD_c_forceMaxWindow
* ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode
- * ZSTD_c_targetCBlockSize
* ZSTD_c_srcSizeHint
* ZSTD_c_enableDedicatedDictSearch
* ZSTD_c_stableInBuffer
ZSTD_c_experimentalParam3=1000,
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
- ZSTD_c_experimentalParam6=1003,
+ </b>/* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */<b>
ZSTD_c_experimentalParam7=1004,
ZSTD_c_experimentalParam8=1005,
ZSTD_c_experimentalParam9=1006,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
</b><p> Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
+ (note that this entry point doesn't even expose a compression level parameter).
ZSTD_compress2() always starts a new frame.
Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
- Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
* ZSTD_d_forceIgnoreChecksum
* ZSTD_d_refMultipleDDicts
* ZSTD_d_disableHuffmanAssembly
+ * ZSTD_d_maxBlockSize
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly
*/
ZSTD_d_experimentalParam2=1001,
ZSTD_d_experimentalParam3=1002,
ZSTD_d_experimentalParam4=1003,
- ZSTD_d_experimentalParam5=1004
+ ZSTD_d_experimentalParam5=1004,
+ ZSTD_d_experimentalParam6=1005
} ZSTD_dParameter;
</b></pre><BR>
A ZSTD_CStream object is required to track streaming operation.
Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
- It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
+ It is recommended to reuse ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
For parallel execution, use one separate ZSTD_CStream per thread.
note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
Parameters are sticky : when starting a new compression on the same context,
- it will re-use the same sticky parameters as previous compression session.
+ it will reuse the same sticky parameters as previous compression session.
When in doubt, it's recommended to fully initialize the context before usage.
Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
only ZSTD_e_end or ZSTD_e_flush operations are allowed.
Before starting a new compression job, or changing compression parameters,
it is required to fully flush internal buffers.
+ - note: if an operation ends with an error, it may leave @cctx in an undefined state.
+ Therefore, it's UB to invoke ZSTD_compressStream2() or ZSTD_compressStream() on such a state.
+ In order to be re-employed after an error, a state must be reset,
+ which can be done explicitly (ZSTD_CCtx_reset()),
+ or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx())
</p></pre><BR>
<a name="Chapter9"></a><h2>Streaming decompression - HowTo</h2><pre>
A ZSTD_DStream object is required to track streaming operations.
Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
- ZSTD_DStream objects can be re-used multiple times.
+ ZSTD_DStream objects can be reused multiple times.
Use ZSTD_initDStream() to start a new decompression operation.
@return : recommended first input size
@return : 0 when a frame is completely decoded and fully flushed,
or an error code, which can be tested using ZSTD_isError(),
or any other value > 0, which means there is some decoding or flushing to do to complete current frame.
+
+ Note: when an operation returns with an error code, the @zds state may be left in undefined state.
+ It's UB to invoke `ZSTD_decompressStream()` on such a state.
+ In order to re-use such a state, it must be first reset,
+ which can be done explicitly (`ZSTD_DCtx_reset()`),
+ or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`)
</p></pre><BR>
<a name="Chapter13"></a><h2>Advanced dictionary and prefix API (Requires v1.4.0+)</h2><pre>
This API allows dictionaries to be used with ZSTD_compress2(),
ZSTD_compressStream2(), and ZSTD_decompressDCtx().
- Dictionaries are sticky, they remain valid when same context is re-used,
+ Dictionaries are sticky, they remain valid when same context is reused,
they only reset when the context is reset
with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters.
In contrast, Prefixes are single-use.
<a name="Chapter16"></a><h2>Memory management</h2><pre></pre>
-<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel);
ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void);
</b><p> These functions make it possible to estimate memory usage
of a future {D,C}Ctx, before its creation.
+ This is useful in combination with ZSTD_initStatic(),
+ which makes it possible to employ a static buffer for ZSTD_CCtx* state.
ZSTD_estimateCCtxSize() will provide a memory budget large enough
- for any compression level up to selected one.
- Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
- does not include space for a window buffer.
- Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
+ to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2()
+ associated with any compression level up to max specified one.
The estimate will assume the input may be arbitrarily large,
which is the worst case.
+ Note that the size estimation is specific for one-shot compression,
+ it is not valid for streaming (see ZSTD_estimateCStreamSize*())
+ nor other potential ways of using a ZSTD_CCtx* state.
+
When srcSize can be bound by a known and rather "small" value,
- this fact can be used to provide a tighter estimation
- because the CCtx compression context will need less memory.
- This tighter estimation can be provided by more advanced functions
+ this knowledge can be used to provide a tighter budget estimation
+ because the ZSTD_CCtx* state will need less memory for small inputs.
+ This tighter estimation can be provided by employing more advanced functions
ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
Note : only single-threaded compression is supported.
ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
-
- Note 2 : ZSTD_estimateCCtxSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
- Size estimates assume that no external sequence producer is registered.
</p></pre><BR>
-<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel);
ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
-ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize);
ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
-</b><p> ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
- It will also consider src size to be arbitrarily "large", which is worst case.
+</b><p> ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression
+ using any compression level up to the max specified one.
+ It will also consider src size to be arbitrarily "large", which is a worst case scenario.
If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
Note : CStream size estimation is only correct for single-threaded compression.
- ZSTD_DStream memory budget depends on window Size.
+ ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
+ Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
+ Size estimates assume that no external sequence producer is registered.
+
+ ZSTD_DStream memory budget depends on frame's window Size.
This information can be passed manually, using ZSTD_estimateDStreamSize,
or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
+ Any frame requesting a window size larger than max specified one will be rejected.
Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
an internal ?Dict will be created, which additional size is not estimated here.
In this case, get total size by adding ZSTD_estimate?DictSize
- Note 2 : only single-threaded compression is supported.
- ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
- Note 3 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
- Size estimates assume that no external sequence producer is registered.
</p></pre><BR>
explicitly specified.
start a new frame, using same parameters from previous frame.
- This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
+ This is typically useful to skip dictionary loading stage, since it will reuse it in-place.
Note that zcs must be init at least once before using ZSTD_resetCStream().
If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
</b><p>
ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
- re-use decompression parameters from previous init; saves dictionary loading
+ reuse decompression parameters from previous init; saves dictionary loading
</p></pre><BR>
ZSTD_registerSequenceProducer(
ZSTD_CCtx* cctx,
void* sequenceProducerState,
- ZSTD_sequenceProducer_F* sequenceProducer
+ ZSTD_sequenceProducer_F sequenceProducer
);
</b><p> Instruct zstd to use a block-level external sequence producer function.
calling this function.
</p></pre><BR>
+<pre><b>ZSTDLIB_STATIC_API void
+ZSTD_CCtxParams_registerSequenceProducer(
+ ZSTD_CCtx_params* params,
+ void* sequenceProducerState,
+ ZSTD_sequenceProducer_F sequenceProducer
+);
+</b><p> Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params.
+ This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(),
+ which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx().
+
+ If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx()
+ is required, then this function is for you. Otherwise, you probably don't need it.
+
+ See tests/zstreamtest.c for example usage.
+</p></pre><BR>
+
<a name="Chapter20"></a><h2>Buffer-less and synchronous inner streaming functions (DEPRECATED)</h2><pre>
This API is deprecated, and will be removed in a future version.
It allows streaming (de)compression with user allocated buffers.
<a name="Chapter21"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
A ZSTD_CCtx object is required to track streaming operations.
Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
- ZSTD_CCtx object can be re-used multiple times within successive compression operations.
+ ZSTD_CCtx object can be reused multiple times within successive compression operations.
Start by initializing a context.
Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
- `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
+ `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again.
<BR></pre>
<h3>Buffer-less streaming compression functions</h3><pre></pre><b><pre>ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
<a name="Chapter22"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
A ZSTD_DCtx object is required to track streaming operations.
Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
- A ZSTD_DCtx object can be re-used multiple times.
+ A ZSTD_DCtx object can be reused multiple times.
First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
*/
CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel) );
CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) );
- ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads);
+ if (nbThreads > 1) {
+ size_t const r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads);
+ if (ZSTD_isError(r)) {
+ fprintf (stderr, "Note: the linked libzstd library doesn't support multithreading. "
+ "Reverting to single-thread mode. \n");
+ }
+ }
/* This loop read from the input file, compresses that entire chunk,
* and writes all output produced to the output file.
}
int cLevel = 1;
- int nbThreads = 4;
+ int nbThreads = 1;
if (argc >= 3) {
cLevel = atoi (argv[2]);
# You may select, at your option, one of the above-listed licenses.
# ################################################################
+# default target (when running `make` with no argument)
+lib-release:
+
# Modules
ZSTD_LIB_COMPRESSION ?= 1
ZSTD_LIB_DECOMPRESSION ?= 1
# Note: by default, the static library is built single-threaded and dynamic library is built
# multi-threaded. It is possible to force multi or single threaded builds by appending
# -mt or -nomt to the build target (like lib-mt for multi-threaded, lib-nomt for single-threaded).
-.PHONY: default
-default: lib-release
+
CPPFLAGS_DYNLIB += -DZSTD_MULTITHREAD # dynamic library build defaults to multi-threaded
LDFLAGS_DYNLIB += -pthread
-CPPFLAGS_STATLIB += # static library build defaults to single-threaded
+CPPFLAGS_STATICLIB += # static library build defaults to single-threaded
ifeq ($(findstring GCC,$(CCVER)),GCC)
.PHONY: libzstd.a # must be run every time
-libzstd.a: CPPFLAGS += $(CPPFLAGS_STATLIB)
+libzstd.a: CPPFLAGS += $(CPPFLAGS_STATICLIB)
SET_CACHE_DIRECTORY = \
+$(MAKE) --no-print-directory $@ \
else
# BUILD_DIR is defined
-ZSTD_STATLIB_DIR := $(BUILD_DIR)/static
-ZSTD_STATLIB := $(ZSTD_STATLIB_DIR)/libzstd.a
-ZSTD_STATLIB_OBJ := $(addprefix $(ZSTD_STATLIB_DIR)/,$(ZSTD_LOCAL_OBJ))
-$(ZSTD_STATLIB): ARFLAGS = rcs
-$(ZSTD_STATLIB): | $(ZSTD_STATLIB_DIR)
-$(ZSTD_STATLIB): $(ZSTD_STATLIB_OBJ)
+ZSTD_STATICLIB_DIR := $(BUILD_DIR)/static
+ZSTD_STATICLIB := $(ZSTD_STATICLIB_DIR)/libzstd.a
+ZSTD_STATICLIB_OBJ := $(addprefix $(ZSTD_STATICLIB_DIR)/,$(ZSTD_LOCAL_OBJ))
+$(ZSTD_STATICLIB): ARFLAGS = rcs
+$(ZSTD_STATICLIB): | $(ZSTD_STATICLIB_DIR)
+$(ZSTD_STATICLIB): $(ZSTD_STATICLIB_OBJ)
# Check for multithread flag at target execution time
$(if $(filter -DZSTD_MULTITHREAD,$(CPPFLAGS)),\
@echo compiling multi-threaded static library $(LIBVER),\
@echo compiling single-threaded static library $(LIBVER))
$(AR) $(ARFLAGS) $@ $^
-libzstd.a: $(ZSTD_STATLIB)
+libzstd.a: $(ZSTD_STATICLIB)
cp -f $< $@
endif
# make does not consider implicit pattern rule for .PHONY target
%-mt : CPPFLAGS_DYNLIB := -DZSTD_MULTITHREAD
-%-mt : CPPFLAGS_STATLIB := -DZSTD_MULTITHREAD
+%-mt : CPPFLAGS_STATICLIB := -DZSTD_MULTITHREAD
%-mt : LDFLAGS_DYNLIB := -pthread
%-mt : %
@echo multi-threaded build completed
%-nomt : CPPFLAGS_DYNLIB :=
%-nomt : LDFLAGS_DYNLIB :=
-%-nomt : CPPFLAGS_STATLIB :=
+%-nomt : CPPFLAGS_STATICLIB :=
%-nomt : %
@echo single-threaded build completed
# Generate .h dependencies automatically
-DEPFLAGS = -MT $@ -MMD -MP -MF
+# -MMD: compiler generates dependency information as a side-effect of compilation, without system headers
+# -MP: adds phony target for each dependency other than main file.
+DEPFLAGS = -MMD -MP
-$(ZSTD_DYNLIB_DIR)/%.o : %.c $(ZSTD_DYNLIB_DIR)/%.d | $(ZSTD_DYNLIB_DIR)
+# ensure that ZSTD_DYNLIB_DIR exists prior to generating %.o
+$(ZSTD_DYNLIB_DIR)/%.o : %.c | $(ZSTD_DYNLIB_DIR)
@echo CC $@
- $(COMPILE.c) $(DEPFLAGS) $(ZSTD_DYNLIB_DIR)/$*.d $(OUTPUT_OPTION) $<
+ $(COMPILE.c) $(DEPFLAGS) $(OUTPUT_OPTION) $<
-$(ZSTD_STATLIB_DIR)/%.o : %.c $(ZSTD_STATLIB_DIR)/%.d | $(ZSTD_STATLIB_DIR)
+$(ZSTD_STATICLIB_DIR)/%.o : %.c | $(ZSTD_STATICLIB_DIR)
@echo CC $@
- $(COMPILE.c) $(DEPFLAGS) $(ZSTD_STATLIB_DIR)/$*.d $(OUTPUT_OPTION) $<
+ $(COMPILE.c) $(DEPFLAGS) $(OUTPUT_OPTION) $<
$(ZSTD_DYNLIB_DIR)/%.o : %.S | $(ZSTD_DYNLIB_DIR)
@echo AS $@
$(COMPILE.S) $(OUTPUT_OPTION) $<
-$(ZSTD_STATLIB_DIR)/%.o : %.S | $(ZSTD_STATLIB_DIR)
+$(ZSTD_STATICLIB_DIR)/%.o : %.S | $(ZSTD_STATICLIB_DIR)
@echo AS $@
$(COMPILE.S) $(OUTPUT_OPTION) $<
-MKDIR ?= mkdir
-$(BUILD_DIR) $(ZSTD_DYNLIB_DIR) $(ZSTD_STATLIB_DIR):
- $(MKDIR) -p $@
+MKDIR ?= mkdir -p
+$(BUILD_DIR) $(ZSTD_DYNLIB_DIR) $(ZSTD_STATICLIB_DIR):
+ $(MKDIR) $@
-DEPFILES := $(ZSTD_DYNLIB_OBJ:.o=.d) $(ZSTD_STATLIB_OBJ:.o=.d)
+DEPFILES := $(ZSTD_DYNLIB_OBJ:.o=.d) $(ZSTD_STATICLIB_OBJ:.o=.d)
$(DEPFILES):
-include $(wildcard $(DEPFILES))
+# The leading '-' means: do not fail if include fails (ex: directory does not exist yet)
+-include $(wildcard $(DEPFILES))
-# Special case : building library in single-thread mode _and_ without zstdmt_compress.c
-ZSTDMT_FILES = compress/zstdmt_compress.c
-ZSTD_NOMT_FILES = $(filter-out $(ZSTDMT_FILES),$(ZSTD_FILES))
+# Special case : build library in single-thread mode _and_ without zstdmt_compress.c
+# Note : we still need threading.c and pool.c for the dictionary builder,
+# but they will correctly behave single-threaded.
+ZSTDMT_FILES = zstdmt_compress.c
+ZSTD_NOMT_FILES = $(filter-out $(ZSTDMT_FILES),$(notdir $(ZSTD_FILES)))
libzstd-nomt: CFLAGS += -fPIC -fvisibility=hidden
libzstd-nomt: LDFLAGS += -shared
libzstd-nomt: $(ZSTD_NOMT_FILES)
@echo compiling single-thread dynamic library $(LIBVER)
@echo files : $(ZSTD_NOMT_FILES)
+ @if echo "$(ZSTD_NOMT_FILES)" | tr ' ' '\n' | $(GREP) -q zstdmt; then \
+ echo "Error: Found zstdmt in list."; \
+ exit 1; \
+ fi
$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
.PHONY: clean
#-----------------------------------------------------------------------------
# make install is validated only for below listed environments
#-----------------------------------------------------------------------------
-ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku AIX))
+ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku AIX MSYS_NT CYGWIN_NT))
lib: libzstd.pc
For example, advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h` .
- While invoking `make libzstd`, it's possible to define build macros
- `ZSTD_LIB_COMPRESSION, ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
+ `ZSTD_LIB_COMPRESSION`, `ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the
corresponding features. This will also disable compilation of all
dependencies (e.g. `ZSTD_LIB_COMPRESSION=0` will also disable
binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and
`ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT` (implied by `ZSTD_LIB_MINIFY`).
+ On the compressor side, Zstd's compression levels map to several internal
+ strategies. In environments where the higher compression levels aren't used,
+ it is possible to exclude all but the fastest strategy with
+ `ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP=1`. (Note that this will change
+ the behavior of the default compression level.) Or if you want to retain the
+ default compressor as well, you can set
+ `ZSTD_LIB_EXCLUDE_COMPRESSORS_GREEDY_AND_UP=1`, at the cost of an additional
+ ~20KB or so.
+
For squeezing the last ounce of size out, you can also define
`ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`,
which removes the error messages that are otherwise returned by
`ZSTDERRORLIB_VSIBILITY`, and `ZDICTLIB_VISIBILITY` if unset, for backwards compatibility
with the old macro names.
+- The C compiler macro `HUF_DISABLE_FAST_DECODE` disables the newer Huffman fast C
+ and assembly decoding loops. You may want to use this macro if these loops are
+ slower on your platform.
+
#### Windows : using MinGW+MSYS to create DLL
DLL can be created using MinGW+MSYS with the `make libzstd` command.
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
-#include "mem.h" /* MEM_STATIC */
+#include "compiler.h" /* MEM_STATIC */
#define ZSTD_STATIC_LINKING_ONLY
#include "../zstd.h" /* ZSTD_customMem */
/*-********************************************
* bitStream decoding API (read backward)
**********************************************/
+typedef size_t BitContainerType;
typedef struct {
- size_t bitContainer;
+ BitContainerType bitContainer;
unsigned bitsConsumed;
const char* ptr;
const char* start;
const char* limitPtr;
} BIT_DStream_t;
-typedef enum { BIT_DStream_unfinished = 0,
- BIT_DStream_endOfBuffer = 1,
- BIT_DStream_completed = 2,
- BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
- /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
+ BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
+ BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */
+ BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */
+ } BIT_DStream_status; /* result of BIT_reloadDStream() */
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
/* Start by invoking BIT_initDStream().
* A chunk of the bitStream is then stored into a local register.
-* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
* You can then retrieve bitFields stored into the local register, **in reverse order**.
* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
return 0;
}
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
+FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
return _bzhi_u64(bitContainer, nbBits);
bitD->bitContainer = *(const BYTE*)(bitD->start);
switch(srcSize)
{
- case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
+ case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
ZSTD_FALLTHROUGH;
- case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
+ case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
ZSTD_FALLTHROUGH;
- case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
+ case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
ZSTD_FALLTHROUGH;
- case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
+ case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
ZSTD_FALLTHROUGH;
- case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
+ case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
ZSTD_FALLTHROUGH;
- case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
+ case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;
ZSTD_FALLTHROUGH;
default: break;
return srcSize;
}
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
+FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
{
return bitContainer >> start;
}
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
+FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
{
U32 const regMask = sizeof(bitContainer)*8 - 1;
/* if start > regMask, bitstream is corrupted, and result is undefined */
* On 32-bits, maxNbBits==24.
* On 64-bits, maxNbBits==56.
* @return : value extracted */
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
+FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
{
/* arbitrate between double-shift and shift+mask */
#if 1
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
}
-MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
{
bitD->bitsConsumed += nbBits;
}
* Read (consume) next n bits from local register and update.
* Pay attention to not read more than nbBits contained into local register.
* @return : extracted value. */
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
+FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
{
size_t const value = BIT_lookBits(bitD, nbBits);
BIT_skipBits(bitD, nbBits);
return value;
}
+/*! BIT_reloadDStream_internal() :
+ * Simple variant of BIT_reloadDStream(), with two conditions:
+ * 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
+ * 2. look window is valid after shifted down : bitD->ptr >= bitD->start
+ */
+MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
+{
+ assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
+ bitD->ptr -= bitD->bitsConsumed >> 3;
+ assert(bitD->ptr >= bitD->start);
+ bitD->bitsConsumed &= 7;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ return BIT_DStream_unfinished;
+}
+
/*! BIT_reloadDStreamFast() :
* Similar to BIT_reloadDStream(), but with two differences:
* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
{
if (UNLIKELY(bitD->ptr < bitD->limitPtr))
return BIT_DStream_overflow;
- assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
- bitD->ptr -= bitD->bitsConsumed >> 3;
- bitD->bitsConsumed &= 7;
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
- return BIT_DStream_unfinished;
+ return BIT_reloadDStream_internal(bitD);
}
/*! BIT_reloadDStream() :
* Refill `bitD` from buffer previously set in BIT_initDStream() .
- * This function is safe, it guarantees it will not read beyond src buffer.
+ * This function is safe, it guarantees it will never read beyond src buffer.
* @return : status of `BIT_DStream_t` internal register.
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
-MEM_STATIC FORCE_INLINE_ATTR BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
{
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
+ /* note : once in overflow mode, a bitstream remains in this mode until it's reset */
+ if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
+ static const BitContainerType zeroFilled = 0;
+ bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
+ /* overflow detected, erroneous scenario or end of stream: no update */
return BIT_DStream_overflow;
+ }
+
+ assert(bitD->ptr >= bitD->start);
if (bitD->ptr >= bitD->limitPtr) {
- return BIT_reloadDStreamFast(bitD);
+ return BIT_reloadDStream_internal(bitD);
}
if (bitD->ptr == bitD->start) {
+ /* reached end of bitStream => no update */
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
return BIT_DStream_completed;
}
- /* start < ptr < limitPtr */
+ /* start < ptr < limitPtr => cautious update */
{ U32 nbBytes = bitD->bitsConsumed >> 3;
BIT_DStream_status result = BIT_DStream_unfinished;
if (bitD->ptr - nbBytes < bitD->start) {
#ifndef ZSTD_COMPILER_H
#define ZSTD_COMPILER_H
+#include <stddef.h>
+
#include "portability_macros.h"
/*-*******************************************************
# define WIN_CDECL
#endif
+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+#if defined(__GNUC__)
+# define UNUSED_ATTR __attribute__((unused))
+#else
+# define UNUSED_ATTR
+#endif
+
/**
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
* parameters. They must be inlined for the compiler to eliminate the constant
* branches.
*/
-#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR
/**
* HINT_INLINE is used to help the compiler generate better code. It is *not*
* used for "templates", so it can be tweaked based on the compilers
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
# define HINT_INLINE static INLINE_KEYWORD
#else
-# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
+# define HINT_INLINE FORCE_INLINE_TEMPLATE
#endif
-/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+/* "soft" inline :
+ * The compiler is free to select if it's a good idea to inline or not.
+ * The main objective is to silence compiler warnings
+ * when a defined function is included but not used.
+ *
+ * Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit.
+ * Updating the prefix is probably preferable, but requires a fairly large codemod,
+ * since this name is used everywhere.
+ */
+#ifndef MEM_STATIC /* already defined in Linux Kernel mem.h */
#if defined(__GNUC__)
-# define UNUSED_ATTR __attribute__((unused))
+# define MEM_STATIC static __inline UNUSED_ATTR
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+# define MEM_STATIC static __inline
#else
-# define UNUSED_ATTR
+# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
#endif
/* force no inlining */
/* prefetch
* can be disabled, by declaring NO_PREFETCH build macro */
#if defined(NO_PREFETCH)
-# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
-# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
+# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
+# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
#else
-# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
+# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
# elif defined(__aarch64__)
-# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
-# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
+# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
+# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
# else
-# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
-# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
+# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
+# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
# endif
#endif /* NO_PREFETCH */
#define CACHELINE_SIZE 64
-#define PREFETCH_AREA(p, s) { \
- const char* const _ptr = (const char*)(p); \
- size_t const _size = (size_t)(s); \
- size_t _pos; \
- for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
- PREFETCH_L2(_ptr + _pos); \
- } \
-}
+#define PREFETCH_AREA(p, s) \
+ do { \
+ const char* const _ptr = (const char*)(p); \
+ size_t const _size = (size_t)(s); \
+ size_t _pos; \
+ for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
+ PREFETCH_L2(_ptr + _pos); \
+ } \
+ } while (0)
/* vectorization
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
#endif
#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
-# define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
+# define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0)
#else
-# define ZSTD_UNREACHABLE { assert(0); }
+# define ZSTD_UNREACHABLE do { assert(0); } while (0)
#endif
/* disable warnings */
* Sanitizer
*****************************************************************/
+/**
+ * Zstd relies on pointer overflow in its decompressor.
+ * We add this attribute to functions that rely on pointer overflow.
+ */
+#ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+# if __has_attribute(no_sanitize)
+# if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8
+ /* gcc < 8 only has signed-integer-overlow which triggers on pointer overflow */
+# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow")))
+# else
+ /* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */
+# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow")))
+# endif
+# else
+# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+# endif
+#endif
+
+/**
+ * Helper function to perform a wrapped pointer difference without trigging
+ * UBSAN.
+ *
+ * @returns lhs - rhs with wrapping
+ */
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs)
+{
+ return lhs - rhs;
+}
+
+/**
+ * Helper function to perform a wrapped pointer add without triggering UBSAN.
+ *
+ * @return ptr + add with wrapping
+ */
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add)
+{
+ return ptr + add;
+}
+
+/**
+ * Helper function to perform a wrapped pointer subtraction without triggering
+ * UBSAN.
+ *
+ * @return ptr - sub with wrapping
+ */
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub)
+{
+ return ptr - sub;
+}
+
+/**
+ * Helper function to add to a pointer that works around C's undefined behavior
+ * of adding 0 to NULL.
+ *
+ * @returns `ptr + add` except it defines `NULL + 0 == NULL`.
+ */
+MEM_STATIC
+unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add)
+{
+ return add > 0 ? ptr + add : ptr;
+}
+
/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
* abundance of caution, disable our custom poisoning on mingw. */
#ifdef __MINGW32__
U32 f7b = 0;
U32 f7c = 0;
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+#if !defined(__clang__)
int reg[4];
__cpuid((int*)reg, 0);
{
f7c = (U32)reg[2];
}
}
+#else
+ /* Clang compiler has a bug (fixed in https://reviews.llvm.org/D101338) in
+ * which the `__cpuid` intrinsic does not save and restore `rbx` as it needs
+ * to due to being a reserved register. So in that case, do the `cpuid`
+ * ourselves. Clang supports inline assembly anyway.
+ */
+ U32 n;
+ __asm__(
+ "pushq %%rbx\n\t"
+ "cpuid\n\t"
+ "popq %%rbx\n\t"
+ : "=a"(n)
+ : "a"(0)
+ : "rcx", "rdx");
+ if (n >= 1) {
+ U32 f1a;
+ __asm__(
+ "pushq %%rbx\n\t"
+ "cpuid\n\t"
+ "popq %%rbx\n\t"
+ : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+ : "a"(1)
+ :);
+ }
+ if (n >= 7) {
+ __asm__(
+ "pushq %%rbx\n\t"
+ "cpuid\n\t"
+ "movq %%rbx, %%rax\n\t"
+ "popq %%rbx"
+ : "=a"(f7b), "=c"(f7c)
+ : "a"(7), "c"(0)
+ : "rdx");
+ }
+#endif
#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
/* The following block like the normal cpuid branch below, but gcc
* reserves ebx for use of its pic register so we must specially
#include "debug.h"
+#if !defined(ZSTD_LINUX_KERNEL) || (DEBUGLEVEL>=2)
+/* We only use this when DEBUGLEVEL>=2, but we get -Werror=pedantic errors if a
+ * translation unit is empty. So remove this from Linux kernel builds, but
+ * otherwise just leave it in.
+ */
int g_debuglevel = DEBUGLEVEL;
+#endif
It's useful when enabling very verbose levels
on selective conditions (such as position in src) */
-# define RAWLOG(l, ...) { \
- if (l<=g_debuglevel) { \
- ZSTD_DEBUG_PRINT(__VA_ARGS__); \
- } }
-# define DEBUGLOG(l, ...) { \
- if (l<=g_debuglevel) { \
- ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
- ZSTD_DEBUG_PRINT(" \n"); \
- } }
+# define RAWLOG(l, ...) \
+ do { \
+ if (l<=g_debuglevel) { \
+ ZSTD_DEBUG_PRINT(__VA_ARGS__); \
+ } \
+ } while (0)
+
+#define STRINGIFY(x) #x
+#define TOSTRING(x) STRINGIFY(x)
+#define LINE_AS_STRING TOSTRING(__LINE__)
+
+# define DEBUGLOG(l, ...) \
+ do { \
+ if (l<=g_debuglevel) { \
+ ZSTD_DEBUG_PRINT(__FILE__ ":" LINE_AS_STRING ": " __VA_ARGS__); \
+ ZSTD_DEBUG_PRINT(" \n"); \
+ } \
+ } while (0)
#else
-# define RAWLOG(l, ...) {} /* disabled */
-# define DEBUGLOG(l, ...) {} /* disabled */
+# define RAWLOG(l, ...) do { } while (0) /* disabled */
+# define DEBUGLOG(l, ...) do { } while (0) /* disabled */
#endif
ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
/* check and forward error code */
-#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
-#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
+#define CHECK_V_F(e, f) \
+ size_t const e = f; \
+ do { \
+ if (ERR_isError(e)) \
+ return e; \
+ } while (0)
+#define CHECK_F(f) do { CHECK_V_F(_var_err__, f); } while (0)
/*-****************************************
* We want to force this function invocation to be syntactically correct, but
* we don't want to force runtime evaluation of its arguments.
*/
-#define _FORCE_HAS_FORMAT_STRING(...) \
- if (0) { \
- _force_has_format_string(__VA_ARGS__); \
- }
+#define _FORCE_HAS_FORMAT_STRING(...) \
+ do { \
+ if (0) { \
+ _force_has_format_string(__VA_ARGS__); \
+ } \
+ } while (0)
#define ERR_QUOTE(str) #str
* In order to do that (particularly, printing the conditional that failed),
* this can't just wrap RETURN_ERROR().
*/
-#define RETURN_ERROR_IF(cond, err, ...) \
- if (cond) { \
- RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
- __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
- _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
- RAWLOG(3, ": " __VA_ARGS__); \
- RAWLOG(3, "\n"); \
- return ERROR(err); \
- }
+#define RETURN_ERROR_IF(cond, err, ...) \
+ do { \
+ if (cond) { \
+ RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
+ __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+ RAWLOG(3, ": " __VA_ARGS__); \
+ RAWLOG(3, "\n"); \
+ return ERROR(err); \
+ } \
+ } while (0)
/**
* Unconditionally return the specified error.
*
* In debug modes, prints additional information.
*/
-#define RETURN_ERROR(err, ...) \
- do { \
- RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
- __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
- _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
- RAWLOG(3, ": " __VA_ARGS__); \
- RAWLOG(3, "\n"); \
- return ERROR(err); \
- } while(0);
+#define RETURN_ERROR(err, ...) \
+ do { \
+ RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+ __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+ RAWLOG(3, ": " __VA_ARGS__); \
+ RAWLOG(3, "\n"); \
+ return ERROR(err); \
+ } while(0)
/**
* If the provided expression evaluates to an error code, returns that error code.
*
* In debug modes, prints additional information.
*/
-#define FORWARD_IF_ERROR(err, ...) \
- do { \
- size_t const err_code = (err); \
- if (ERR_isError(err_code)) { \
- RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
- __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
- _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
- RAWLOG(3, ": " __VA_ARGS__); \
- RAWLOG(3, "\n"); \
- return err_code; \
- } \
- } while(0);
+#define FORWARD_IF_ERROR(err, ...) \
+ do { \
+ size_t const err_code = (err); \
+ if (ERR_isError(err_code)) { \
+ RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
+ __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+ RAWLOG(3, ": " __VA_ARGS__); \
+ RAWLOG(3, "\n"); \
+ return err_code; \
+ } \
+ } while(0)
#if defined (__cplusplus)
}
#endif /* FSE_H */
+
#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
#define FSE_H_FSE_STATIC_LINKING_ONLY
FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
const U16* const stateTable = (const U16*)(statePtr->stateTable);
U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
- BIT_addBits(bitC, statePtr->value, nbBitsOut);
+ BIT_addBits(bitC, (size_t)statePtr->value, nbBitsOut);
statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
}
MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
{
- BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
+ BIT_addBits(bitC, (size_t)statePtr->value, statePtr->stateLog);
BIT_flushBits(bitC);
}
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#include "error_private.h"
-#define ZSTD_DEPS_NEED_MALLOC
-#include "zstd_deps.h"
+#include "zstd_deps.h" /* ZSTD_memcpy */
#include "bits.h" /* ZSTD_highbit32 */
symbolNext[s] = 1;
} else {
if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
- symbolNext[s] = normalizedCounter[s];
+ symbolNext[s] = (U16)normalizedCounter[s];
} } }
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
}
* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
* our buffer to handle the over-write.
*/
- {
- U64 const add = 0x0101010101010101ull;
+ { U64 const add = 0x0101010101010101ull;
size_t pos = 0;
U64 sv = 0;
U32 s;
for (i = 8; i < n; i += 8) {
MEM_write64(spread + pos + i, sv);
}
- pos += n;
- }
- }
+ pos += (size_t)n;
+ } }
/* Now we spread those positions across the table.
* The benefit of doing it in two stages is that we avoid the
* variable size inner loop, which caused lots of branch misses.
break;
} }
- return op-ostart;
+ assert(op >= ostart);
+ return (size_t)(op-ostart);
}
typedef struct {
short ncount[FSE_MAX_SYMBOL_VALUE + 1];
- FSE_DTable dtable[1]; /* Dynamically sized */
} FSE_DecompressWksp;
unsigned tableLog;
unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
+ size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable);
+ FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtablePos;
- DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
+ FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
+ /* correct offset to dtable depends on this property */
+ FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0);
+
/* normal FSE decoding mode */
- {
- size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
+ { size_t const NCountLength =
+ FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
if (FSE_isError(NCountLength)) return NCountLength;
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
assert(NCountLength <= cSrcSize);
workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
- CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
+ CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
{
- const void* ptr = wksp->dtable;
+ const void* ptr = dtable;
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
const U32 fastMode = DTableH->fastMode;
/* select fast mode (static) */
- if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
- return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
+ if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
+ return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
}
}
/** HUF_getNbBitsFromCTable() :
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
- * Note 1 : is not inlined, as HUF_CElt definition is private */
+ * Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0
+ * Note 2 : is not inlined, as HUF_CElt definition is private
+ */
U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
+typedef struct {
+ BYTE tableLog;
+ BYTE maxSymbolValue;
+ BYTE unused[sizeof(size_t) - 2];
+} HUF_CTableHeader;
+
+/** HUF_readCTableHeader() :
+ * @returns The header from the CTable specifying the tableLog and the maxSymbolValue.
+ */
+HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable);
+
/*
* HUF_decompress() does the following:
* 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
# include <stdlib.h> /* _byteswap_ulong */
# include <intrin.h> /* _byteswap_* */
#endif
-#if defined(__GNUC__)
-# define MEM_STATIC static __inline __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define MEM_STATIC static inline
-#elif defined(_MSC_VER)
-# define MEM_STATIC static __inline
-#else
-# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
/*-**************************************************************
* Basic Types
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
if (!threadPool) return 1;
/* replace existing thread pool */
- ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
+ ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(ZSTD_pthread_t));
ZSTD_customFree(ctx->threads, ctx->customMem);
ctx->threads = threadPool;
/* Initialize additional threads */
/*! POOL_resize() :
* Expands or shrinks pool's number of threads.
* This is more efficient than releasing + creating a new context,
- * since it tries to preserve and re-use existing threads.
+ * since it tries to preserve and reuse existing threads.
* `numThreads` must be at least 1.
* @return : 0 when resize was successful,
* !0 (typically 1) if there is an error.
/* Mark the internal assembly functions as hidden */
#ifdef __ELF__
# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
+#elif defined(__APPLE__)
+# define ZSTD_HIDE_ASM_FUNCTION(func) .private_extern func
#else
# define ZSTD_HIDE_ASM_FUNCTION(func)
#endif
ZSTD_thread_params_t thread_param;
(void)unused;
+ if (thread==NULL) return -1;
+ *thread = NULL;
+
thread_param.start_routine = start_routine;
thread_param.arg = arg;
thread_param.initialized = 0;
- *thread = NULL;
/* Setup thread initialization synchronization */
if(ZSTD_pthread_cond_init(&thread_param.initialized_cond, NULL)) {
/* Spawn thread */
*thread = (HANDLE)_beginthreadex(NULL, 0, worker, &thread_param, 0, NULL);
- if (!thread) {
+ if (*thread==NULL) {
ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex);
ZSTD_pthread_cond_destroy(&thread_param.initialized_cond);
return errno;
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
{
+ assert(mutex != NULL);
*mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t));
if (!*mutex)
return 1;
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
{
+ assert(mutex != NULL);
if (!*mutex)
return 0;
{
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
{
+ assert(cond != NULL);
*cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t));
if (!*cond)
return 1;
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
{
+ assert(cond != NULL);
if (!*cond)
return 0;
{
/*
- * xxHash - Fast Hash algorithm
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- *
- * You can contact the author at :
- * - xxHash homepage: https://cyan4973.github.io/xxHash/
- * - xxHash source repository : https://github.com/Cyan4973/xxHash
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (c) Yann Collet - Meta Platforms, Inc
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
-*/
-
-
+ */
/*
* xxhash.c instantiates functions defined in xxhash.h
*/
-#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
-#define XXH_IMPLEMENTATION /* access definitions */
+#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
+#define XXH_IMPLEMENTATION /* access definitions */
#include "xxhash.h"
/*
- * xxHash - Fast Hash algorithm
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- *
- * You can contact the author at :
- * - xxHash homepage: https://cyan4973.github.io/xxHash/
- * - xxHash source repository : https://github.com/Cyan4973/xxHash
+ * xxHash - Extremely Fast Hash algorithm
+ * Header File
+ * Copyright (c) Yann Collet - Meta Platforms, Inc
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
-*/
+ */
+/* Local adaptations for Zstandard */
#ifndef XXH_NO_XXH3
# define XXH_NO_XXH3
/*!
* @mainpage xxHash
*
+ * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed
+ * limits.
+ *
+ * It is proposed in four flavors, in three families:
+ * 1. @ref XXH32_family
+ * - Classic 32-bit hash function. Simple, compact, and runs on almost all
+ * 32-bit and 64-bit systems.
+ * 2. @ref XXH64_family
+ * - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most
+ * 64-bit systems (but _not_ 32-bit systems).
+ * 3. @ref XXH3_family
+ * - Modern 64-bit and 128-bit hash function family which features improved
+ * strength and performance across the board, especially on smaller data.
+ * It benefits greatly from SIMD and 64-bit without requiring it.
+ *
+ * Benchmarks
+ * ---
+ * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04.
+ * The open source benchmark program is compiled with clang v10.0 using -O3 flag.
+ *
+ * | Hash Name | ISA ext | Width | Large Data Speed | Small Data Velocity |
+ * | -------------------- | ------- | ----: | ---------------: | ------------------: |
+ * | XXH3_64bits() | @b AVX2 | 64 | 59.4 GB/s | 133.1 |
+ * | MeowHash | AES-NI | 128 | 58.2 GB/s | 52.5 |
+ * | XXH3_128bits() | @b AVX2 | 128 | 57.9 GB/s | 118.1 |
+ * | CLHash | PCLMUL | 64 | 37.1 GB/s | 58.1 |
+ * | XXH3_64bits() | @b SSE2 | 64 | 31.5 GB/s | 133.1 |
+ * | XXH3_128bits() | @b SSE2 | 128 | 29.6 GB/s | 118.1 |
+ * | RAM sequential read | | N/A | 28.0 GB/s | N/A |
+ * | ahash | AES-NI | 64 | 22.5 GB/s | 107.2 |
+ * | City64 | | 64 | 22.0 GB/s | 76.6 |
+ * | T1ha2 | | 64 | 22.0 GB/s | 99.0 |
+ * | City128 | | 128 | 21.7 GB/s | 57.7 |
+ * | FarmHash | AES-NI | 64 | 21.3 GB/s | 71.9 |
+ * | XXH64() | | 64 | 19.4 GB/s | 71.0 |
+ * | SpookyHash | | 64 | 19.3 GB/s | 53.2 |
+ * | Mum | | 64 | 18.0 GB/s | 67.0 |
+ * | CRC32C | SSE4.2 | 32 | 13.0 GB/s | 57.9 |
+ * | XXH32() | | 32 | 9.7 GB/s | 71.9 |
+ * | City32 | | 32 | 9.1 GB/s | 66.0 |
+ * | Blake3* | @b AVX2 | 256 | 4.4 GB/s | 8.1 |
+ * | Murmur3 | | 32 | 3.9 GB/s | 56.1 |
+ * | SipHash* | | 64 | 3.0 GB/s | 43.2 |
+ * | Blake3* | @b SSE2 | 256 | 2.4 GB/s | 8.1 |
+ * | HighwayHash | | 64 | 1.4 GB/s | 6.0 |
+ * | FNV64 | | 64 | 1.2 GB/s | 62.7 |
+ * | Blake2* | | 256 | 1.1 GB/s | 5.1 |
+ * | SHA1* | | 160 | 0.8 GB/s | 5.6 |
+ * | MD5* | | 128 | 0.6 GB/s | 7.8 |
+ * @note
+ * - Hashes which require a specific ISA extension are noted. SSE2 is also noted,
+ * even though it is mandatory on x64.
+ * - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic
+ * by modern standards.
+ * - Small data velocity is a rough average of algorithm's efficiency for small
+ * data. For more accurate information, see the wiki.
+ * - More benchmarks and strength tests are found on the wiki:
+ * https://github.com/Cyan4973/xxHash/wiki
+ *
+ * Usage
+ * ------
+ * All xxHash variants use a similar API. Changing the algorithm is a trivial
+ * substitution.
+ *
+ * @pre
+ * For functions which take an input and length parameter, the following
+ * requirements are assumed:
+ * - The range from [`input`, `input + length`) is valid, readable memory.
+ * - The only exception is if the `length` is `0`, `input` may be `NULL`.
+ * - For C++, the objects must have the *TriviallyCopyable* property, as the
+ * functions access bytes directly as if it was an array of `unsigned char`.
+ *
+ * @anchor single_shot_example
+ * **Single Shot**
+ *
+ * These functions are stateless functions which hash a contiguous block of memory,
+ * immediately returning the result. They are the easiest and usually the fastest
+ * option.
+ *
+ * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
+ *
+ * @code{.c}
+ * #include <string.h>
+ * #include "xxhash.h"
+ *
+ * // Example for a function which hashes a null terminated string with XXH32().
+ * XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
+ * {
+ * // NULL pointers are only valid if the length is zero
+ * size_t length = (string == NULL) ? 0 : strlen(string);
+ * return XXH32(string, length, seed);
+ * }
+ * @endcode
+ *
+ *
+ * @anchor streaming_example
+ * **Streaming**
+ *
+ * These groups of functions allow incremental hashing of unknown size, even
+ * more than what would fit in a size_t.
+ *
+ * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
+ *
+ * @code{.c}
+ * #include <stdio.h>
+ * #include <assert.h>
+ * #include "xxhash.h"
+ * // Example for a function which hashes a FILE incrementally with XXH3_64bits().
+ * XXH64_hash_t hashFile(FILE* f)
+ * {
+ * // Allocate a state struct. Do not just use malloc() or new.
+ * XXH3_state_t* state = XXH3_createState();
+ * assert(state != NULL && "Out of memory!");
+ * // Reset the state to start a new hashing session.
+ * XXH3_64bits_reset(state);
+ * char buffer[4096];
+ * size_t count;
+ * // Read the file in chunks
+ * while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) {
+ * // Run update() as many times as necessary to process the data
+ * XXH3_64bits_update(state, buffer, count);
+ * }
+ * // Retrieve the finalized hash. This will not change the state.
+ * XXH64_hash_t result = XXH3_64bits_digest(state);
+ * // Free the state. Do not use free().
+ * XXH3_freeState(state);
+ * return result;
+ * }
+ * @endcode
+ *
+ * Streaming functions generate the xxHash value from an incremental input.
+ * This method is slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * An XXH state must first be allocated using `XXH*_createState()`.
+ *
+ * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
+ *
+ * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
+ *
+ * The function returns an error code, with 0 meaning OK, and any other value
+ * meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a
+ * digest, and generate new hash values later on by invoking `XXH*_digest()`.
+ *
+ * When done, release the state using `XXH*_freeState()`.
+ *
+ *
+ * @anchor canonical_representation_example
+ * **Canonical Representation**
+ *
+ * The default return values from XXH functions are unsigned 32, 64 and 128 bit
+ * integers.
+ * This the simplest and fastest format for further post-processing.
+ *
+ * However, this leaves open the question of what is the order on the byte level,
+ * since little and big endian conventions will store the same number differently.
+ *
+ * The canonical representation settles this issue by mandating big-endian
+ * convention, the same convention as human-readable numbers (large digits first).
+ *
+ * When writing hash values to storage, sending them over a network, or printing
+ * them, it's highly recommended to use the canonical representation to ensure
+ * portability across a wider range of systems, present and future.
+ *
+ * The following functions allow transformation of hash values to and from
+ * canonical format.
+ *
+ * XXH32_canonicalFromHash(), XXH32_hashFromCanonical(),
+ * XXH64_canonicalFromHash(), XXH64_hashFromCanonical(),
+ * XXH128_canonicalFromHash(), XXH128_hashFromCanonical(),
+ *
+ * @code{.c}
+ * #include <stdio.h>
+ * #include "xxhash.h"
+ *
+ * // Example for a function which prints XXH32_hash_t in human readable format
+ * void printXxh32(XXH32_hash_t hash)
+ * {
+ * XXH32_canonical_t cano;
+ * XXH32_canonicalFromHash(&cano, hash);
+ * size_t i;
+ * for(i = 0; i < sizeof(cano.digest); ++i) {
+ * printf("%02x", cano.digest[i]);
+ * }
+ * printf("\n");
+ * }
+ *
+ * // Example for a function which converts XXH32_canonical_t to XXH32_hash_t
+ * XXH32_hash_t convertCanonicalToXxh32(XXH32_canonical_t cano)
+ * {
+ * XXH32_hash_t hash = XXH32_hashFromCanonical(&cano);
+ * return hash;
+ * }
+ * @endcode
+ *
+ *
* @file xxhash.h
* xxHash prototypes and implementation
*/
-/* TODO: update */
-/* Notice extracted from xxHash homepage:
-
-xxHash is an extremely fast hash algorithm, running at RAM speed limits.
-It also successfully passes all tests from the SMHasher suite.
-
-Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
-
-Name Speed Q.Score Author
-xxHash 5.4 GB/s 10
-CrapWow 3.2 GB/s 2 Andrew
-MurmurHash 3a 2.7 GB/s 10 Austin Appleby
-SpookyHash 2.0 GB/s 10 Bob Jenkins
-SBox 1.4 GB/s 9 Bret Mulvey
-Lookup3 1.2 GB/s 9 Bob Jenkins
-SuperFastHash 1.2 GB/s 1 Paul Hsieh
-CityHash64 1.05 GB/s 10 Pike & Alakuijala
-FNV 0.55 GB/s 5 Fowler, Noll, Vo
-CRC32 0.43 GB/s 9
-MD5-32 0.33 GB/s 10 Ronald L. Rivest
-SHA1-32 0.28 GB/s 10
-
-Q.Score is a measure of quality of the hash function.
-It depends on successfully passing SMHasher test set.
-10 is a perfect score.
-
-Note: SMHasher's CRC32 implementation is not the fastest one.
-Other speed-oriented implementations can be faster,
-especially in combination with PCLMUL instruction:
-https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
-
-A 64-bit version, named XXH64, is available since r35.
-It offers much better speed, but for 64-bit applications only.
-Name Speed on 64 bits Speed on 32 bits
-XXH64 13.8 GB/s 1.9 GB/s
-XXH32 6.8 GB/s 6.0 GB/s
-*/
#if defined (__cplusplus)
extern "C" {
* INLINE mode
******************************/
/*!
- * XXH_INLINE_ALL (and XXH_PRIVATE_API)
+ * @defgroup public Public API
+ * Contains details on the public xxHash functions.
+ * @{
+ */
+#ifdef XXH_DOXYGEN
+/*!
+ * @brief Gives access to internal state declaration, required for static allocation.
+ *
+ * Incompatible with dynamic linking, due to risks of ABI changes.
+ *
+ * Usage:
+ * @code{.c}
+ * #define XXH_STATIC_LINKING_ONLY
+ * #include "xxhash.h"
+ * @endcode
+ */
+# define XXH_STATIC_LINKING_ONLY
+/* Do not undef XXH_STATIC_LINKING_ONLY for Doxygen */
+
+/*!
+ * @brief Gives access to internal definitions.
+ *
+ * Usage:
+ * @code{.c}
+ * #define XXH_STATIC_LINKING_ONLY
+ * #define XXH_IMPLEMENTATION
+ * #include "xxhash.h"
+ * @endcode
+ */
+# define XXH_IMPLEMENTATION
+/* Do not undef XXH_IMPLEMENTATION for Doxygen */
+
+/*!
+ * @brief Exposes the implementation and marks all functions as `inline`.
+ *
* Use these build macros to inline xxhash into the target unit.
* Inlining improves performance on small inputs, especially when the length is
* expressed as a compile-time constant:
*
- * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
+ * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
*
* It also keeps xxHash symbols private to the unit, so they are not exported.
*
* Usage:
+ * @code{.c}
* #define XXH_INLINE_ALL
* #include "xxhash.h"
- *
+ * @endcode
* Do not compile and link xxhash.o as a separate object, as it is not useful.
*/
+# define XXH_INLINE_ALL
+# undef XXH_INLINE_ALL
+/*!
+ * @brief Exposes the implementation without marking functions as inline.
+ */
+# define XXH_PRIVATE_API
+# undef XXH_PRIVATE_API
+/*!
+ * @brief Emulate a namespace by transparently prefixing all symbols.
+ *
+ * If you want to include _and expose_ xxHash functions from within your own
+ * library, but also want to avoid symbol collisions with other libraries which
+ * may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix
+ * any public symbol from xxhash library with the value of @ref XXH_NAMESPACE
+ * (therefore, avoid empty or numeric values).
+ *
+ * Note that no change is required within the calling program as long as it
+ * includes `xxhash.h`: Regular symbol names will be automatically translated
+ * by this header.
+ */
+# define XXH_NAMESPACE /* YOUR NAME HERE */
+# undef XXH_NAMESPACE
+#endif
+
#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
&& !defined(XXH_INLINE_ALL_31684351384)
/* this section should be traversed only once */
# undef XXHASH_H_STATIC_13879238742
#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
-
-
/* ****************************************************************
* Stable API
*****************************************************************/
#ifndef XXHASH_H_5627135585666179
#define XXHASH_H_5627135585666179 1
-
-/*!
- * @defgroup public Public API
- * Contains details on the public xxHash functions.
- * @{
- */
-/* specific declaration modes for Windows */
+/*! @brief Marks a global symbol. */
#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
# ifdef XXH_EXPORT
# endif
#endif
-#ifdef XXH_DOXYGEN
-/*!
- * @brief Emulate a namespace by transparently prefixing all symbols.
- *
- * If you want to include _and expose_ xxHash functions from within your own
- * library, but also want to avoid symbol collisions with other libraries which
- * may also include xxHash, you can use XXH_NAMESPACE to automatically prefix
- * any public symbol from xxhash library with the value of XXH_NAMESPACE
- * (therefore, avoid empty or numeric values).
- *
- * Note that no change is required within the calling program as long as it
- * includes `xxhash.h`: Regular symbol names will be automatically translated
- * by this header.
- */
-# define XXH_NAMESPACE /* YOUR NAME HERE */
-# undef XXH_NAMESPACE
-#endif
-
#ifdef XXH_NAMESPACE
# define XXH_CAT(A,B) A##B
# define XXH_NAME2(A,B) XXH_CAT(A,B)
#endif
+/* *************************************
+* Compiler specifics
+***************************************/
+
+/* specific declaration modes for Windows */
+#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
+# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
+# ifdef XXH_EXPORT
+# define XXH_PUBLIC_API __declspec(dllexport)
+# elif XXH_IMPORT
+# define XXH_PUBLIC_API __declspec(dllimport)
+# endif
+# else
+# define XXH_PUBLIC_API /* do nothing */
+# endif
+#endif
+
+#if defined (__GNUC__)
+# define XXH_CONSTF __attribute__((const))
+# define XXH_PUREF __attribute__((pure))
+# define XXH_MALLOCF __attribute__((malloc))
+#else
+# define XXH_CONSTF /* disable */
+# define XXH_PUREF
+# define XXH_MALLOCF
+#endif
+
/* *************************************
* Version
***************************************/
#define XXH_VERSION_MAJOR 0
#define XXH_VERSION_MINOR 8
-#define XXH_VERSION_RELEASE 1
+#define XXH_VERSION_RELEASE 2
+/*! @brief Version number, encoded as two digits each */
#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
/*!
* This is mostly useful when xxHash is compiled as a shared library,
* since the returned value comes from the library, as opposed to header file.
*
- * @return `XXH_VERSION_NUMBER` of the invoked library.
+ * @return @ref XXH_VERSION_NUMBER of the invoked library.
*/
-XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);
/* ****************************
* Common basic types
******************************/
#include <stddef.h> /* size_t */
-typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+/*!
+ * @brief Exit code for the streaming API.
+ */
+typedef enum {
+ XXH_OK = 0, /*!< OK */
+ XXH_ERROR /*!< Error */
+} XXH_errorcode;
/*-**********************************************************************
#elif !defined (__VMS) \
&& (defined (__cplusplus) \
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
+# ifdef _AIX
+# include <inttypes.h>
+# else
+# include <stdint.h>
+# endif
typedef uint32_t XXH32_hash_t;
#else
# include <limits.h>
# if UINT_MAX == 0xFFFFFFFFUL
typedef unsigned int XXH32_hash_t;
+# elif ULONG_MAX == 0xFFFFFFFFUL
+ typedef unsigned long XXH32_hash_t;
# else
-# if ULONG_MAX == 0xFFFFFFFFUL
- typedef unsigned long XXH32_hash_t;
-# else
-# error "unsupported platform: need a 32-bit type"
-# endif
+# error "unsupported platform: need a 32-bit type"
# endif
#endif
/*!
* @}
*
- * @defgroup xxh32_family XXH32 family
+ * @defgroup XXH32_family XXH32 family
* @ingroup public
* Contains functions used in the classic 32-bit xxHash algorithm.
*
* @note
* XXH32 is useful for older platforms, with no or poor 64-bit performance.
- * Note that @ref xxh3_family provides competitive speed
- * for both 32-bit and 64-bit systems, and offers true 64/128 bit hash results.
+ * Note that the @ref XXH3_family provides competitive speed for both 32-bit
+ * and 64-bit systems, and offers true 64/128 bit hash results.
*
- * @see @ref xxh64_family, @ref xxh3_family : Other xxHash families
- * @see @ref xxh32_impl for implementation details
+ * @see @ref XXH64_family, @ref XXH3_family : Other xxHash families
+ * @see @ref XXH32_impl for implementation details
* @{
*/
/*!
* @brief Calculates the 32-bit hash of @p input using xxHash32.
*
- * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
- *
* @param input The block of data to be hashed, at least @p length bytes in size.
* @param length The length of @p input, in bytes.
* @param seed The 32-bit seed to alter the hash's output predictably.
* readable, contiguous memory. However, if @p length is `0`, @p input may be
* `NULL`. In C++, this also must be *TriviallyCopyable*.
*
- * @return The calculated 32-bit hash value.
+ * @return The calculated 32-bit xxHash32 value.
*
- * @see
- * XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
- * Direct equivalents for the other variants of xxHash.
- * @see
- * XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
- */
-XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
-
-/*!
- * Streaming functions generate the xxHash value from an incremental input.
- * This method is slower than single-call functions, due to state management.
- * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
- *
- * An XXH state must first be allocated using `XXH*_createState()`.
- *
- * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
- *
- * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
- *
- * The function returns an error code, with 0 meaning OK, and any other value
- * meaning there is an error.
- *
- * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
- * This function returns the nn-bits hash as an int or long long.
- *
- * It's still possible to continue inserting input into the hash state after a
- * digest, and generate new hash values later on by invoking `XXH*_digest()`.
- *
- * When done, release the state using `XXH*_freeState()`.
- *
- * Example code for incrementally hashing a file:
- * @code{.c}
- * #include <stdio.h>
- * #include <xxhash.h>
- * #define BUFFER_SIZE 256
- *
- * // Note: XXH64 and XXH3 use the same interface.
- * XXH32_hash_t
- * hashFile(FILE* stream)
- * {
- * XXH32_state_t* state;
- * unsigned char buf[BUFFER_SIZE];
- * size_t amt;
- * XXH32_hash_t hash;
- *
- * state = XXH32_createState(); // Create a state
- * assert(state != NULL); // Error check here
- * XXH32_reset(state, 0xbaad5eed); // Reset state with our seed
- * while ((amt = fread(buf, 1, sizeof(buf), stream)) != 0) {
- * XXH32_update(state, buf, amt); // Hash the file in chunks
- * }
- * hash = XXH32_digest(state); // Finalize the hash
- * XXH32_freeState(state); // Clean up
- * return hash;
- * }
- * @endcode
+ * @see @ref single_shot_example "Single Shot Example" for an example.
*/
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
+#ifndef XXH_NO_STREAM
/*!
* @typedef struct XXH32_state_s XXH32_state_t
* @brief The opaque state struct for the XXH32 streaming API.
/*!
* @brief Allocates an @ref XXH32_state_t.
*
- * Must be freed with XXH32_freeState().
- * @return An allocated XXH32_state_t on success, `NULL` on failure.
+ * @return An allocated pointer of @ref XXH32_state_t on success.
+ * @return `NULL` on failure.
+ *
+ * @note Must be freed with XXH32_freeState().
*/
-XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
/*!
* @brief Frees an @ref XXH32_state_t.
*
- * Must be allocated with XXH32_createState().
* @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
- * @return XXH_OK.
+ *
+ * @return @ref XXH_OK.
+ *
+ * @note @p statePtr must be allocated with XXH32_createState().
+ *
*/
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
/*!
/*!
* @brief Resets an @ref XXH32_state_t to begin a new hash.
*
- * This function resets and seeds a state. Call it before @ref XXH32_update().
- *
* @param statePtr The state struct to reset.
* @param seed The 32-bit seed to alter the hash result predictably.
*
* @pre
* @p statePtr must not be `NULL`.
*
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note This function resets and seeds a state. Call it before @ref XXH32_update().
*/
XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);
/*!
* @brief Consumes a block of @p input to an @ref XXH32_state_t.
*
- * Call this to incrementally consume blocks of data.
- *
* @param statePtr The state struct to update.
* @param input The block of data to be hashed, at least @p length bytes in size.
* @param length The length of @p input, in bytes.
* readable, contiguous memory. However, if @p length is `0`, @p input may be
* `NULL`. In C++, this also must be *TriviallyCopyable*.
*
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note Call this to incrementally consume blocks of data.
*/
XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
/*!
* @brief Returns the calculated hash value from an @ref XXH32_state_t.
*
- * @note
- * Calling XXH32_digest() will not affect @p statePtr, so you can update,
- * digest, and update again.
- *
* @param statePtr The state struct to calculate the hash from.
*
* @pre
* @p statePtr must not be `NULL`.
*
- * @return The calculated xxHash32 value from that state.
+ * @return The calculated 32-bit xxHash32 value from that state.
+ *
+ * @note
+ * Calling XXH32_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
*/
-XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
/******* Canonical representation *******/
-/*
- * The default return values from XXH functions are unsigned 32 and 64 bit
- * integers.
- * This the simplest and fastest format for further post-processing.
- *
- * However, this leaves open the question of what is the order on the byte level,
- * since little and big endian conventions will store the same number differently.
- *
- * The canonical representation settles this issue by mandating big-endian
- * convention, the same convention as human-readable numbers (large digits first).
- *
- * When writing hash values to storage, sending them over a network, or printing
- * them, it's highly recommended to use the canonical representation to ensure
- * portability across a wider range of systems, present and future.
- *
- * The following functions allow transformation of hash values to and from
- * canonical format.
- */
-
/*!
* @brief Canonical (big endian) representation of @ref XXH32_hash_t.
*/
/*!
* @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
*
- * @param dst The @ref XXH32_canonical_t pointer to be stored to.
+ * @param dst The @ref XXH32_canonical_t pointer to be stored to.
* @param hash The @ref XXH32_hash_t to be converted.
*
* @pre
* @p dst must not be `NULL`.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
*/
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
* @p src must not be `NULL`.
*
* @return The converted hash.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
*/
-XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+/*! @cond Doxygen ignores this part */
#ifdef __has_attribute
# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
#else
# define XXH_HAS_ATTRIBUTE(x) 0
#endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/*
+ * C23 __STDC_VERSION__ number hasn't been specified yet. For now
+ * leave as `201711L` (C17 + 1).
+ * TODO: Update to correct value when it's been specified.
+ */
+#define XXH_C23_VN 201711L
+/*! @endcond */
+/*! @cond Doxygen ignores this part */
/* C-language Attributes are added in C23. */
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
#else
# define XXH_HAS_C_ATTRIBUTE(x) 0
#endif
+/*! @endcond */
+/*! @cond Doxygen ignores this part */
#if defined(__cplusplus) && defined(__has_cpp_attribute)
# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
#else
# define XXH_HAS_CPP_ATTRIBUTE(x) 0
#endif
+/*! @endcond */
+/*! @cond Doxygen ignores this part */
/*
-Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
-introduced in CPP17 and C23.
-CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
-C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough
-*/
-#if XXH_HAS_C_ATTRIBUTE(x)
-# define XXH_FALLTHROUGH [[fallthrough]]
-#elif XXH_HAS_CPP_ATTRIBUTE(x)
+ * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
+ * introduced in CPP17 and C23.
+ * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
+ * C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough
+ */
+#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
# define XXH_FALLTHROUGH [[fallthrough]]
#elif XXH_HAS_ATTRIBUTE(__fallthrough__)
-# define XXH_FALLTHROUGH __attribute__ ((fallthrough))
+# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
+#else
+# define XXH_FALLTHROUGH /* fallthrough */
+#endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/*
+ * Define XXH_NOESCAPE for annotated pointers in public API.
+ * https://clang.llvm.org/docs/AttributeReference.html#noescape
+ * As of writing this, only supported by clang.
+ */
+#if XXH_HAS_ATTRIBUTE(noescape)
+# define XXH_NOESCAPE __attribute__((noescape))
#else
-# define XXH_FALLTHROUGH
+# define XXH_NOESCAPE
#endif
+/*! @endcond */
+
/*!
* @}
#elif !defined (__VMS) \
&& (defined (__cplusplus) \
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
+# ifdef _AIX
+# include <inttypes.h>
+# else
+# include <stdint.h>
+# endif
typedef uint64_t XXH64_hash_t;
#else
# include <limits.h>
/*!
* @}
*
- * @defgroup xxh64_family XXH64 family
+ * @defgroup XXH64_family XXH64 family
* @ingroup public
* @{
* Contains functions used in the classic 64-bit xxHash algorithm.
* It provides better speed for systems with vector processing capabilities.
*/
-
/*!
* @brief Calculates the 64-bit hash of @p input using xxHash64.
*
- * This function usually runs faster on 64-bit systems, but slower on 32-bit
- * systems (see benchmark).
- *
* @param input The block of data to be hashed, at least @p length bytes in size.
* @param length The length of @p input, in bytes.
* @param seed The 64-bit seed to alter the hash's output predictably.
* readable, contiguous memory. However, if @p length is `0`, @p input may be
* `NULL`. In C++, this also must be *TriviallyCopyable*.
*
- * @return The calculated 64-bit hash.
+ * @return The calculated 64-bit xxHash64 value.
*
- * @see
- * XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
- * Direct equivalents for the other variants of xxHash.
- * @see
- * XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
+ * @see @ref single_shot_example "Single Shot Example" for an example.
*/
-XXH_PUBLIC_API XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
/******* Streaming *******/
+#ifndef XXH_NO_STREAM
/*!
* @brief The opaque state struct for the XXH64 streaming API.
*
* @see XXH64_state_s for details.
*/
typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
-XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
-XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
-XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
-XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
-
-/******* Canonical representation *******/
-typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
-XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
-XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
+/*!
+ * @brief Allocates an @ref XXH64_state_t.
+ *
+ * @return An allocated pointer of @ref XXH64_state_t on success.
+ * @return `NULL` on failure.
+ *
+ * @note Must be freed with XXH64_freeState().
+ */
+XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);
+
+/*!
+ * @brief Frees an @ref XXH64_state_t.
+ *
+ * @param statePtr A pointer to an @ref XXH64_state_t allocated with @ref XXH64_createState().
+ *
+ * @return @ref XXH_OK.
+ *
+ * @note @p statePtr must be allocated with XXH64_createState().
+ */
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
+
+/*!
+ * @brief Copies one @ref XXH64_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ * @p dst_state and @p src_state must not be `NULL` and must not overlap.
+ */
+XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH64_state_t to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note This function resets and seeds a state. Call it before @ref XXH64_update().
+ */
+XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH64_state_t.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note Call this to incrementally consume blocks of data.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated hash value from an @ref XXH64_state_t.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return The calculated 64-bit xxHash64 value from that state.
+ *
+ * @note
+ * Calling XXH64_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
+/******* Canonical representation *******/
+
+/*!
+ * @brief Canonical (big endian) representation of @ref XXH64_hash_t.
+ */
+typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
+
+/*!
+ * @brief Converts an @ref XXH64_hash_t to a big endian @ref XXH64_canonical_t.
+ *
+ * @param dst The @ref XXH64_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH64_hash_t to be converted.
+ *
+ * @pre
+ * @p dst must not be `NULL`.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH64_canonical_t to a native @ref XXH64_hash_t.
+ *
+ * @param src The @ref XXH64_canonical_t to convert.
+ *
+ * @pre
+ * @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src);
+
+#ifndef XXH_NO_XXH3
-#ifndef XXH_NO_XXH3
/*!
* @}
* ************************************************************************
- * @defgroup xxh3_family XXH3 family
+ * @defgroup XXH3_family XXH3 family
* @ingroup public
* @{
*
*
* XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
* but does not require it.
- * Any 32-bit and 64-bit targets that can run XXH32 smoothly
- * can run XXH3 at competitive speeds, even without vector support.
- * Further details are explained in the implementation.
- *
- * Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8,
- * ZVector and scalar targets. This can be controlled via the XXH_VECTOR macro.
+ * Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3
+ * at competitive speeds, even without vector support. Further details are
+ * explained in the implementation.
+ *
+ * XXH3 has a fast scalar implementation, but it also includes accelerated SIMD
+ * implementations for many common platforms:
+ * - AVX512
+ * - AVX2
+ * - SSE2
+ * - ARM NEON
+ * - WebAssembly SIMD128
+ * - POWER8 VSX
+ * - s390x ZVector
+ * This can be controlled via the @ref XXH_VECTOR macro, but it automatically
+ * selects the best version according to predefined macros. For the x86 family, an
+ * automatic runtime dispatcher is included separately in @ref xxh_x86dispatch.c.
*
* XXH3 implementation is portable:
* it has a generic C90 formulation that can be compiled on any platform,
- * all implementations generage exactly the same hash value on all platforms.
+ * all implementations generate exactly the same hash value on all platforms.
* Starting from v0.8.0, it's also labelled "stable", meaning that
* any future version will also generate the same hash value.
*
*
* The API supports one-shot hashing, streaming mode, and custom secrets.
*/
-
/*-**********************************************************************
* XXH3 64-bit variant
************************************************************************/
-/* XXH3_64bits():
- * default 64-bit variant, using default secret and default seed of 0.
- * It's the fastest variant. */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len);
+/*!
+ * @brief Calculates 64-bit unseeded variant of XXH3 hash of @p input.
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @note
+ * This is equivalent to @ref XXH3_64bits_withSeed() with a seed of `0`, however
+ * it may have slightly better performance due to constant propagation of the
+ * defaults.
+ *
+ * @see
+ * XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length);
-/*
- * XXH3_64bits_withSeed():
- * This variant generates a custom secret on the fly
- * based on default secret altered using the `seed` value.
+/*!
+ * @brief Calculates 64-bit seeded variant of XXH3 hash of @p input.
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @note
+ * seed == 0 produces the same results as @ref XXH3_64bits().
+ *
+ * This variant generates a custom secret on the fly based on default secret
+ * altered using the @p seed value.
+ *
* While this operation is decently fast, note that it's not completely free.
- * Note: seed==0 produces the same results as XXH3_64bits().
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
*/
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
/*!
* The bare minimum size for a custom secret.
*/
#define XXH3_SECRET_SIZE_MIN 136
-/*
- * XXH3_64bits_withSecret():
+/*!
+ * @brief Calculates 64-bit variant of XXH3 with a custom "secret".
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @pre
+ * The memory between @p data and @p data + @p len must be valid,
+ * readable, contiguous memory. However, if @p len is `0`, @p data may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
* It's possible to provide any blob of bytes as a "secret" to generate the hash.
* This makes it more difficult for an external actor to prepare an intentional collision.
- * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
+ * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
* However, the quality of the secret impacts the dispersion of the hash algorithm.
* Therefore, the secret _must_ look like a bunch of random bytes.
* Avoid "trivial" or structured data such as repeated sequences or a text document.
* Whenever in doubt about the "randomness" of the blob of bytes,
- * consider employing "XXH3_generateSecret()" instead (see below).
+ * consider employing @ref XXH3_generateSecret() instead (see below).
* It will generate a proper high entropy secret derived from the blob of bytes.
* Another advantage of using XXH3_generateSecret() is that
* it guarantees that all bits within the initial blob of bytes
* will impact every bit of the output.
* This is not necessarily the case when using the blob of bytes directly
* because, when hashing _small_ inputs, only a portion of the secret is employed.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
*/
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
/******* Streaming *******/
+#ifndef XXH_NO_STREAM
/*
* Streaming requires state maintenance.
* This operation costs memory and CPU.
*/
/*!
- * @brief The state struct for the XXH3 streaming API.
+ * @brief The opaque state struct for the XXH3 streaming API.
*
* @see XXH3_state_s for details.
*/
typedef struct XXH3_state_s XXH3_state_t;
-XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void);
+XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
-XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
-/*
- * XXH3_64bits_reset():
- * Initialize with default parameters.
- * digest will be equivalent to `XXH3_64bits()`.
+/*!
+ * @brief Copies one @ref XXH3_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ * @p dst_state and @p src_state must not be `NULL` and must not overlap.
*/
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
-/*
- * XXH3_64bits_reset_withSeed():
- * Generate a custom secret from `seed`, and store it into `statePtr`.
- * digest will be equivalent to `XXH3_64bits_withSeed()`.
+XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * - This function resets `statePtr` and generate a secret with default parameters.
+ * - Call this function before @ref XXH3_64bits_update().
+ * - Digest will be equivalent to `XXH3_64bits()`.
+ *
*/
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
-/*
- * XXH3_64bits_reset_withSecret():
- * `secret` is referenced, it _must outlive_ the hash streaming session.
- * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * - This function resets `statePtr` and generate a secret from `seed`.
+ * - Call this function before @ref XXH3_64bits_update().
+ * - Digest will be equivalent to `XXH3_64bits_withSeed()`.
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * `secret` is referenced, it _must outlive_ the hash streaming session.
+ *
+ * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
* and the quality of produced hash values depends on secret's entropy
* (secret's content should look like a bunch of random bytes).
* When in doubt about the randomness of a candidate `secret`,
* consider employing `XXH3_generateSecret()` instead (see below).
*/
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* statePtr);
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH3_state_t.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note Call this to incrementally consume blocks of data.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated XXH3 64-bit hash value from an @ref XXH3_state_t.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return The calculated XXH3 64-bit hash value from that state.
+ *
+ * @note
+ * Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
/* note : canonical representation of XXH3 is the same as XXH64
* since they both produce XXH64_hash_t values */
XXH64_hash_t high64; /*!< `value >> 64` */
} XXH128_hash_t;
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
+/*!
+ * @brief Calculates 128-bit unseeded variant of XXH3 of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * The 128-bit variant of XXH3 has more strength, but it has a bit of overhead
+ * for shorter inputs.
+ *
+ * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of `0`, however
+ * it may have slightly better performance due to constant propagation of the
+ * defaults.
+ *
+ * @see XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);
+/*! @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * @note
+ * seed == 0 produces the same results as @ref XXH3_128bits().
+ *
+ * This variant generates a custom secret on the fly based on default secret
+ * altered using the @p seed value.
+ *
+ * While this operation is decently fast, note that it's not completely free.
+ *
+ * @see XXH3_128bits(), XXH3_128bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
+/*!
+ * @brief Calculates 128-bit variant of XXH3 with a custom "secret".
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * It's possible to provide any blob of bytes as a "secret" to generate the hash.
+ * This makes it more difficult for an external actor to prepare an intentional collision.
+ * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
+ * However, the quality of the secret impacts the dispersion of the hash algorithm.
+ * Therefore, the secret _must_ look like a bunch of random bytes.
+ * Avoid "trivial" or structured data such as repeated sequences or a text document.
+ * Whenever in doubt about the "randomness" of the blob of bytes,
+ * consider employing @ref XXH3_generateSecret() instead (see below).
+ * It will generate a proper high entropy secret derived from the blob of bytes.
+ * Another advantage of using XXH3_generateSecret() is that
+ * it guarantees that all bits within the initial blob of bytes
+ * will impact every bit of the output.
+ * This is not necessarily the case when using the blob of bytes directly
+ * because, when hashing _small_ inputs, only a portion of the secret is employed.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
/******* Streaming *******/
+#ifndef XXH_NO_STREAM
/*
* Streaming requires state maintenance.
* This operation costs memory and CPU.
* XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
* Use already declared XXH3_createState() and XXH3_freeState().
*
- * All reset and streaming functions have same meaning as their 64-bit counterpart.
+ * All reset and streaming functions have same meaning as their 64-bit counterpart.
+ */
+
+/*!
+ * @brief Resets an @ref XXH3_state_t to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * - This function resets `statePtr` and generate a secret with default parameters.
+ * - Call it before @ref XXH3_128bits_update().
+ * - Digest will be equivalent to `XXH3_128bits()`.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * - This function resets `statePtr` and generate a secret from `seed`.
+ * - Call it before @ref XXH3_128bits_update().
+ * - Digest will be equivalent to `XXH3_128bits_withSeed()`.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * `secret` is referenced, it _must outlive_ the hash streaming session.
+ * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
+ * and the quality of produced hash values depends on secret's entropy
+ * (secret's content should look like a bunch of random bytes).
+ * When in doubt about the randomness of a candidate `secret`,
+ * consider employing `XXH3_generateSecret()` instead (see below).
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH3_state_t.
+ *
+ * Call this to incrementally consume blocks of data.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated XXH3 128-bit hash value from an @ref XXH3_state_t.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return The calculated XXH3 128-bit hash value from that state.
+ *
+ * @note
+ * Calling XXH3_128bits_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ *
*/
-
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr);
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
-
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
/* Following helper functions make it possible to compare XXH128_hast_t values.
* Since XXH128_hash_t is a structure, this capability is not offered by the language.
* Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
/*!
- * XXH128_isEqual():
- * Return: 1 if `h1` and `h2` are equal, 0 if they are not.
+ * @brief Check equality of two XXH128_hash_t values
+ *
+ * @param h1 The 128-bit hash value.
+ * @param h2 Another 128-bit hash value.
+ *
+ * @return `1` if `h1` and `h2` are equal.
+ * @return `0` if they are not.
*/
-XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
+XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
/*!
- * XXH128_cmp():
+ * @brief Compares two @ref XXH128_hash_t
*
* This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
*
- * return: >0 if *h128_1 > *h128_2
- * =0 if *h128_1 == *h128_2
- * <0 if *h128_1 < *h128_2
+ * @param h128_1 Left-hand side value
+ * @param h128_2 Right-hand side value
+ *
+ * @return >0 if @p h128_1 > @p h128_2
+ * @return =0 if @p h128_1 == @p h128_2
+ * @return <0 if @p h128_1 < @p h128_2
*/
-XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2);
+XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
/******* Canonical representation *******/
typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
-XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash);
-XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
+
+
+/*!
+ * @brief Converts an @ref XXH128_hash_t to a big endian @ref XXH128_canonical_t.
+ *
+ * @param dst The @ref XXH128_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH128_hash_t to be converted.
+ *
+ * @pre
+ * @p dst must not be `NULL`.
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH128_canonical_t to a native @ref XXH128_hash_t.
+ *
+ * @param src The @ref XXH128_canonical_t to convert.
+ *
+ * @pre
+ * @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src);
#endif /* !XXH_NO_XXH3 */
XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it. */
}; /* typedef'd to XXH64_state_t */
-
#ifndef XXH_NO_XXH3
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
#define XXH3_INTERNALBUFFER_SIZE 256
/*!
+ * @internal
* @brief Default size of the secret buffer (and @ref XXH3_kSecret).
*
* This is the size used in @ref XXH3_kSecret and the seeded functions.
*/
struct XXH3_state_s {
XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
- /*!< The 8 accumulators. Similar to `vN` in @ref XXH32_state_s::v1 and @ref XXH64_state_s */
+ /*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
/*!< Used to store a custom secret generated from a seed. */
XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
* Note that this doesn't prepare the state for a streaming operation,
* it's still necessary to use XXH3_NNbits_reset*() afterwards.
*/
-#define XXH3_INITSTATE(XXH3_state_ptr) { (XXH3_state_ptr)->seed = 0; }
+#define XXH3_INITSTATE(XXH3_state_ptr) \
+ do { \
+ XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
+ tmp_xxh3_state_ptr->seed = 0; \
+ tmp_xxh3_state_ptr->extSecret = NULL; \
+ } while(0)
-/* XXH128() :
- * simple alias to pre-selected XXH3_128bits variant
+/*!
+ * @brief Calculates the 128-bit hash of @p data using XXH3.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param seed The 64-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ * The memory between @p data and @p data + @p len must be valid,
+ * readable, contiguous memory. However, if @p len is `0`, @p data may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 128-bit XXH3 value.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
*/
-XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
/* === Experimental API === */
/* Symbols defined below must be considered tied to a specific library version. */
-/*
- * XXH3_generateSecret():
+/*!
+ * @brief Derive a high-entropy secret from any user-defined content, named customSeed.
+ *
+ * @param secretBuffer A writable buffer for derived high-entropy secret data.
+ * @param secretSize Size of secretBuffer, in bytes. Must be >= XXH3_SECRET_SIZE_MIN.
+ * @param customSeed A user-defined content.
+ * @param customSeedSize Size of customSeed, in bytes.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
*
- * Derive a high-entropy secret from any user-defined content, named customSeed.
* The generated secret can be used in combination with `*_withSecret()` functions.
- * The `_withSecret()` variants are useful to provide a higher level of protection than 64-bit seed,
- * as it becomes much more difficult for an external actor to guess how to impact the calculation logic.
+ * The `_withSecret()` variants are useful to provide a higher level of protection
+ * than 64-bit seed, as it becomes much more difficult for an external actor to
+ * guess how to impact the calculation logic.
*
* The function accepts as input a custom seed of any length and any content,
- * and derives from it a high-entropy secret of length @secretSize
- * into an already allocated buffer @secretBuffer.
- * @secretSize must be >= XXH3_SECRET_SIZE_MIN
+ * and derives from it a high-entropy secret of length @p secretSize into an
+ * already allocated buffer @p secretBuffer.
*
* The generated secret can then be used with any `*_withSecret()` variant.
- * Functions `XXH3_128bits_withSecret()`, `XXH3_64bits_withSecret()`,
- * `XXH3_128bits_reset_withSecret()` and `XXH3_64bits_reset_withSecret()`
+ * The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),
+ * @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()
* are part of this list. They all accept a `secret` parameter
- * which must be large enough for implementation reasons (>= XXH3_SECRET_SIZE_MIN)
+ * which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)
* _and_ feature very high entropy (consist of random-looking bytes).
- * These conditions can be a high bar to meet, so
- * XXH3_generateSecret() can be employed to ensure proper quality.
+ * These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can
+ * be employed to ensure proper quality.
*
- * customSeed can be anything. It can have any size, even small ones,
- * and its content can be anything, even "poor entropy" sources such as a bunch of zeroes.
- * The resulting `secret` will nonetheless provide all required qualities.
+ * @p customSeed can be anything. It can have any size, even small ones,
+ * and its content can be anything, even "poor entropy" sources such as a bunch
+ * of zeroes. The resulting `secret` will nonetheless provide all required qualities.
*
- * When customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
+ * @pre
+ * - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN
+ * - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
+ *
+ * Example code:
+ * @code{.c}
+ * #include <stdio.h>
+ * #include <stdlib.h>
+ * #include <string.h>
+ * #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ * #include "xxhash.h"
+ * // Hashes argv[2] using the entropy from argv[1].
+ * int main(int argc, char* argv[])
+ * {
+ * char secret[XXH3_SECRET_SIZE_MIN];
+ * if (argc != 3) { return 1; }
+ * XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
+ * XXH64_hash_t h = XXH3_64bits_withSecret(
+ * argv[2], strlen(argv[2]),
+ * secret, sizeof(secret)
+ * );
+ * printf("%016llx\n", (unsigned long long) h);
+ * }
+ * @endcode
*/
-XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize);
-
+XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
-/*
- * XXH3_generateSecret_fromSeed():
- *
- * Generate the same secret as the _withSeed() variants.
+/*!
+ * @brief Generate the same secret as the _withSeed() variants.
*
- * The resulting secret has a length of XXH3_SECRET_DEFAULT_SIZE (necessarily).
- * @secretBuffer must be already allocated, of size at least XXH3_SECRET_DEFAULT_SIZE bytes.
+ * @param secretBuffer A writable buffer of @ref XXH3_SECRET_DEFAULT_SIZE bytes
+ * @param seed The 64-bit seed to alter the hash result predictably.
*
* The generated secret can be used in combination with
*`*_withSecret()` and `_withSecretandSeed()` variants.
- * This generator is notably useful in combination with `_withSecretandSeed()`,
- * as a way to emulate a faster `_withSeed()` variant.
+ *
+ * Example C++ `std::string` hash class:
+ * @code{.cpp}
+ * #include <string>
+ * #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ * #include "xxhash.h"
+ * // Slow, seeds each time
+ * class HashSlow {
+ * XXH64_hash_t seed;
+ * public:
+ * HashSlow(XXH64_hash_t s) : seed{s} {}
+ * size_t operator()(const std::string& x) const {
+ * return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
+ * }
+ * };
+ * // Fast, caches the seeded secret for future uses.
+ * class HashFast {
+ * unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
+ * public:
+ * HashFast(XXH64_hash_t s) {
+ * XXH3_generateSecret_fromSeed(secret, s);
+ * }
+ * size_t operator()(const std::string& x) const {
+ * return size_t{
+ * XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
+ * };
+ * }
+ * };
+ * @endcode
*/
-XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed);
+XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
-/*
- * *_withSecretandSeed() :
+/*!
+ * @brief Calculates 64/128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
* These variants generate hash values using either
- * @seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
- * or @secret for "large" keys (>= XXH3_MIDSIZE_MAX).
+ * @p seed for "short" keys (< @ref XXH3_MIDSIZE_MAX = 240 bytes)
+ * or @p secret for "large" keys (>= @ref XXH3_MIDSIZE_MAX).
*
* This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
* `_withSeed()` has to generate the secret on the fly for "large" keys.
* which requires more instructions than _withSeed() variants.
* Therefore, _withSecretandSeed variant combines the best of both worlds.
*
- * When @secret has been generated by XXH3_generateSecret_fromSeed(),
+ * When @p secret has been generated by XXH3_generateSecret_fromSeed(),
* this variant produces *exactly* the same results as `_withSeed()` variant,
* hence offering only a pure speed benefit on "large" input,
* by skipping the need to regenerate the secret for every large input.
* for example with XXH3_64bits(), which then becomes the seed,
* and then employ both the seed and the secret in _withSecretandSeed().
* On top of speed, an added benefit is that each bit in the secret
- * has a 50% chance to swap each bit in the output,
- * via its impact to the seed.
+ * has a 50% chance to swap each bit in the output, via its impact to the seed.
+ *
* This is not guaranteed when using the secret directly in "small data" scenarios,
* because only portions of the secret are employed for small data.
*/
-XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecretandSeed(const void* data, size_t len,
- const void* secret, size_t secretSize,
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
+XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
+ XXH_NOESCAPE const void* secret, size_t secretSize,
XXH64_hash_t seed);
-
-XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecretandSeed(const void* data, size_t len,
- const void* secret, size_t secretSize,
+/*!
+ * @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64 The 64-bit seed to alter the hash result predictably.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * @see XXH3_64bits_withSecretandSeed()
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
+XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
+ XXH_NOESCAPE const void* secret, size_t secretSize,
XXH64_hash_t seed64);
-
+#ifndef XXH_NO_STREAM
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64 The 64-bit seed to alter the hash result predictably.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @see XXH3_64bits_withSecretandSeed()
+ */
XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
- const void* secret, size_t secretSize,
+XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+ XXH_NOESCAPE const void* secret, size_t secretSize,
XXH64_hash_t seed64);
-
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64 The 64-bit seed to alter the hash result predictably.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @see XXH3_64bits_withSecretandSeed()
+ */
XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
- const void* secret, size_t secretSize,
+XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+ XXH_NOESCAPE const void* secret, size_t secretSize,
XXH64_hash_t seed64);
+#endif /* !XXH_NO_STREAM */
-
-#endif /* XXH_NO_XXH3 */
+#endif /* !XXH_NO_XXH3 */
#endif /* XXH_NO_LONG_LONG */
#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
# define XXH_IMPLEMENTATION
/*!
* @brief Define this to disable 64-bit code.
*
- * Useful if only using the @ref xxh32_family and you have a strict C90 compiler.
+ * Useful if only using the @ref XXH32_family and you have a strict C90 compiler.
*/
# define XXH_NO_LONG_LONG
# undef XXH_NO_LONG_LONG /* don't actually */
* Use `memcpy()`. Safe and portable. Note that most modern compilers will
* eliminate the function call and treat it as an unaligned access.
*
- * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((packed))`
+ * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
* @par
* Depends on compiler extensions and is therefore not portable.
* This method is safe _if_ your compiler supports it,
* inline small `memcpy()` calls, and it might also be faster on big-endian
* systems which lack a native byteswap instruction. However, some compilers
* will emit literal byteshifts even if the target supports unaligned access.
- * .
+ *
*
* @warning
* Methods 1 and 2 rely on implementation-defined behavior. Use these with
*/
# define XXH_FORCE_MEMORY_ACCESS 0
+/*!
+ * @def XXH_SIZE_OPT
+ * @brief Controls how much xxHash optimizes for size.
+ *
+ * xxHash, when compiled, tends to result in a rather large binary size. This
+ * is mostly due to heavy usage to forced inlining and constant folding of the
+ * @ref XXH3_family to increase performance.
+ *
+ * However, some developers prefer size over speed. This option can
+ * significantly reduce the size of the generated code. When using the `-Os`
+ * or `-Oz` options on GCC or Clang, this is defined to 1 by default,
+ * otherwise it is defined to 0.
+ *
+ * Most of these size optimizations can be controlled manually.
+ *
+ * This is a number from 0-2.
+ * - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed
+ * comes first.
+ * - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more
+ * conservative and disables hacks that increase code size. It implies the
+ * options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0,
+ * and @ref XXH3_NEON_LANES == 8 if they are not already defined.
+ * - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible.
+ * Performance may cry. For example, the single shot functions just use the
+ * streaming API.
+ */
+# define XXH_SIZE_OPT 0
+
/*!
* @def XXH_FORCE_ALIGN_CHECK
* @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
*
* In these cases, the alignment check can be removed by setting this macro to 0.
* Then the code will always use unaligned memory access.
- * Align check is automatically disabled on x86, x64 & arm64,
+ * Align check is automatically disabled on x86, x64, ARM64, and some ARM chips
* which are platforms known to offer good unaligned memory accesses performance.
*
+ * It is also disabled by default when @ref XXH_SIZE_OPT >= 1.
+ *
* This option does not affect XXH3 (only XXH32 and XXH64).
*/
# define XXH_FORCE_ALIGN_CHECK 0
* XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
* compiler full control on whether to inline or not.
*
- * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using
- * -fno-inline with GCC or Clang, this will automatically be defined.
+ * When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if
+ * @ref XXH_SIZE_OPT >= 1, this will automatically be defined.
*/
# define XXH_NO_INLINE_HINTS 0
+/*!
+ * @def XXH3_INLINE_SECRET
+ * @brief Determines whether to inline the XXH3 withSecret code.
+ *
+ * When the secret size is known, the compiler can improve the performance
+ * of XXH3_64bits_withSecret() and XXH3_128bits_withSecret().
+ *
+ * However, if the secret size is not known, it doesn't have any benefit. This
+ * happens when xxHash is compiled into a global symbol. Therefore, if
+ * @ref XXH_INLINE_ALL is *not* defined, this will be defined to 0.
+ *
+ * Additionally, this defaults to 0 on GCC 12+, which has an issue with function pointers
+ * that are *sometimes* force inline on -Og, and it is impossible to automatically
+ * detect this optimization level.
+ */
+# define XXH3_INLINE_SECRET 0
+
/*!
* @def XXH32_ENDJMP
* @brief Whether to use a jump for `XXH32_finalize`.
*/
# define XXH_OLD_NAMES
# undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
+
+/*!
+ * @def XXH_NO_STREAM
+ * @brief Disables the streaming API.
+ *
+ * When xxHash is not inlined and the streaming functions are not used, disabling
+ * the streaming functions can improve code size significantly, especially with
+ * the @ref XXH3_family which tends to make constant folded copies of itself.
+ */
+# define XXH_NO_STREAM
+# undef XXH_NO_STREAM /* don't actually */
#endif /* XXH_DOXYGEN */
/*!
* @}
*/
#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
- /* prefer __packed__ structures (method 1) for gcc on armv7+ and mips */
-# if !defined(__clang__) && \
-( \
- (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
- ( \
- defined(__GNUC__) && ( \
- (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \
- ( \
- defined(__mips__) && \
- (__mips <= 5 || __mips_isa_rev < 6) && \
- (!defined(__mips16) || defined(__mips_mips16e2)) \
- ) \
- ) \
- ) \
-)
+ /* prefer __packed__ structures (method 1) for GCC
+ * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy
+ * which for some reason does unaligned loads. */
+# if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
# define XXH_FORCE_MEMORY_ACCESS 1
# endif
#endif
+#ifndef XXH_SIZE_OPT
+ /* default to 1 for -Os or -Oz */
+# if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
+# define XXH_SIZE_OPT 1
+# else
+# define XXH_SIZE_OPT 0
+# endif
+#endif
+
#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
-# if defined(__i386) || defined(__x86_64__) || defined(__aarch64__) \
- || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) /* visual */
+ /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
+# if XXH_SIZE_OPT >= 1 || \
+ defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
+ || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */
# define XXH_FORCE_ALIGN_CHECK 0
# else
# define XXH_FORCE_ALIGN_CHECK 1
#endif
#ifndef XXH_NO_INLINE_HINTS
-# if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \
- || defined(__NO_INLINE__) /* -O0, -fno-inline */
+# if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */
# define XXH_NO_INLINE_HINTS 1
# else
# define XXH_NO_INLINE_HINTS 0
# endif
#endif
+#ifndef XXH3_INLINE_SECRET
+# if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \
+ || !defined(XXH_INLINE_ALL)
+# define XXH3_INLINE_SECRET 0
+# else
+# define XXH3_INLINE_SECRET 1
+# endif
+#endif
+
#ifndef XXH32_ENDJMP
/* generally preferable for performance */
# define XXH32_ENDJMP 0
/* *************************************
* Includes & Memory related functions
***************************************/
-/* Modify the local functions below should you wish to use some other memory routines */
-/* for ZSTD_malloc(), ZSTD_free() */
-#define ZSTD_DEPS_NEED_MALLOC
-#include "zstd_deps.h" /* size_t, ZSTD_malloc, ZSTD_free, ZSTD_memcpy */
-static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); }
-static void XXH_free (void* p) { ZSTD_free(p); }
-static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); }
+#if defined(XXH_NO_STREAM)
+/* nothing */
+#elif defined(XXH_NO_STDLIB)
+
+/* When requesting to disable any mention of stdlib,
+ * the library loses the ability to invoked malloc / free.
+ * In practice, it means that functions like `XXH*_createState()`
+ * will always fail, and return NULL.
+ * This flag is useful in situations where
+ * xxhash.h is integrated into some kernel, embedded or limited environment
+ * without access to dynamic allocation.
+ */
+
+static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
+static void XXH_free(void* p) { (void)p; }
+
+#else
+
+/*
+ * Modify the local functions below should you wish to use
+ * different memory routines for malloc() and free()
+ */
+#include <stdlib.h>
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than malloc().
+ */
+static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than free().
+ */
+static void XXH_free(void* p) { free(p); }
+
+#endif /* XXH_NO_STDLIB */
+
+#include <string.h>
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than memcpy().
+ */
+static void* XXH_memcpy(void* dest, const void* src, size_t size)
+{
+ return memcpy(dest,src,size);
+}
+
+#include <limits.h> /* ULLONG_MAX */
/* *************************************
# define XXH_NO_INLINE static
#endif
+#if XXH3_INLINE_SECRET
+# define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
+#else
+# define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
+#endif
/* *************************************
# include <assert.h> /* note: can still be disabled with NDEBUG */
# define XXH_ASSERT(c) assert(c)
#else
-# define XXH_ASSERT(c) ((void)0)
+# if defined(__INTEL_COMPILER)
+# define XXH_ASSERT(c) XXH_ASSUME((unsigned char) (c))
+# else
+# define XXH_ASSERT(c) XXH_ASSUME(c)
+# endif
#endif
/* note: use after variable declarations */
#ifndef XXH_STATIC_ASSERT
# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
-# include <assert.h>
-# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
+# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
# elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */
# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
# else
* @brief Used to prevent unwanted optimizations for @p var.
*
* It uses an empty GCC inline assembly statement with a register constraint
- * which forces @p var into a general purpose register (e.g. eax, ebx, ecx
+ * which forces @p var into a general purpose register (eg eax, ebx, ecx
* on x86) and marks it as modified.
*
* This is used in a few places to avoid unwanted autovectorization (e.g.
* XXH3_initCustomSecret_scalar().
*/
#if defined(__GNUC__) || defined(__clang__)
-# define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
+# define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
#else
# define XXH_COMPILER_GUARD(var) ((void)0)
#endif
+/* Specifically for NEON vectors which use the "w" constraint, on
+ * Clang. */
+#if defined(__clang__) && defined(__ARM_ARCH) && !defined(__wasm__)
+# define XXH_COMPILER_GUARD_CLANG_NEON(var) __asm__("" : "+w" (var))
+#else
+# define XXH_COMPILER_GUARD_CLANG_NEON(var) ((void)0)
+#endif
+
/* *************************************
* Basic Types
***************************************/
#if !defined (__VMS) \
&& (defined (__cplusplus) \
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
+# ifdef _AIX
+# include <inttypes.h>
+# else
+# include <stdint.h>
+# endif
typedef uint8_t xxh_u8;
#else
typedef unsigned char xxh_u8;
typedef XXH32_hash_t xxh_u32;
#ifdef XXH_OLD_NAMES
+# warning "XXH_OLD_NAMES is planned to be removed starting v0.9. If the program depends on it, consider moving away from it by employing newer type names directly"
# define BYTE xxh_u8
# define U8 xxh_u8
# define U32 xxh_u32
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
/*
- * __pack instructions are safer but compiler specific, hence potentially
- * problematic for some compilers.
- *
- * Currently only defined for GCC and ICC.
+ * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
+ * documentation claimed that it only increased the alignment, but actually it
+ * can decrease it on gcc, clang, and icc:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
+ * https://gcc.godbolt.org/z/xYez1j67Y.
*/
#ifdef XXH_OLD_NAMES
typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
#endif
static xxh_u32 XXH_read32(const void* ptr)
{
- typedef union { xxh_u32 u32; } __attribute__((packed)) xxh_unalign;
- return ((const xxh_unalign*)ptr)->u32;
+ typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
+ return *((const xxh_unalign32*)ptr);
}
#else
# define XXH_HAS_BUILTIN(x) 0
#endif
+
+
+/*
+ * C23 and future versions have standard "unreachable()".
+ * Once it has been implemented reliably we can add it as an
+ * additional case:
+ *
+ * ```
+ * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN)
+ * # include <stddef.h>
+ * # ifdef unreachable
+ * # define XXH_UNREACHABLE() unreachable()
+ * # endif
+ * #endif
+ * ```
+ *
+ * Note C++23 also has std::unreachable() which can be detected
+ * as follows:
+ * ```
+ * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
+ * # include <utility>
+ * # define XXH_UNREACHABLE() std::unreachable()
+ * #endif
+ * ```
+ * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
+ * We don't use that as including `<utility>` in `extern "C"` blocks
+ * doesn't work on GCC12
+ */
+
+#if XXH_HAS_BUILTIN(__builtin_unreachable)
+# define XXH_UNREACHABLE() __builtin_unreachable()
+
+#elif defined(_MSC_VER)
+# define XXH_UNREACHABLE() __assume(0)
+
+#else
+# define XXH_UNREACHABLE()
+#endif
+
+#if XXH_HAS_BUILTIN(__builtin_assume)
+# define XXH_ASSUME(c) __builtin_assume(c)
+#else
+# define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
+#endif
+
/*!
* @internal
* @def XXH_rotl32(x,r)
*********************************************************************/
/*!
* @}
- * @defgroup xxh32_impl XXH32 implementation
+ * @defgroup XXH32_impl XXH32 implementation
* @ingroup impl
+ *
+ * Details on the XXH32 implementation.
* @{
*/
/* #define instead of static const, to be used as initializers */
acc += input * XXH_PRIME32_2;
acc = XXH_rotl32(acc, 13);
acc *= XXH_PRIME32_1;
-#if (defined(__SSE4_1__) || defined(__aarch64__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+#if (defined(__SSE4_1__) || defined(__aarch64__) || defined(__wasm_simd128__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
/*
* UGLY HACK:
* A compiler fence is the only thing that prevents GCC and Clang from
* can load data, while v3 can multiply. SSE forces them to operate
* together.
*
- * This is also enabled on AArch64, as Clang autovectorizes it incorrectly
- * and it is pointless writing a NEON implementation that is basically the
- * same speed as scalar for XXH32.
+ * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing
+ * the loop. NEON is only faster on the A53, and with the newer cores, it is less
+ * than half the speed.
+ *
+ * Additionally, this is used on WASM SIMD128 because it JITs to the same
+ * SIMD instructions and has the same issue.
*/
XXH_COMPILER_GUARD(acc);
#endif
* The final mix ensures that all input bits have a chance to impact any bit in
* the output digest, resulting in an unbiased distribution.
*
- * @param h32 The hash to avalanche.
+ * @param hash The hash to avalanche.
* @return The avalanched hash.
*/
-static xxh_u32 XXH32_avalanche(xxh_u32 h32)
+static xxh_u32 XXH32_avalanche(xxh_u32 hash)
{
- h32 ^= h32 >> 15;
- h32 *= XXH_PRIME32_2;
- h32 ^= h32 >> 13;
- h32 *= XXH_PRIME32_3;
- h32 ^= h32 >> 16;
- return(h32);
+ hash ^= hash >> 15;
+ hash *= XXH_PRIME32_2;
+ hash ^= hash >> 13;
+ hash *= XXH_PRIME32_3;
+ hash ^= hash >> 16;
+ return hash;
}
#define XXH_get32bits(p) XXH_readLE32_align(p, align)
* This final stage will digest them to ensure that all input bytes are present
* in the final mix.
*
- * @param h32 The hash to finalize.
+ * @param hash The hash to finalize.
* @param ptr The pointer to the remaining input.
* @param len The remaining length, modulo 16.
* @param align Whether @p ptr is aligned.
* @return The finalized hash.
+ * @see XXH64_finalize().
*/
-static xxh_u32
-XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
+static XXH_PUREF xxh_u32
+XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
{
-#define XXH_PROCESS1 do { \
- h32 += (*ptr++) * XXH_PRIME32_5; \
- h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1; \
+#define XXH_PROCESS1 do { \
+ hash += (*ptr++) * XXH_PRIME32_5; \
+ hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \
} while (0)
-#define XXH_PROCESS4 do { \
- h32 += XXH_get32bits(ptr) * XXH_PRIME32_3; \
- ptr += 4; \
- h32 = XXH_rotl32(h32, 17) * XXH_PRIME32_4; \
+#define XXH_PROCESS4 do { \
+ hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \
+ ptr += 4; \
+ hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \
} while (0)
if (ptr==NULL) XXH_ASSERT(len == 0);
XXH_PROCESS1;
--len;
}
- return XXH32_avalanche(h32);
+ return XXH32_avalanche(hash);
} else {
switch(len&15) /* or switch(bEnd - p) */ {
case 12: XXH_PROCESS4;
- XXH_FALLTHROUGH;
+ XXH_FALLTHROUGH; /* fallthrough */
case 8: XXH_PROCESS4;
- XXH_FALLTHROUGH;
+ XXH_FALLTHROUGH; /* fallthrough */
case 4: XXH_PROCESS4;
- return XXH32_avalanche(h32);
+ return XXH32_avalanche(hash);
case 13: XXH_PROCESS4;
- XXH_FALLTHROUGH;
+ XXH_FALLTHROUGH; /* fallthrough */
case 9: XXH_PROCESS4;
- XXH_FALLTHROUGH;
+ XXH_FALLTHROUGH; /* fallthrough */
case 5: XXH_PROCESS4;
XXH_PROCESS1;
- return XXH32_avalanche(h32);
+ return XXH32_avalanche(hash);
case 14: XXH_PROCESS4;
- XXH_FALLTHROUGH;
+ XXH_FALLTHROUGH; /* fallthrough */
case 10: XXH_PROCESS4;
- XXH_FALLTHROUGH;
+ XXH_FALLTHROUGH; /* fallthrough */
case 6: XXH_PROCESS4;
XXH_PROCESS1;
XXH_PROCESS1;
- return XXH32_avalanche(h32);
+ return XXH32_avalanche(hash);
case 15: XXH_PROCESS4;
- XXH_FALLTHROUGH;
+ XXH_FALLTHROUGH; /* fallthrough */
case 11: XXH_PROCESS4;
- XXH_FALLTHROUGH;
+ XXH_FALLTHROUGH; /* fallthrough */
case 7: XXH_PROCESS4;
- XXH_FALLTHROUGH;
+ XXH_FALLTHROUGH; /* fallthrough */
case 3: XXH_PROCESS1;
- XXH_FALLTHROUGH;
+ XXH_FALLTHROUGH; /* fallthrough */
case 2: XXH_PROCESS1;
- XXH_FALLTHROUGH;
+ XXH_FALLTHROUGH; /* fallthrough */
case 1: XXH_PROCESS1;
- XXH_FALLTHROUGH;
- case 0: return XXH32_avalanche(h32);
+ XXH_FALLTHROUGH; /* fallthrough */
+ case 0: return XXH32_avalanche(hash);
}
XXH_ASSERT(0);
- return h32; /* reaching this point is deemed impossible */
+ return hash; /* reaching this point is deemed impossible */
}
}
* @param align Whether @p input is aligned.
* @return The calculated hash.
*/
-XXH_FORCE_INLINE xxh_u32
+XXH_FORCE_INLINE XXH_PUREF xxh_u32
XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
{
xxh_u32 h32;
return XXH32_finalize(h32, input, len&15, align);
}
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
{
-#if 0
+#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
XXH32_state_t state;
XXH32_reset(&state, seed);
/******* Hash streaming *******/
-/*!
- * @ingroup xxh32_family
- */
+#ifndef XXH_NO_STREAM
+/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
{
return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
}
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
{
XXH_free(statePtr);
return XXH_OK;
}
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
{
XXH_memcpy(dstState, srcState, sizeof(*dstState));
}
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
{
XXH_ASSERT(statePtr != NULL);
}
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH_errorcode
XXH32_update(XXH32_state_t* state, const void* input, size_t len)
{
}
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
{
xxh_u32 h32;
return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
}
-
+#endif /* !XXH_NO_STREAM */
/******* Canonical representation *******/
-/*!
- * @ingroup xxh32_family
- * The default return values from XXH functions are unsigned 32 and 64 bit
- * integers.
- *
- * The canonical representation uses big endian convention, the same convention
- * as human-readable numbers (large digits first).
- *
- * This way, hash values can be written into a file or buffer, remaining
- * comparable across different systems.
- *
- * The following functions allow transformation of hash values to and from their
- * canonical format.
- */
+/*! @ingroup XXH32_family */
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
{
- /* XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); */
+ XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
XXH_memcpy(dst, &hash, sizeof(*dst));
}
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
{
return XXH_readBE32(src);
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
/*
- * __pack instructions are safer, but compiler specific, hence potentially
- * problematic for some compilers.
- *
- * Currently only defined for GCC and ICC.
+ * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
+ * documentation claimed that it only increased the alignment, but actually it
+ * can decrease it on gcc, clang, and icc:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
+ * https://gcc.godbolt.org/z/xYez1j67Y.
*/
#ifdef XXH_OLD_NAMES
typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
#endif
static xxh_u64 XXH_read64(const void* ptr)
{
- typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) xxh_unalign64;
- return ((const xxh_unalign64*)ptr)->u64;
+ typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
+ return *((const xxh_unalign64*)ptr);
}
#else
/******* xxh64 *******/
/*!
* @}
- * @defgroup xxh64_impl XXH64 implementation
+ * @defgroup XXH64_impl XXH64 implementation
* @ingroup impl
+ *
+ * Details on the XXH64 implementation.
* @{
*/
/* #define rather that static const, to be used as initializers */
# define PRIME64_5 XXH_PRIME64_5
#endif
+/*! @copydoc XXH32_round */
static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
{
acc += input * XXH_PRIME64_2;
acc = XXH_rotl64(acc, 31);
acc *= XXH_PRIME64_1;
+#if (defined(__AVX512F__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+ /*
+ * DISABLE AUTOVECTORIZATION:
+ * A compiler fence is used to prevent GCC and Clang from
+ * autovectorizing the XXH64 loop (pragmas and attributes don't work for some
+ * reason) without globally disabling AVX512.
+ *
+ * Autovectorization of XXH64 tends to be detrimental,
+ * though the exact outcome may change depending on exact cpu and compiler version.
+ * For information, it has been reported as detrimental for Skylake-X,
+ * but possibly beneficial for Zen4.
+ *
+ * The default is to disable auto-vectorization,
+ * but you can select to enable it instead using `XXH_ENABLE_AUTOVECTORIZE` build variable.
+ */
+ XXH_COMPILER_GUARD(acc);
+#endif
return acc;
}
return acc;
}
-static xxh_u64 XXH64_avalanche(xxh_u64 h64)
+/*! @copydoc XXH32_avalanche */
+static xxh_u64 XXH64_avalanche(xxh_u64 hash)
{
- h64 ^= h64 >> 33;
- h64 *= XXH_PRIME64_2;
- h64 ^= h64 >> 29;
- h64 *= XXH_PRIME64_3;
- h64 ^= h64 >> 32;
- return h64;
+ hash ^= hash >> 33;
+ hash *= XXH_PRIME64_2;
+ hash ^= hash >> 29;
+ hash *= XXH_PRIME64_3;
+ hash ^= hash >> 32;
+ return hash;
}
#define XXH_get64bits(p) XXH_readLE64_align(p, align)
-static xxh_u64
-XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
+/*!
+ * @internal
+ * @brief Processes the last 0-31 bytes of @p ptr.
+ *
+ * There may be up to 31 bytes remaining to consume from the input.
+ * This final stage will digest them to ensure that all input bytes are present
+ * in the final mix.
+ *
+ * @param hash The hash to finalize.
+ * @param ptr The pointer to the remaining input.
+ * @param len The remaining length, modulo 32.
+ * @param align Whether @p ptr is aligned.
+ * @return The finalized hash
+ * @see XXH32_finalize().
+ */
+static XXH_PUREF xxh_u64
+XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
{
if (ptr==NULL) XXH_ASSERT(len == 0);
len &= 31;
while (len >= 8) {
xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
ptr += 8;
- h64 ^= k1;
- h64 = XXH_rotl64(h64,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
+ hash ^= k1;
+ hash = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
len -= 8;
}
if (len >= 4) {
- h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
+ hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
ptr += 4;
- h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
+ hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
len -= 4;
}
while (len > 0) {
- h64 ^= (*ptr++) * XXH_PRIME64_5;
- h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1;
+ hash ^= (*ptr++) * XXH_PRIME64_5;
+ hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
--len;
}
- return XXH64_avalanche(h64);
+ return XXH64_avalanche(hash);
}
#ifdef XXH_OLD_NAMES
# undef XXH_PROCESS8_64
#endif
-XXH_FORCE_INLINE xxh_u64
+/*!
+ * @internal
+ * @brief The implementation for @ref XXH64().
+ *
+ * @param input , len , seed Directly passed from @ref XXH64().
+ * @param align Whether @p input is aligned.
+ * @return The calculated hash.
+ */
+XXH_FORCE_INLINE XXH_PUREF xxh_u64
XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
{
xxh_u64 h64;
}
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
{
-#if 0
+#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
XXH64_state_t state;
XXH64_reset(&state, seed);
}
/******* Hash Streaming *******/
-
-/*! @ingroup xxh64_family*/
+#ifndef XXH_NO_STREAM
+/*! @ingroup XXH64_family*/
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
{
return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
}
-/*! @ingroup xxh64_family */
+/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
{
XXH_free(statePtr);
return XXH_OK;
}
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState)
{
XXH_memcpy(dstState, srcState, sizeof(*dstState));
}
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed)
{
XXH_ASSERT(statePtr != NULL);
memset(statePtr, 0, sizeof(*statePtr));
return XXH_OK;
}
-/*! @ingroup xxh64_family */
+/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH_errorcode
-XXH64_update (XXH64_state_t* state, const void* input, size_t len)
+XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len)
{
if (input==NULL) {
XXH_ASSERT(len == 0);
}
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state)
{
xxh_u64 h64;
return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
}
-
+#endif /* !XXH_NO_STREAM */
/******* Canonical representation *******/
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash)
{
- /* XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); */
+ XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
XXH_memcpy(dst, &hash, sizeof(*dst));
}
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
{
return XXH_readBE64(src);
}
************************************************************************ */
/*!
* @}
- * @defgroup xxh3_impl XXH3 implementation
+ * @defgroup XXH3_impl XXH3 implementation
* @ingroup impl
* @{
*/
/* === Compiler specifics === */
#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
-# define XXH_RESTRICT /* disable */
+# define XXH_RESTRICT /* disable */
#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */
# define XXH_RESTRICT restrict
+#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
+ || (defined (__clang__)) \
+ || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
+ || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
+/*
+ * There are a LOT more compilers that recognize __restrict but this
+ * covers the major ones.
+ */
+# define XXH_RESTRICT __restrict
#else
-/* Note: it might be useful to define __restrict or __restrict__ for some C++ compilers */
# define XXH_RESTRICT /* disable */
#endif
# define XXH_unlikely(x) (x)
#endif
+#ifndef XXH_HAS_INCLUDE
+# ifdef __has_include
+/*
+ * Not defined as XXH_HAS_INCLUDE(x) (function-like) because
+ * this causes segfaults in Apple Clang 4.2 (on Mac OS X 10.7 Lion)
+ */
+# define XXH_HAS_INCLUDE __has_include
+# else
+# define XXH_HAS_INCLUDE(x) 0
+# endif
+#endif
+
#if defined(__GNUC__) || defined(__clang__)
+# if defined(__ARM_FEATURE_SVE)
+# include <arm_sve.h>
+# endif
# if defined(__ARM_NEON__) || defined(__ARM_NEON) \
- || defined(__aarch64__) || defined(_M_ARM) \
- || defined(_M_ARM64) || defined(_M_ARM64EC)
+ || (defined(_M_ARM) && _M_ARM >= 7) \
+ || defined(_M_ARM64) || defined(_M_ARM64EC) \
+ || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* WASM SIMD128 via SIMDe */
# define inline __inline__ /* circumvent a clang bug */
# include <arm_neon.h>
# undef inline
* Note that these are actually implemented as macros.
*
* If this is not defined, it is detected automatically.
- * @ref XXH_X86DISPATCH overrides this.
+ * internal macro XXH_X86DISPATCH overrides this.
*/
enum XXH_VECTOR_TYPE /* fake enum */ {
XXH_SCALAR = 0, /*!< Portable scalar version */
*/
XXH_AVX2 = 2, /*!< AVX2 for Haswell and Bulldozer */
XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */
- XXH_NEON = 4, /*!< NEON for most ARMv7-A and all AArch64 */
+ XXH_NEON = 4, /*!<
+ * NEON for most ARMv7-A, all AArch64, and WASM SIMD128
+ * via the SIMDeverywhere polyfill provided with the
+ * Emscripten SDK.
+ */
XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */
+ XXH_SVE = 6, /*!< SVE for some ARMv8-A and ARMv9-A */
};
/*!
* @ingroup tuning
# define XXH_AVX512 3
# define XXH_NEON 4
# define XXH_VSX 5
+# define XXH_SVE 6
#endif
#ifndef XXH_VECTOR /* can be defined on command line */
-# if ( \
+# if defined(__ARM_FEATURE_SVE)
+# define XXH_VECTOR XXH_SVE
+# elif ( \
defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
|| defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
+ || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* wasm simd128 via SIMDe */ \
) && ( \
defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
|| (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
# endif
#endif
+/* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
+#if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
+# ifdef _MSC_VER
+# pragma warning(once : 4606)
+# else
+# warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
+# endif
+# undef XXH_VECTOR
+# define XXH_VECTOR XXH_SCALAR
+#endif
+
/*
* Controls the alignment of the accumulator,
* for compatibility with aligned vector loads, which are usually faster.
# define XXH_ACC_ALIGN 16
# elif XXH_VECTOR == XXH_AVX512 /* avx512 */
# define XXH_ACC_ALIGN 64
+# elif XXH_VECTOR == XXH_SVE /* sve */
+# define XXH_ACC_ALIGN 64
# endif
#endif
#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
|| XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
# define XXH_SEC_ALIGN XXH_ACC_ALIGN
+#elif XXH_VECTOR == XXH_SVE
+# define XXH_SEC_ALIGN XXH_ACC_ALIGN
#else
# define XXH_SEC_ALIGN 8
#endif
+#if defined(__GNUC__) || defined(__clang__)
+# define XXH_ALIASING __attribute__((may_alias))
+#else
+# define XXH_ALIASING /* nothing */
+#endif
+
/*
* UGLY HACK:
* GCC usually generates the best code with -O3 for xxHash.
* only applies to Sandy and Ivy Bridge... which don't even support AVX2.
*
* That is why when compiling the AVX2 version, it is recommended to use either
- * -O2 -mavx2 -march=haswell
- * or
- * -O2 -mavx2 -mno-avx256-split-unaligned-load
- * for decent performance, or to use Clang instead.
- *
- * Fortunately, we can control the first one with a pragma that forces GCC into
- * -O2, but the other one we can't control without "failed to inline always
- * inline function due to target mismatch" warnings.
- */
-#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
- && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
- && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
-# pragma GCC push_options
-# pragma GCC optimize("-O2")
-#endif
-
-
-#if XXH_VECTOR == XXH_NEON
-/*
- * NEON's setup for vmlal_u32 is a little more complicated than it is on
- * SSE2, AVX2, and VSX.
- *
- * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an upcast.
- *
- * To do the same operation, the 128-bit 'Q' register needs to be split into
- * two 64-bit 'D' registers, performing this operation::
- *
- * [ a | b ]
- * | '---------. .--------' |
- * | x |
- * | .---------' '--------. |
- * [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[ a >> 32 | b >> 32 ]
- *
- * Due to significant changes in aarch64, the fastest method for aarch64 is
- * completely different than the fastest method for ARMv7-A.
- *
- * ARMv7-A treats D registers as unions overlaying Q registers, so modifying
- * D11 will modify the high half of Q5. This is similar to how modifying AH
- * will only affect bits 8-15 of AX on x86.
- *
- * VZIP takes two registers, and puts even lanes in one register and odd lanes
- * in the other.
- *
- * On ARMv7-A, this strangely modifies both parameters in place instead of
- * taking the usual 3-operand form.
- *
- * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on the
- * lower and upper halves of the Q register to end up with the high and low
- * halves where we want - all in one instruction.
- *
- * vzip.32 d10, d11 @ d10 = { d10[0], d11[0] }; d11 = { d10[1], d11[1] }
- *
- * Unfortunately we need inline assembly for this: Instructions modifying two
- * registers at once is not possible in GCC or Clang's IR, and they have to
- * create a copy.
- *
- * aarch64 requires a different approach.
- *
- * In order to make it easier to write a decent compiler for aarch64, many
- * quirks were removed, such as conditional execution.
- *
- * NEON was also affected by this.
- *
- * aarch64 cannot access the high bits of a Q-form register, and writes to a
- * D-form register zero the high bits, similar to how writes to W-form scalar
- * registers (or DWORD registers on x86_64) work.
- *
- * The formerly free vget_high intrinsics now require a vext (with a few
- * exceptions)
- *
- * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the equivalent
- * of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to only modify one
- * operand.
+ * -O2 -mavx2 -march=haswell
+ * or
+ * -O2 -mavx2 -mno-avx256-split-unaligned-load
+ * for decent performance, or to use Clang instead.
*
- * The equivalent of the VZIP.32 on the lower and upper halves would be this
- * mess:
+ * Fortunately, we can control the first one with a pragma that forces GCC into
+ * -O2, but the other one we can't control without "failed to inline always
+ * inline function due to target mismatch" warnings.
+ */
+#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
+ && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
+ && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
+# pragma GCC push_options
+# pragma GCC optimize("-O2")
+#endif
+
+#if XXH_VECTOR == XXH_NEON
+
+/*
+ * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3
+ * optimizes out the entire hashLong loop because of the aliasing violation.
*
- * ext v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1] }
- * zip1 v1.2s, v0.2s, v2.2s // v1 = { v0[0], v2[0] }
- * zip2 v0.2s, v0.2s, v1.2s // v0 = { v0[1], v2[1] }
+ * However, GCC is also inefficient at load-store optimization with vld1q/vst1q,
+ * so the only option is to mark it as aliasing.
+ */
+typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
+
+/*!
+ * @internal
+ * @brief `vld1q_u64` but faster and alignment-safe.
*
- * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 (SHRN):
+ * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only
+ * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).
*
- * shrn v1.2s, v0.2d, #32 // v1 = (uint32x2_t)(v0 >> 32);
- * xtn v0.2s, v0.2d // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF);
+ * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it
+ * prohibits load-store optimizations. Therefore, a direct dereference is used.
*
- * This is available on ARMv7-A, but is less efficient than a single VZIP.32.
+ * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe
+ * unaligned load.
*/
+#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
+XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
+{
+ return *(xxh_aliasing_uint64x2_t const *)ptr;
+}
+#else
+XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
+{
+ return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
+}
+#endif
/*!
- * Function-like macro:
- * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t &outHi)
- * {
- * outLo = (uint32x2_t)(in & 0xFFFFFFFF);
- * outHi = (uint32x2_t)(in >> 32);
- * in = UNDEFINED;
- * }
+ * @internal
+ * @brief `vmlal_u32` on low and high halves of a vector.
+ *
+ * This is a workaround for AArch64 GCC < 11 which implemented arm_neon.h with
+ * inline assembly and was therefore incapable of merging the `vget_{low, high}_u32`
+ * with `vmlal_u32`.
*/
-# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
- && (defined(__GNUC__) || defined(__clang__)) \
- && (defined(__arm__) || defined(__thumb__) || defined(_M_ARM))
-# define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \
- do { \
- /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
- /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */ \
- /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \
- __asm__("vzip.32 %e0, %f0" : "+w" (in)); \
- (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in)); \
- (outHi) = vget_high_u32(vreinterpretq_u32_u64(in)); \
- } while (0)
-# else
-# define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \
- do { \
- (outLo) = vmovn_u64 (in); \
- (outHi) = vshrn_n_u64 ((in), 32); \
- } while (0)
-# endif
+#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+ /* Inline assembly is the only way */
+ __asm__("umlal %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
+ return acc;
+}
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+ /* This intrinsic works as expected */
+ return vmlal_high_u32(acc, lhs, rhs);
+}
+#else
+/* Portable intrinsic versions */
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+ return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs));
+}
+/*! @copydoc XXH_vmlal_low_u32
+ * Assume the compiler converts this to vmlal_high_u32 on aarch64 */
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+ return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs));
+}
+#endif
/*!
* @ingroup tuning
* @brief Controls the NEON to scalar ratio for XXH3
*
- * On AArch64 when not optimizing for size, XXH3 will run 6 lanes using NEON and
- * 2 lanes on scalar by default.
+ * This can be set to 2, 4, 6, or 8.
*
- * This can be set to 2, 4, 6, or 8. ARMv7 will default to all 8 NEON lanes, as the
- * emulated 64-bit arithmetic is too slow.
+ * ARM Cortex CPUs are _very_ sensitive to how their pipelines are used.
*
- * Modern ARM CPUs are _very_ sensitive to how their pipelines are used.
+ * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but only 2 of those
+ * can be NEON. If you are only using NEON instructions, you are only using 2/3 of the CPU
+ * bandwidth.
*
- * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but it can't
- * have more than 2 NEON (F0/F1) micro-ops. If you are only using NEON instructions,
- * you are only using 2/3 of the CPU bandwidth.
- *
- * This is even more noticeable on the more advanced cores like the A76 which
+ * This is even more noticeable on the more advanced cores like the Cortex-A76 which
* can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
*
- * Therefore, @ref XXH3_NEON_LANES lanes will be processed using NEON, and the
- * remaining lanes will use scalar instructions. This improves the bandwidth
- * and also gives the integer pipelines something to do besides twiddling loop
- * counters and pointers.
+ * Therefore, to make the most out of the pipeline, it is beneficial to run 6 NEON lanes
+ * and 2 scalar lanes, which is chosen by default.
+ *
+ * This does not apply to Apple processors or 32-bit processors, which run better with
+ * full NEON. These will default to 8. Additionally, size-optimized builds run 8 lanes.
*
* This change benefits CPUs with large micro-op buffers without negatively affecting
- * other CPUs:
+ * most other CPUs:
*
* | Chipset | Dispatch type | NEON only | 6:2 hybrid | Diff. |
* |:----------------------|:--------------------|----------:|-----------:|------:|
* | Snapdragon 730 (A76) | 2 NEON/8 micro-ops | 8.8 GB/s | 10.1 GB/s | ~16% |
* | Snapdragon 835 (A73) | 2 NEON/3 micro-ops | 5.1 GB/s | 5.3 GB/s | ~5% |
* | Marvell PXA1928 (A53) | In-order dual-issue | 1.9 GB/s | 1.9 GB/s | 0% |
+ * | Apple M1 | 4 NEON/8 micro-ops | 37.3 GB/s | 36.1 GB/s | ~-3% |
*
* It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
*
+ * When using WASM SIMD128, if this is 2 or 6, SIMDe will scalarize 2 of the lanes, meaning
+ * it effectively becomes a worse version of 4.
+ *
* @see XXH3_accumulate_512_neon()
*/
# ifndef XXH3_NEON_LANES
# if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
- && !defined(__OPTIMIZE_SIZE__)
+ && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
# define XXH3_NEON_LANES 6
# else
# define XXH3_NEON_LANES XXH_ACC_NB
* inconsistent intrinsics, spotty coverage, and multiple endiannesses.
*/
#if XXH_VECTOR == XXH_VSX
+/* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
+ * and `pixel`. This is a problem for obvious reasons.
+ *
+ * These keywords are unnecessary; the spec literally says they are
+ * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
+ * after including the header.
+ *
+ * We use pragma push_macro/pop_macro to keep the namespace clean. */
+# pragma push_macro("bool")
+# pragma push_macro("vector")
+# pragma push_macro("pixel")
+/* silence potential macro redefined warnings */
+# undef bool
+# undef vector
+# undef pixel
+
# if defined(__s390x__)
# include <s390intrin.h>
# else
-/* gcc's altivec.h can have the unwanted consequence to unconditionally
- * #define bool, vector, and pixel keywords,
- * with bad consequences for programs already using these keywords for other purposes.
- * The paragraph defining these macros is skipped when __APPLE_ALTIVEC__ is defined.
- * __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler,
- * but it seems that, in some cases, it isn't.
- * Force the build macro to be defined, so that keywords are not altered.
- */
-# if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
-# define __APPLE_ALTIVEC__
-# endif
# include <altivec.h>
# endif
+/* Restore the original macro values, if applicable. */
+# pragma pop_macro("pixel")
+# pragma pop_macro("vector")
+# pragma pop_macro("bool")
+
typedef __vector unsigned long long xxh_u64x2;
typedef __vector unsigned char xxh_u8x16;
typedef __vector unsigned xxh_u32x4;
+/*
+ * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
+ */
+typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
+
# ifndef XXH_VSX_BE
# if defined(__BIG_ENDIAN__) \
|| (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
/* s390x is always big endian, no issue on this platform */
# define XXH_vec_mulo vec_mulo
# define XXH_vec_mule vec_mule
-# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw)
+# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
/* Clang has a better way to control this, we can just use the builtin which doesn't swap. */
+ /* The IBM XL Compiler (which defines __clang__) only implements the vec_* operations */
# define XXH_vec_mulo __builtin_altivec_vmulouw
# define XXH_vec_mule __builtin_altivec_vmuleuw
# else
# endif /* XXH_vec_mulo, XXH_vec_mule */
#endif /* XXH_VECTOR == XXH_VSX */
+#if XXH_VECTOR == XXH_SVE
+#define ACCRND(acc, offset) \
+do { \
+ svuint64_t input_vec = svld1_u64(mask, xinput + offset); \
+ svuint64_t secret_vec = svld1_u64(mask, xsecret + offset); \
+ svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \
+ svuint64_t swapped = svtbl_u64(input_vec, kSwap); \
+ svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \
+ svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \
+ svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
+ acc = svadd_u64_x(mask, acc, mul); \
+} while (0)
+#endif /* XXH_VECTOR == XXH_SVE */
/* prefetch
* can be disabled, by declaring XXH_NO_PREFETCH build macro */
#if defined(XXH_NO_PREFETCH)
# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
#else
-# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */
+# if XXH_SIZE_OPT >= 1
+# define XXH_PREFETCH(ptr) (void)(ptr)
+# elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
};
+static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL; /*!< 0b0001011001010110011001111001000110011110001101110111100111111001 */
+static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL; /*!< 0b1001111110110010000111000110010100011110100110001101111100100101 */
#ifdef XXH_OLD_NAMES
# define kSecret XXH3_kSecret
}
/*! Seems to produce slightly better code on GCC for some reason. */
-XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
+XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
{
XXH_ASSERT(0 <= shift && shift < 64);
return v64 ^ (v64 >> shift);
static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
{
h64 = XXH_xorshift64(h64, 37);
- h64 *= 0x165667919E3779F9ULL;
+ h64 *= PRIME_MX1;
h64 = XXH_xorshift64(h64, 32);
return h64;
}
{
/* this mix is inspired by Pelle Evensen's rrmxmx */
h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
- h64 *= 0x9FB21C651E98DF25ULL;
+ h64 *= PRIME_MX2;
h64 ^= (h64 >> 35) + len ;
- h64 *= 0x9FB21C651E98DF25ULL;
+ h64 *= PRIME_MX2;
return XXH_xorshift64(h64, 28);
}
*
* This adds an extra layer of strength for custom secrets.
*/
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
XXH_ASSERT(input != NULL);
}
}
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
XXH_ASSERT(input != NULL);
}
}
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
XXH_ASSERT(input != NULL);
}
}
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
XXH_ASSERT(len <= 16);
}
/* For mid range keys, XXH3 uses a Mum-hash variant. */
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
XXH64_hash_t seed)
XXH_ASSERT(16 < len && len <= 128);
{ xxh_u64 acc = len * XXH_PRIME64_1;
+#if XXH_SIZE_OPT >= 1
+ /* Smaller and cleaner, but slightly slower. */
+ unsigned int i = (unsigned int)(len - 1) / 32;
+ do {
+ acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
+ acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
+ } while (i-- != 0);
+#else
if (len > 32) {
if (len > 64) {
if (len > 96) {
}
acc += XXH3_mix16B(input+0, secret+0, seed);
acc += XXH3_mix16B(input+len-16, secret+16, seed);
-
+#endif
return XXH3_avalanche(acc);
}
}
+/*!
+ * @brief Maximum size of "short" key in bytes.
+ */
#define XXH3_MIDSIZE_MAX 240
-XXH_NO_INLINE XXH64_hash_t
+XXH_NO_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
XXH64_hash_t seed)
#define XXH3_MIDSIZE_LASTOFFSET 17
{ xxh_u64 acc = len * XXH_PRIME64_1;
- int const nbRounds = (int)len / 16;
- int i;
+ xxh_u64 acc_end;
+ unsigned int const nbRounds = (unsigned int)len / 16;
+ unsigned int i;
+ XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
for (i=0; i<8; i++) {
acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
}
- acc = XXH3_avalanche(acc);
+ /* last bytes */
+ acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
XXH_ASSERT(nbRounds >= 8);
+ acc = XXH3_avalanche(acc);
#if defined(__clang__) /* Clang */ \
&& (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
&& !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
#pragma clang loop vectorize(disable)
#endif
for (i=8 ; i < nbRounds; i++) {
- acc += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
+ /*
+ * Prevents clang from unrolling the acc loop and interleaving it with this one.
+ */
+ XXH_COMPILER_GUARD(acc);
+ acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
}
- /* last bytes */
- acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
- return XXH3_avalanche(acc);
+ return XXH3_avalanche(acc + acc_end);
}
}
# define ACC_NB XXH_ACC_NB
#endif
+#ifndef XXH_PREFETCH_DIST
+# ifdef __clang__
+# define XXH_PREFETCH_DIST 320
+# else
+# if (XXH_VECTOR == XXH_AVX512)
+# define XXH_PREFETCH_DIST 512
+# else
+# define XXH_PREFETCH_DIST 384
+# endif
+# endif /* __clang__ */
+#endif /* XXH_PREFETCH_DIST */
+
+/*
+ * These macros are to generate an XXH3_accumulate() function.
+ * The two arguments select the name suffix and target attribute.
+ *
+ * The name of this symbol is XXH3_accumulate_<name>() and it calls
+ * XXH3_accumulate_512_<name>().
+ *
+ * It may be useful to hand implement this function if the compiler fails to
+ * optimize the inline function.
+ */
+#define XXH3_ACCUMULATE_TEMPLATE(name) \
+void \
+XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc, \
+ const xxh_u8* XXH_RESTRICT input, \
+ const xxh_u8* XXH_RESTRICT secret, \
+ size_t nbStripes) \
+{ \
+ size_t n; \
+ for (n = 0; n < nbStripes; n++ ) { \
+ const xxh_u8* const in = input + n*XXH_STRIPE_LEN; \
+ XXH_PREFETCH(in + XXH_PREFETCH_DIST); \
+ XXH3_accumulate_512_##name( \
+ acc, \
+ in, \
+ secret + n*XXH_SECRET_CONSUME_RATE); \
+ } \
+}
+
+
XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
{
if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
/* data_key = data_vec ^ key_vec; */
__m512i const data_key = _mm512_xor_si512 (data_vec, key_vec);
/* data_key_lo = data_key >> 32; */
- __m512i const data_key_lo = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
+ __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32);
/* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
__m512i const product = _mm512_mul_epu32 (data_key, data_key_lo);
/* xacc[0] += swap(data_vec); */
*xacc = _mm512_add_epi64(product, sum);
}
}
+XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)
/*
* XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
/* xacc[0] ^= (xacc[0] >> 47) */
__m512i const acc_vec = *xacc;
__m512i const shifted = _mm512_srli_epi64 (acc_vec, 47);
- __m512i const data_vec = _mm512_xor_si512 (acc_vec, shifted);
/* xacc[0] ^= secret; */
__m512i const key_vec = _mm512_loadu_si512 (secret);
- __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec);
+ __m512i const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);
/* xacc[0] *= XXH_PRIME32_1; */
- __m512i const data_key_hi = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
+ __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32);
__m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32);
__m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32);
*xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
XXH_ASSERT(((size_t)customSecret & 63) == 0);
(void)(&XXH_writeLE64);
{ int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
- __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((xxh_i64)seed64), 0xAA, (xxh_i64)(0U - seed64));
+ __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
+ __m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);
const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret);
__m512i* const dest = ( __m512i*) customSecret;
XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
XXH_ASSERT(((size_t)dest & 63) == 0);
for (i=0; i < nbRounds; ++i) {
- /* GCC has a bug, _mm512_stream_load_si512 accepts 'void*', not 'void const*',
- * this will warn "discards 'const' qualifier". */
- union {
- const __m512i* cp;
- void* p;
- } remote_const_void;
- remote_const_void.cp = src + i;
- dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed);
+ dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);
} }
}
/* data_key = data_vec ^ key_vec; */
__m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
/* data_key_lo = data_key >> 32; */
- __m256i const data_key_lo = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+ __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);
/* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
__m256i const product = _mm256_mul_epu32 (data_key, data_key_lo);
/* xacc[i] += swap(data_vec); */
xacc[i] = _mm256_add_epi64(product, sum);
} }
}
+XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)
XXH_FORCE_INLINE XXH_TARGET_AVX2 void
XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
__m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
/* xacc[i] *= XXH_PRIME32_1; */
- __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+ __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);
__m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32);
__m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32);
xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
XXH_ASSERT(((size_t)dest & 31) == 0);
/* GCC -O2 need unroll loop manually */
- dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed);
- dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed);
- dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed);
- dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed);
- dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed);
- dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed);
+ dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
+ dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
+ dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
+ dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
+ dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
+ dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);
}
}
xacc[i] = _mm_add_epi64(product, sum);
} }
}
+XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)
XXH_FORCE_INLINE XXH_TARGET_SSE2 void
XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
/*!
* @internal
- * @brief The bulk processing loop for NEON.
+ * @brief The bulk processing loop for NEON and WASM SIMD128.
*
* The NEON code path is actually partially scalar when running on AArch64. This
* is to optimize the pipelining and can have up to 15% speedup depending on the
* CPU, and it also mitigates some GCC codegen issues.
*
* @see XXH3_NEON_LANES for configuring this and details about this optimization.
+ *
+ * NEON's 32-bit to 64-bit long multiply takes a half vector of 32-bit
+ * integers instead of the other platforms which mask full 64-bit vectors,
+ * so the setup is more complicated than just shifting right.
+ *
+ * Additionally, there is an optimization for 4 lanes at once noted below.
+ *
+ * Since, as stated, the most optimal amount of lanes for Cortexes is 6,
+ * there needs to be *three* versions of the accumulate operation used
+ * for the remaining 2 lanes.
+ *
+ * WASM's SIMD128 uses SIMDe's arm_neon.h polyfill because the intrinsics overlap
+ * nearly perfectly.
*/
+
XXH_FORCE_INLINE void
XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
const void* XXH_RESTRICT input,
{
XXH_ASSERT((((size_t)acc) & 15) == 0);
XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
- {
- uint64x2_t* const xacc = (uint64x2_t *) acc;
+ { /* GCC for darwin arm64 does not like aliasing here */
+ xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc;
/* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
- uint8_t const* const xinput = (const uint8_t *) input;
- uint8_t const* const xsecret = (const uint8_t *) secret;
+ uint8_t const* xinput = (const uint8_t *) input;
+ uint8_t const* xsecret = (const uint8_t *) secret;
size_t i;
- /* NEON for the first few lanes (these loops are normally interleaved) */
- for (i=0; i < XXH3_NEON_LANES / 2; i++) {
+#ifdef __wasm_simd128__
+ /*
+ * On WASM SIMD128, Clang emits direct address loads when XXH3_kSecret
+ * is constant propagated, which results in it converting it to this
+ * inside the loop:
+ *
+ * a = v128.load(XXH3_kSecret + 0 + $secret_offset, offset = 0)
+ * b = v128.load(XXH3_kSecret + 16 + $secret_offset, offset = 0)
+ * ...
+ *
+ * This requires a full 32-bit address immediate (and therefore a 6 byte
+ * instruction) as well as an add for each offset.
+ *
+ * Putting an asm guard prevents it from folding (at the cost of losing
+ * the alignment hint), and uses the free offset in `v128.load` instead
+ * of adding secret_offset each time which overall reduces code size by
+ * about a kilobyte and improves performance.
+ */
+ XXH_COMPILER_GUARD(xsecret);
+#endif
+ /* Scalar lanes use the normal scalarRound routine */
+ for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+ XXH3_scalarRound(acc, input, secret, i);
+ }
+ i = 0;
+ /* 4 NEON lanes at a time. */
+ for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {
/* data_vec = xinput[i]; */
- uint8x16_t data_vec = vld1q_u8(xinput + (i * 16));
+ uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (i * 16));
+ uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((i+1) * 16));
/* key_vec = xsecret[i]; */
- uint8x16_t key_vec = vld1q_u8(xsecret + (i * 16));
- uint64x2_t data_key;
- uint32x2_t data_key_lo, data_key_hi;
- /* xacc[i] += swap(data_vec); */
- uint64x2_t const data64 = vreinterpretq_u64_u8(data_vec);
- uint64x2_t const swapped = vextq_u64(data64, data64, 1);
- xacc[i] = vaddq_u64 (xacc[i], swapped);
+ uint64x2_t key_vec_1 = XXH_vld1q_u64(xsecret + (i * 16));
+ uint64x2_t key_vec_2 = XXH_vld1q_u64(xsecret + ((i+1) * 16));
+ /* data_swap = swap(data_vec) */
+ uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
+ uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
/* data_key = data_vec ^ key_vec; */
- data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec));
- /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
- * data_key_hi = (uint32x2_t) (data_key >> 32);
- * data_key = UNDEFINED; */
- XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
- /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
- xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi);
+ uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
+ uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);
+ /*
+ * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a
+ * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to
+ * get one vector with the low 32 bits of each lane, and one vector
+ * with the high 32 bits of each lane.
+ *
+ * The intrinsic returns a double vector because the original ARMv7-a
+ * instruction modified both arguments in place. AArch64 and SIMD128 emit
+ * two instructions from this intrinsic.
+ *
+ * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]
+ * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]
+ */
+ uint32x4x2_t unzipped = vuzpq_u32(
+ vreinterpretq_u32_u64(data_key_1),
+ vreinterpretq_u32_u64(data_key_2)
+ );
+ /* data_key_lo = data_key & 0xFFFFFFFF */
+ uint32x4_t data_key_lo = unzipped.val[0];
+ /* data_key_hi = data_key >> 32 */
+ uint32x4_t data_key_hi = unzipped.val[1];
+ /*
+ * Then, we can split the vectors horizontally and multiply which, as for most
+ * widening intrinsics, have a variant that works on both high half vectors
+ * for free on AArch64. A similar instruction is available on SIMD128.
+ *
+ * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi
+ */
+ uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi);
+ uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
+ /*
+ * Clang reorders
+ * a += b * c; // umlal swap.2d, dkl.2s, dkh.2s
+ * c += a; // add acc.2d, acc.2d, swap.2d
+ * to
+ * c += a; // add acc.2d, acc.2d, swap.2d
+ * c += b * c; // umlal acc.2d, dkl.2s, dkh.2s
+ *
+ * While it would make sense in theory since the addition is faster,
+ * for reasons likely related to umlal being limited to certain NEON
+ * pipelines, this is worse. A compiler guard fixes this.
+ */
+ XXH_COMPILER_GUARD_CLANG_NEON(sum_1);
+ XXH_COMPILER_GUARD_CLANG_NEON(sum_2);
+ /* xacc[i] = acc_vec + sum; */
+ xacc[i] = vaddq_u64(xacc[i], sum_1);
+ xacc[i+1] = vaddq_u64(xacc[i+1], sum_2);
}
- /* Scalar for the remainder. This may be a zero iteration loop. */
- for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
- XXH3_scalarRound(acc, input, secret, i);
+ /* Operate on the remaining NEON lanes 2 at a time. */
+ for (; i < XXH3_NEON_LANES / 2; i++) {
+ /* data_vec = xinput[i]; */
+ uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16));
+ /* key_vec = xsecret[i]; */
+ uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
+ /* acc_vec_2 = swap(data_vec) */
+ uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1);
+ /* data_key = data_vec ^ key_vec; */
+ uint64x2_t data_key = veorq_u64(data_vec, key_vec);
+ /* For two lanes, just use VMOVN and VSHRN. */
+ /* data_key_lo = data_key & 0xFFFFFFFF; */
+ uint32x2_t data_key_lo = vmovn_u64(data_key);
+ /* data_key_hi = data_key >> 32; */
+ uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
+ /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */
+ uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi);
+ /* Same Clang workaround as before */
+ XXH_COMPILER_GUARD_CLANG_NEON(sum);
+ /* xacc[i] = acc_vec + sum; */
+ xacc[i] = vaddq_u64 (xacc[i], sum);
}
}
}
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon)
XXH_FORCE_INLINE void
XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
XXH_ASSERT((((size_t)acc) & 15) == 0);
- { uint64x2_t* xacc = (uint64x2_t*) acc;
+ { xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc;
uint8_t const* xsecret = (uint8_t const*) secret;
- uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1);
size_t i;
- /* NEON for the first few lanes (these loops are normally interleaved) */
+ /* WASM uses operator overloads and doesn't need these. */
+#ifndef __wasm_simd128__
+ /* { prime32_1, prime32_1 } */
+ uint32x2_t const kPrimeLo = vdup_n_u32(XXH_PRIME32_1);
+ /* { 0, prime32_1, 0, prime32_1 } */
+ uint32x4_t const kPrimeHi = vreinterpretq_u32_u64(vdupq_n_u64((xxh_u64)XXH_PRIME32_1 << 32));
+#endif
+
+ /* AArch64 uses both scalar and neon at the same time */
+ for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+ XXH3_scalarScrambleRound(acc, secret, i);
+ }
for (i=0; i < XXH3_NEON_LANES / 2; i++) {
/* xacc[i] ^= (xacc[i] >> 47); */
uint64x2_t acc_vec = xacc[i];
- uint64x2_t shifted = vshrq_n_u64 (acc_vec, 47);
- uint64x2_t data_vec = veorq_u64 (acc_vec, shifted);
+ uint64x2_t shifted = vshrq_n_u64(acc_vec, 47);
+ uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
/* xacc[i] ^= xsecret[i]; */
- uint8x16_t key_vec = vld1q_u8 (xsecret + (i * 16));
- uint64x2_t data_key = veorq_u64 (data_vec, vreinterpretq_u64_u8(key_vec));
-
+ uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
+ uint64x2_t data_key = veorq_u64(data_vec, key_vec);
/* xacc[i] *= XXH_PRIME32_1 */
- uint32x2_t data_key_lo, data_key_hi;
- /* data_key_lo = (uint32x2_t) (xacc[i] & 0xFFFFFFFF);
- * data_key_hi = (uint32x2_t) (xacc[i] >> 32);
- * xacc[i] = UNDEFINED; */
- XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
- { /*
- * prod_hi = (data_key >> 32) * XXH_PRIME32_1;
- *
- * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will
- * incorrectly "optimize" this:
- * tmp = vmul_u32(vmovn_u64(a), vmovn_u64(b));
- * shifted = vshll_n_u32(tmp, 32);
- * to this:
- * tmp = "vmulq_u64"(a, b); // no such thing!
- * shifted = vshlq_n_u64(tmp, 32);
- *
- * However, unlike SSE, Clang lacks a 64-bit multiply routine
- * for NEON, and it scalarizes two 64-bit multiplies instead.
- *
- * vmull_u32 has the same timing as vmul_u32, and it avoids
- * this bug completely.
- * See https://bugs.llvm.org/show_bug.cgi?id=39967
- */
- uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
- /* xacc[i] = prod_hi << 32; */
- xacc[i] = vshlq_n_u64(prod_hi, 32);
- /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */
- xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime);
- }
- }
- /* Scalar for the remainder. This may be a zero iteration loop. */
- for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
- XXH3_scalarScrambleRound(acc, secret, i);
+#ifdef __wasm_simd128__
+ /* SIMD128 has multiply by u64x2, use it instead of expanding and scalarizing */
+ xacc[i] = data_key * XXH_PRIME32_1;
+#else
+ /*
+ * Expanded version with portable NEON intrinsics
+ *
+ * lo(x) * lo(y) + (hi(x) * lo(y) << 32)
+ *
+ * prod_hi = hi(data_key) * lo(prime) << 32
+ *
+ * Since we only need 32 bits of this multiply a trick can be used, reinterpreting the vector
+ * as a uint32x4_t and multiplying by { 0, prime, 0, prime } to cancel out the unwanted bits
+ * and avoid the shift.
+ */
+ uint32x4_t prod_hi = vmulq_u32 (vreinterpretq_u32_u64(data_key), kPrimeHi);
+ /* Extract low bits for vmlal_u32 */
+ uint32x2_t data_key_lo = vmovn_u64(data_key);
+ /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */
+ xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo);
+#endif
}
}
}
-
#endif
#if (XXH_VECTOR == XXH_VSX)
const void* XXH_RESTRICT secret)
{
/* presumed aligned */
- unsigned int* const xacc = (unsigned int*) acc;
- xxh_u64x2 const* const xinput = (xxh_u64x2 const*) input; /* no alignment restriction */
- xxh_u64x2 const* const xsecret = (xxh_u64x2 const*) secret; /* no alignment restriction */
+ xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
+ xxh_u8 const* const xinput = (xxh_u8 const*) input; /* no alignment restriction */
+ xxh_u8 const* const xsecret = (xxh_u8 const*) secret; /* no alignment restriction */
xxh_u64x2 const v32 = { 32, 32 };
size_t i;
for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
/* data_vec = xinput[i]; */
- xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i);
+ xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i);
/* key_vec = xsecret[i]; */
- xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + i);
+ xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);
xxh_u64x2 const data_key = data_vec ^ key_vec;
/* shuffled = (data_key << 32) | (data_key >> 32); */
xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
/* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
/* acc_vec = xacc[i]; */
- xxh_u64x2 acc_vec = (xxh_u64x2)vec_xl(0, xacc + 4 * i);
+ xxh_u64x2 acc_vec = xacc[i];
acc_vec += product;
/* swap high and low halves */
#else
acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
#endif
- /* xacc[i] = acc_vec; */
- vec_xst((xxh_u32x4)acc_vec, 0, xacc + 4 * i);
+ xacc[i] = acc_vec;
}
}
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)
XXH_FORCE_INLINE void
XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
XXH_ASSERT((((size_t)acc) & 15) == 0);
- { xxh_u64x2* const xacc = (xxh_u64x2*) acc;
- const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret;
+ { xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
+ const xxh_u8* const xsecret = (const xxh_u8*) secret;
/* constants */
xxh_u64x2 const v32 = { 32, 32 };
xxh_u64x2 const v47 = { 47, 47 };
xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
/* xacc[i] ^= xsecret[i]; */
- xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + i);
+ xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);
xxh_u64x2 const data_key = data_vec ^ key_vec;
/* xacc[i] *= XXH_PRIME32_1 */
#endif
+#if (XXH_VECTOR == XXH_SVE)
+
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,
+ const void* XXH_RESTRICT input,
+ const void* XXH_RESTRICT secret)
+{
+ uint64_t *xacc = (uint64_t *)acc;
+ const uint64_t *xinput = (const uint64_t *)(const void *)input;
+ const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
+ svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
+ uint64_t element_count = svcntd();
+ if (element_count >= 8) {
+ svbool_t mask = svptrue_pat_b64(SV_VL8);
+ svuint64_t vacc = svld1_u64(mask, xacc);
+ ACCRND(vacc, 0);
+ svst1_u64(mask, xacc, vacc);
+ } else if (element_count == 2) { /* sve128 */
+ svbool_t mask = svptrue_pat_b64(SV_VL2);
+ svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+ svuint64_t acc1 = svld1_u64(mask, xacc + 2);
+ svuint64_t acc2 = svld1_u64(mask, xacc + 4);
+ svuint64_t acc3 = svld1_u64(mask, xacc + 6);
+ ACCRND(acc0, 0);
+ ACCRND(acc1, 2);
+ ACCRND(acc2, 4);
+ ACCRND(acc3, 6);
+ svst1_u64(mask, xacc + 0, acc0);
+ svst1_u64(mask, xacc + 2, acc1);
+ svst1_u64(mask, xacc + 4, acc2);
+ svst1_u64(mask, xacc + 6, acc3);
+ } else {
+ svbool_t mask = svptrue_pat_b64(SV_VL4);
+ svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+ svuint64_t acc1 = svld1_u64(mask, xacc + 4);
+ ACCRND(acc0, 0);
+ ACCRND(acc1, 4);
+ svst1_u64(mask, xacc + 0, acc0);
+ svst1_u64(mask, xacc + 4, acc1);
+ }
+}
+
+XXH_FORCE_INLINE void
+XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
+ const xxh_u8* XXH_RESTRICT input,
+ const xxh_u8* XXH_RESTRICT secret,
+ size_t nbStripes)
+{
+ if (nbStripes != 0) {
+ uint64_t *xacc = (uint64_t *)acc;
+ const uint64_t *xinput = (const uint64_t *)(const void *)input;
+ const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
+ svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
+ uint64_t element_count = svcntd();
+ if (element_count >= 8) {
+ svbool_t mask = svptrue_pat_b64(SV_VL8);
+ svuint64_t vacc = svld1_u64(mask, xacc + 0);
+ do {
+ /* svprfd(svbool_t, void *, enum svfprop); */
+ svprfd(mask, xinput + 128, SV_PLDL1STRM);
+ ACCRND(vacc, 0);
+ xinput += 8;
+ xsecret += 1;
+ nbStripes--;
+ } while (nbStripes != 0);
+
+ svst1_u64(mask, xacc + 0, vacc);
+ } else if (element_count == 2) { /* sve128 */
+ svbool_t mask = svptrue_pat_b64(SV_VL2);
+ svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+ svuint64_t acc1 = svld1_u64(mask, xacc + 2);
+ svuint64_t acc2 = svld1_u64(mask, xacc + 4);
+ svuint64_t acc3 = svld1_u64(mask, xacc + 6);
+ do {
+ svprfd(mask, xinput + 128, SV_PLDL1STRM);
+ ACCRND(acc0, 0);
+ ACCRND(acc1, 2);
+ ACCRND(acc2, 4);
+ ACCRND(acc3, 6);
+ xinput += 8;
+ xsecret += 1;
+ nbStripes--;
+ } while (nbStripes != 0);
+
+ svst1_u64(mask, xacc + 0, acc0);
+ svst1_u64(mask, xacc + 2, acc1);
+ svst1_u64(mask, xacc + 4, acc2);
+ svst1_u64(mask, xacc + 6, acc3);
+ } else {
+ svbool_t mask = svptrue_pat_b64(SV_VL4);
+ svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+ svuint64_t acc1 = svld1_u64(mask, xacc + 4);
+ do {
+ svprfd(mask, xinput + 128, SV_PLDL1STRM);
+ ACCRND(acc0, 0);
+ ACCRND(acc1, 4);
+ xinput += 8;
+ xsecret += 1;
+ nbStripes--;
+ } while (nbStripes != 0);
+
+ svst1_u64(mask, xacc + 0, acc0);
+ svst1_u64(mask, xacc + 4, acc1);
+ }
+ }
+}
+
+#endif
+
/* scalar variants - universal */
+#if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
+/*
+ * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they
+ * emit an excess mask and a full 64-bit multiply-add (MADD X-form).
+ *
+ * While this might not seem like much, as AArch64 is a 64-bit architecture, only
+ * big Cortex designs have a full 64-bit multiplier.
+ *
+ * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit
+ * multiplies expand to 2-3 multiplies in microcode. This has a major penalty
+ * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline.
+ *
+ * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does
+ * not have this penalty and does the mask automatically.
+ */
+XXH_FORCE_INLINE xxh_u64
+XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
+{
+ xxh_u64 ret;
+ /* note: %x = 64-bit register, %w = 32-bit register */
+ __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc));
+ return ret;
+}
+#else
+XXH_FORCE_INLINE xxh_u64
+XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
+{
+ return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc;
+}
+#endif
+
/*!
* @internal
* @brief Scalar round for @ref XXH3_accumulate_512_scalar().
xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
- xacc[lane] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
+ xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]);
}
}
const void* XXH_RESTRICT secret)
{
size_t i;
+ /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
+#if defined(__GNUC__) && !defined(__clang__) \
+ && (defined(__arm__) || defined(__thumb2__)) \
+ && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
+ && XXH_SIZE_OPT <= 0
+# pragma GCC unroll 8
+#endif
for (i=0; i < XXH_ACC_NB; i++) {
XXH3_scalarRound(acc, input, secret, i);
}
}
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)
/*!
* @internal
const xxh_u8* kSecretPtr = XXH3_kSecret;
XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
-#if defined(__clang__) && defined(__aarch64__)
+#if defined(__GNUC__) && defined(__aarch64__)
/*
* UGLY HACK:
- * Clang generates a bunch of MOV/MOVK pairs for aarch64, and they are
+ * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
* placed sequentially, in order, at the top of the unrolled loop.
*
* While MOVK is great for generating constants (2 cycles for a 64-bit
* ADD
* SUB STR
* STR
- * By forcing loads from memory (as the asm line causes Clang to assume
+ * By forcing loads from memory (as the asm line causes the compiler to assume
* that XXH3_kSecretPtr has been changed), the pipelines are used more
* efficiently:
* I L S
*/
XXH_COMPILER_GUARD(kSecretPtr);
#endif
- /*
- * Note: in debug mode, this overrides the asm optimization
- * and Clang will emit MOVK chains again.
- */
- XXH_ASSERT(kSecretPtr == XXH3_kSecret);
-
{ int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
int i;
for (i=0; i < nbRounds; i++) {
/*
- * The asm hack causes Clang to assume that kSecretPtr aliases with
+ * The asm hack causes the compiler to assume that kSecretPtr aliases with
* customSecret, and on aarch64, this prevented LDP from merging two
* loads together for free. Putting the loads together before the stores
* properly generates LDP.
}
-typedef void (*XXH3_f_accumulate_512)(void* XXH_RESTRICT, const void*, const void*);
+typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);
typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
#if (XXH_VECTOR == XXH_AVX512)
#define XXH3_accumulate_512 XXH3_accumulate_512_avx512
+#define XXH3_accumulate XXH3_accumulate_avx512
#define XXH3_scrambleAcc XXH3_scrambleAcc_avx512
#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
#elif (XXH_VECTOR == XXH_AVX2)
#define XXH3_accumulate_512 XXH3_accumulate_512_avx2
+#define XXH3_accumulate XXH3_accumulate_avx2
#define XXH3_scrambleAcc XXH3_scrambleAcc_avx2
#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
#elif (XXH_VECTOR == XXH_SSE2)
#define XXH3_accumulate_512 XXH3_accumulate_512_sse2
+#define XXH3_accumulate XXH3_accumulate_sse2
#define XXH3_scrambleAcc XXH3_scrambleAcc_sse2
#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
#elif (XXH_VECTOR == XXH_NEON)
#define XXH3_accumulate_512 XXH3_accumulate_512_neon
+#define XXH3_accumulate XXH3_accumulate_neon
#define XXH3_scrambleAcc XXH3_scrambleAcc_neon
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
#elif (XXH_VECTOR == XXH_VSX)
#define XXH3_accumulate_512 XXH3_accumulate_512_vsx
+#define XXH3_accumulate XXH3_accumulate_vsx
#define XXH3_scrambleAcc XXH3_scrambleAcc_vsx
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+#elif (XXH_VECTOR == XXH_SVE)
+#define XXH3_accumulate_512 XXH3_accumulate_512_sve
+#define XXH3_accumulate XXH3_accumulate_sve
+#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
+#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+
#else /* scalar */
#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
+#define XXH3_accumulate XXH3_accumulate_scalar
#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
#endif
-
-
-#ifndef XXH_PREFETCH_DIST
-# ifdef __clang__
-# define XXH_PREFETCH_DIST 320
-# else
-# if (XXH_VECTOR == XXH_AVX512)
-# define XXH_PREFETCH_DIST 512
-# else
-# define XXH_PREFETCH_DIST 384
-# endif
-# endif /* __clang__ */
-#endif /* XXH_PREFETCH_DIST */
-
-/*
- * XXH3_accumulate()
- * Loops over XXH3_accumulate_512().
- * Assumption: nbStripes will not overflow the secret size
- */
-XXH_FORCE_INLINE void
-XXH3_accumulate( xxh_u64* XXH_RESTRICT acc,
- const xxh_u8* XXH_RESTRICT input,
- const xxh_u8* XXH_RESTRICT secret,
- size_t nbStripes,
- XXH3_f_accumulate_512 f_acc512)
-{
- size_t n;
- for (n = 0; n < nbStripes; n++ ) {
- const xxh_u8* const in = input + n*XXH_STRIPE_LEN;
- XXH_PREFETCH(in + XXH_PREFETCH_DIST);
- f_acc512(acc,
- in,
- secret + n*XXH_SECRET_CONSUME_RATE);
- }
-}
+#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
+# undef XXH3_initCustomSecret
+# define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+#endif
XXH_FORCE_INLINE void
XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
const xxh_u8* XXH_RESTRICT input, size_t len,
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
- XXH3_f_accumulate_512 f_acc512,
+ XXH3_f_accumulate f_acc,
XXH3_f_scrambleAcc f_scramble)
{
size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
for (n = 0; n < nb_blocks; n++) {
- XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock, f_acc512);
+ f_acc(acc, input + n*block_len, secret, nbStripesPerBlock);
f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
}
XXH_ASSERT(len > XXH_STRIPE_LEN);
{ size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
- XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, f_acc512);
+ f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);
/* last stripe */
{ const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
#define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */
- f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
+ XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
} }
}
XXH_FORCE_INLINE XXH64_hash_t
XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
const void* XXH_RESTRICT secret, size_t secretSize,
- XXH3_f_accumulate_512 f_acc512,
+ XXH3_f_accumulate f_acc,
XXH3_f_scrambleAcc f_scramble)
{
XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
- XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc512, f_scramble);
+ XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);
/* converge into final hash */
XXH_STATIC_ASSERT(sizeof(acc) == 64);
* It's important for performance to transmit secret's size (when it's static)
* so that the compiler can properly optimize the vectorized loop.
* This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.
+ * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
+ * breaks -Og, this is XXH_NO_INLINE.
*/
-XXH_FORCE_INLINE XXH64_hash_t
+XXH3_WITH_SECRET_INLINE XXH64_hash_t
XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
{
(void)seed64;
- return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate_512, XXH3_scrambleAcc);
+ return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
}
/*
* Note that inside this no_inline function, we do inline the internal loop,
* and provide a statically defined secret size to allow optimization of vector loop.
*/
-XXH_NO_INLINE XXH64_hash_t
+XXH_NO_INLINE XXH_PUREF XXH64_hash_t
XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
{
(void)seed64; (void)secret; (void)secretLen;
- return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512, XXH3_scrambleAcc);
+ return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);
}
/*
XXH_FORCE_INLINE XXH64_hash_t
XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
XXH64_hash_t seed,
- XXH3_f_accumulate_512 f_acc512,
+ XXH3_f_accumulate f_acc,
XXH3_f_scrambleAcc f_scramble,
XXH3_f_initCustomSecret f_initSec)
{
+#if XXH_SIZE_OPT <= 0
if (seed == 0)
return XXH3_hashLong_64b_internal(input, len,
XXH3_kSecret, sizeof(XXH3_kSecret),
- f_acc512, f_scramble);
+ f_acc, f_scramble);
+#endif
{ XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
f_initSec(secret, seed);
return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
- f_acc512, f_scramble);
+ f_acc, f_scramble);
}
}
* It's important for performance that XXH3_hashLong is not inlined.
*/
XXH_NO_INLINE XXH64_hash_t
-XXH3_hashLong_64b_withSeed(const void* input, size_t len,
- XXH64_hash_t seed, const xxh_u8* secret, size_t secretLen)
+XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,
+ XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
{
(void)secret; (void)secretLen;
return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
- XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
+ XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
}
/* === Public entry point === */
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length)
{
- return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
+ return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
+XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)
{
- return XXH3_64bits_internal(input, len, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
+ return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
+XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
{
- return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
+ return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
}
XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
+XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
{
- if (len <= XXH3_MIDSIZE_MAX)
- return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
- return XXH3_hashLong_64b_withSecret(input, len, seed, (const xxh_u8*)secret, secretSize);
+ if (length <= XXH3_MIDSIZE_MAX)
+ return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+ return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
}
/* === XXH3 streaming === */
-
+#ifndef XXH_NO_STREAM
/*
* Malloc's a pointer that is always aligned to align.
*
*
* Align must be a power of 2 and 8 <= align <= 128.
*/
-static void* XXH_alignedMalloc(size_t s, size_t align)
+static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
{
XXH_ASSERT(align <= 128 && align >= 8); /* range check */
XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */
XXH_free(base);
}
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Allocate an @ref XXH3_state_t.
+ *
+ * @return An allocated pointer of @ref XXH3_state_t on success.
+ * @return `NULL` on failure.
+ *
+ * @note Must be freed with XXH3_freeState().
+ */
XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
{
XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
return state;
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Frees an @ref XXH3_state_t.
+ *
+ * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ *
+ * @return @ref XXH_OK.
+ *
+ * @note Must be allocated with XXH3_createState().
+ */
XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
{
XXH_alignedFree(statePtr);
return XXH_OK;
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API void
-XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
+XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state)
{
XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
}
statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset(XXH3_state_t* statePtr)
+XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
{
if (statePtr == NULL) return XXH_ERROR;
XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
return XXH_OK;
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
+XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
{
if (statePtr == NULL) return XXH_ERROR;
XXH3_reset_internal(statePtr, 0, secret, secretSize);
return XXH_OK;
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
+XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
{
if (statePtr == NULL) return XXH_ERROR;
if (seed==0) return XXH3_64bits_reset(statePtr);
return XXH_OK;
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed64)
+XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)
{
if (statePtr == NULL) return XXH_ERROR;
if (secret == NULL) return XXH_ERROR;
return XXH_OK;
}
-/* Note : when XXH3_consumeStripes() is invoked,
- * there must be a guarantee that at least one more byte must be consumed from input
- * so that the function can blindly consume all stripes using the "normal" secret segment */
-XXH_FORCE_INLINE void
+/*!
+ * @internal
+ * @brief Processes a large input for XXH3_update() and XXH3_digest_long().
+ *
+ * Unlike XXH3_hashLong_internal_loop(), this can process data that overlaps a block.
+ *
+ * @param acc Pointer to the 8 accumulator lanes
+ * @param nbStripesSoFarPtr In/out pointer to the number of leftover stripes in the block*
+ * @param nbStripesPerBlock Number of stripes in a block
+ * @param input Input pointer
+ * @param nbStripes Number of stripes to process
+ * @param secret Secret pointer
+ * @param secretLimit Offset of the last block in @p secret
+ * @param f_acc Pointer to an XXH3_accumulate implementation
+ * @param f_scramble Pointer to an XXH3_scrambleAcc implementation
+ * @return Pointer past the end of @p input after processing
+ */
+XXH_FORCE_INLINE const xxh_u8 *
XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
- XXH3_f_accumulate_512 f_acc512,
+ XXH3_f_accumulate f_acc,
XXH3_f_scrambleAcc f_scramble)
{
- XXH_ASSERT(nbStripes <= nbStripesPerBlock); /* can handle max 1 scramble per invocation */
- XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
- if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) {
- /* need a scrambling operation */
- size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr;
- size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock;
- XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock, f_acc512);
- f_scramble(acc, secret + secretLimit);
- XXH3_accumulate(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock, f_acc512);
- *nbStripesSoFarPtr = nbStripesAfterBlock;
- } else {
- XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, f_acc512);
+ const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
+ /* Process full blocks */
+ if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
+ /* Process the initial partial block... */
+ size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;
+
+ do {
+ /* Accumulate and scramble */
+ f_acc(acc, input, initialSecret, nbStripesThisIter);
+ f_scramble(acc, secret + secretLimit);
+ input += nbStripesThisIter * XXH_STRIPE_LEN;
+ nbStripes -= nbStripesThisIter;
+ /* Then continue the loop with the full block size */
+ nbStripesThisIter = nbStripesPerBlock;
+ initialSecret = secret;
+ } while (nbStripes >= nbStripesPerBlock);
+ *nbStripesSoFarPtr = 0;
+ }
+ /* Process a partial block */
+ if (nbStripes > 0) {
+ f_acc(acc, input, initialSecret, nbStripes);
+ input += nbStripes * XXH_STRIPE_LEN;
*nbStripesSoFarPtr += nbStripes;
}
+ /* Return end pointer */
+ return input;
}
#ifndef XXH3_STREAM_USE_STACK
-# ifndef __clang__ /* clang doesn't need additional stack space */
+# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
# define XXH3_STREAM_USE_STACK 1
# endif
#endif
XXH_FORCE_INLINE XXH_errorcode
XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
const xxh_u8* XXH_RESTRICT input, size_t len,
- XXH3_f_accumulate_512 f_acc512,
+ XXH3_f_accumulate f_acc,
XXH3_f_scrambleAcc f_scramble)
{
if (input==NULL) {
* when operating accumulators directly into state.
* Operating into stack space seems to enable proper optimization.
* clang, on the other hand, doesn't seem to need this trick */
- XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc));
+ XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
+ XXH_memcpy(acc, state->acc, sizeof(acc));
#else
xxh_u64* XXH_RESTRICT const acc = state->acc;
#endif
XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
/* small input : just fill in tmp buffer */
- if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {
+ if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
XXH_memcpy(state->buffer + state->bufferedSize, input, len);
state->bufferedSize += (XXH32_hash_t)len;
return XXH_OK;
&state->nbStripesSoFar, state->nbStripesPerBlock,
state->buffer, XXH3_INTERNALBUFFER_STRIPES,
secret, state->secretLimit,
- f_acc512, f_scramble);
+ f_acc, f_scramble);
state->bufferedSize = 0;
}
XXH_ASSERT(input < bEnd);
-
- /* large input to consume : ingest per full block */
- if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
+ if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
- XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar);
- /* join to current block's end */
- { size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
- XXH_ASSERT(nbStripesToEnd <= nbStripes);
- XXH3_accumulate(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd, f_acc512);
- f_scramble(acc, secret + state->secretLimit);
- state->nbStripesSoFar = 0;
- input += nbStripesToEnd * XXH_STRIPE_LEN;
- nbStripes -= nbStripesToEnd;
- }
- /* consume per entire blocks */
- while(nbStripes >= state->nbStripesPerBlock) {
- XXH3_accumulate(acc, input, secret, state->nbStripesPerBlock, f_acc512);
- f_scramble(acc, secret + state->secretLimit);
- input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
- nbStripes -= state->nbStripesPerBlock;
- }
- /* consume last partial block */
- XXH3_accumulate(acc, input, secret, nbStripes, f_acc512);
- input += nbStripes * XXH_STRIPE_LEN;
- XXH_ASSERT(input < bEnd); /* at least some bytes left */
- state->nbStripesSoFar = nbStripes;
- /* buffer predecessor of last partial stripe */
- XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
- XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);
- } else {
- /* content to consume <= block size */
- /* Consume input by a multiple of internal buffer size */
- if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
- const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
- do {
- XXH3_consumeStripes(acc,
+ input = XXH3_consumeStripes(acc,
&state->nbStripesSoFar, state->nbStripesPerBlock,
- input, XXH3_INTERNALBUFFER_STRIPES,
- secret, state->secretLimit,
- f_acc512, f_scramble);
- input += XXH3_INTERNALBUFFER_SIZE;
- } while (input<limit);
- /* buffer predecessor of last partial stripe */
- XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
- }
- }
+ input, nbStripes,
+ secret, state->secretLimit,
+ f_acc, f_scramble);
+ XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
+ }
/* Some remaining input (always) : buffer it */
XXH_ASSERT(input < bEnd);
XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
state->bufferedSize = (XXH32_hash_t)(bEnd-input);
#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
/* save stack accumulators into state */
- memcpy(state->acc, acc, sizeof(acc));
+ XXH_memcpy(state->acc, acc, sizeof(acc));
#endif
}
return XXH_OK;
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
+XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
{
return XXH3_update(state, (const xxh_u8*)input, len,
- XXH3_accumulate_512, XXH3_scrambleAcc);
+ XXH3_accumulate, XXH3_scrambleAcc);
}
const XXH3_state_t* state,
const unsigned char* secret)
{
+ xxh_u8 lastStripe[XXH_STRIPE_LEN];
+ const xxh_u8* lastStripePtr;
+
/*
* Digest on a local copy. This way, the state remains unaltered, and it can
* continue ingesting more input afterwards.
*/
XXH_memcpy(acc, state->acc, sizeof(state->acc));
if (state->bufferedSize >= XXH_STRIPE_LEN) {
+ /* Consume remaining stripes then point to remaining data in buffer */
size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
size_t nbStripesSoFar = state->nbStripesSoFar;
XXH3_consumeStripes(acc,
&nbStripesSoFar, state->nbStripesPerBlock,
state->buffer, nbStripes,
secret, state->secretLimit,
- XXH3_accumulate_512, XXH3_scrambleAcc);
- /* last stripe */
- XXH3_accumulate_512(acc,
- state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
- secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+ XXH3_accumulate, XXH3_scrambleAcc);
+ lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
} else { /* bufferedSize < XXH_STRIPE_LEN */
- xxh_u8 lastStripe[XXH_STRIPE_LEN];
+ /* Copy to temp buffer */
size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */
XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
- XXH3_accumulate_512(acc,
- lastStripe,
- secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+ lastStripePtr = lastStripe;
}
+ /* Last stripe */
+ XXH3_accumulate_512(acc,
+ lastStripePtr,
+ secret + state->secretLimit - XXH_SECRET_LASTACC_START);
}
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
{
const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
if (state->totalLen > XXH3_MIDSIZE_MAX) {
return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
secret, state->secretLimit + XXH_STRIPE_LEN);
}
-
+#endif /* !XXH_NO_STREAM */
/* ==========================================
* fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
*/
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
/* A doubled version of 1to3_64b with different constants. */
}
}
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
XXH_ASSERT(input != NULL);
m128.low64 ^= (m128.high64 >> 3);
m128.low64 = XXH_xorshift64(m128.low64, 35);
- m128.low64 *= 0x9FB21C651E98DF25ULL;
+ m128.low64 *= PRIME_MX2;
m128.low64 = XXH_xorshift64(m128.low64, 28);
m128.high64 = XXH3_avalanche(m128.high64);
return m128;
}
}
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
XXH_ASSERT(input != NULL);
/*
* Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
*/
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
XXH_ASSERT(len <= 16);
}
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
XXH64_hash_t seed)
{ XXH128_hash_t acc;
acc.low64 = len * XXH_PRIME64_1;
acc.high64 = 0;
+
+#if XXH_SIZE_OPT >= 1
+ {
+ /* Smaller, but slightly slower. */
+ unsigned int i = (unsigned int)(len - 1) / 32;
+ do {
+ acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
+ } while (i-- != 0);
+ }
+#else
if (len > 32) {
if (len > 64) {
if (len > 96) {
acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
}
acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
+#endif
{ XXH128_hash_t h128;
h128.low64 = acc.low64 + acc.high64;
h128.high64 = (acc.low64 * XXH_PRIME64_1)
}
}
-XXH_NO_INLINE XXH128_hash_t
+XXH_NO_INLINE XXH_PUREF XXH128_hash_t
XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
XXH64_hash_t seed)
XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
{ XXH128_hash_t acc;
- int const nbRounds = (int)len / 32;
- int i;
+ unsigned i;
acc.low64 = len * XXH_PRIME64_1;
acc.high64 = 0;
- for (i=0; i<4; i++) {
+ /*
+ * We set `i` as offset + 32. We do this so that the unchanged
+ * `len` can be used as upper bound. This reaches a sweet spot
+ * where both x86 and aarch64 get simple agen and good codegen
+ * for the loop.
+ */
+ for (i = 32; i < 160; i += 32) {
acc = XXH128_mix32B(acc,
- input + (32 * i),
- input + (32 * i) + 16,
- secret + (32 * i),
+ input + i - 32,
+ input + i - 16,
+ secret + i - 32,
seed);
}
acc.low64 = XXH3_avalanche(acc.low64);
acc.high64 = XXH3_avalanche(acc.high64);
- XXH_ASSERT(nbRounds >= 4);
- for (i=4 ; i < nbRounds; i++) {
+ /*
+ * NB: `i <= len` will duplicate the last 32-bytes if
+ * len % 32 was zero. This is an unfortunate necessity to keep
+ * the hash result stable.
+ */
+ for (i=160; i <= len; i += 32) {
acc = XXH128_mix32B(acc,
- input + (32 * i),
- input + (32 * i) + 16,
- secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)),
+ input + i - 32,
+ input + i - 16,
+ secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
seed);
}
/* last bytes */
input + len - 16,
input + len - 32,
secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
- 0ULL - seed);
+ (XXH64_hash_t)0 - seed);
{ XXH128_hash_t h128;
h128.low64 = acc.low64 + acc.high64;
XXH_FORCE_INLINE XXH128_hash_t
XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
- XXH3_f_accumulate_512 f_acc512,
+ XXH3_f_accumulate f_acc,
XXH3_f_scrambleAcc f_scramble)
{
XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
- XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc512, f_scramble);
+ XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);
/* converge into final hash */
XXH_STATIC_ASSERT(sizeof(acc) == 64);
}
/*
- * It's important for performance that XXH3_hashLong is not inlined.
+ * It's important for performance that XXH3_hashLong() is not inlined.
*/
-XXH_NO_INLINE XXH128_hash_t
+XXH_NO_INLINE XXH_PUREF XXH128_hash_t
XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
XXH64_hash_t seed64,
const void* XXH_RESTRICT secret, size_t secretLen)
{
(void)seed64; (void)secret; (void)secretLen;
return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
- XXH3_accumulate_512, XXH3_scrambleAcc);
+ XXH3_accumulate, XXH3_scrambleAcc);
}
/*
- * It's important for performance to pass @secretLen (when it's static)
+ * It's important for performance to pass @p secretLen (when it's static)
* to the compiler, so that it can properly optimize the vectorized loop.
+ *
+ * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
+ * breaks -Og, this is XXH_NO_INLINE.
*/
-XXH_FORCE_INLINE XXH128_hash_t
+XXH3_WITH_SECRET_INLINE XXH128_hash_t
XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
XXH64_hash_t seed64,
const void* XXH_RESTRICT secret, size_t secretLen)
{
(void)seed64;
return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
- XXH3_accumulate_512, XXH3_scrambleAcc);
+ XXH3_accumulate, XXH3_scrambleAcc);
}
XXH_FORCE_INLINE XXH128_hash_t
XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
XXH64_hash_t seed64,
- XXH3_f_accumulate_512 f_acc512,
+ XXH3_f_accumulate f_acc,
XXH3_f_scrambleAcc f_scramble,
XXH3_f_initCustomSecret f_initSec)
{
if (seed64 == 0)
return XXH3_hashLong_128b_internal(input, len,
XXH3_kSecret, sizeof(XXH3_kSecret),
- f_acc512, f_scramble);
+ f_acc, f_scramble);
{ XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
f_initSec(secret, seed64);
return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
- f_acc512, f_scramble);
+ f_acc, f_scramble);
}
}
{
(void)secret; (void)secretLen;
return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
- XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
+ XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
}
typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
/* === Public XXH128 API === */
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len)
{
return XXH3_128bits_internal(input, len, 0,
XXH3_kSecret, sizeof(XXH3_kSecret),
XXH3_hashLong_128b_default);
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
+XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
{
return XXH3_128bits_internal(input, len, 0,
(const xxh_u8*)secret, secretSize,
XXH3_hashLong_128b_withSecret);
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
+XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
{
return XXH3_128bits_internal(input, len, seed,
XXH3_kSecret, sizeof(XXH3_kSecret),
XXH3_hashLong_128b_withSeed);
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
+XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
{
if (len <= XXH3_MIDSIZE_MAX)
return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t
-XXH128(const void* input, size_t len, XXH64_hash_t seed)
+XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
{
return XXH3_128bits_withSeed(input, len, seed);
}
/* === XXH3 128-bit streaming === */
-
+#ifndef XXH_NO_STREAM
/*
* All initialization and update functions are identical to 64-bit streaming variant.
* The only difference is the finalization routine.
*/
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset(XXH3_state_t* statePtr)
+XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
{
return XXH3_64bits_reset(statePtr);
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
+XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
{
return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
+XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
{
return XXH3_64bits_reset_withSeed(statePtr, seed);
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed)
+XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
{
return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
+XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
{
- return XXH3_update(state, (const xxh_u8*)input, len,
- XXH3_accumulate_512, XXH3_scrambleAcc);
+ return XXH3_64bits_update(state, input, len);
}
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
{
const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
if (state->totalLen > XXH3_MIDSIZE_MAX) {
return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
secret, state->secretLimit + XXH_STRIPE_LEN);
}
-
+#endif /* !XXH_NO_STREAM */
/* 128-bit utility functions */
#include <string.h> /* memcmp, memcpy */
/* return : 1 is equal, 0 if different */
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
{
/* note : XXH128_hash_t is compact, it has no padding byte */
}
/* This prototype is compatible with stdlib's qsort().
- * return : >0 if *h128_1 > *h128_2
- * <0 if *h128_1 < *h128_2
- * =0 if *h128_1 == *h128_2 */
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
+ * @return : >0 if *h128_1 > *h128_2
+ * <0 if *h128_1 < *h128_2
+ * =0 if *h128_1 == *h128_2 */
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
{
XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
/*====== Canonical representation ======*/
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API void
-XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
+XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
{
XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
if (XXH_CPU_LITTLE_ENDIAN) {
XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t
-XXH128_hashFromCanonical(const XXH128_canonical_t* src)
+XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
{
XXH128_hash_t h;
h.high64 = XXH_readBE64(src);
XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
-XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize)
+XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
{
#if (XXH_DEBUGLEVEL >= 1)
XXH_ASSERT(secretBuffer != NULL);
return XXH_OK;
}
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
XXH_PUBLIC_API void
-XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
+XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
{
XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
XXH3_initCustomSecret(secret, seed);
/* Pop our optimization override from above */
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
&& defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
- && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
+ && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
# pragma GCC pop_options
#endif
#if defined (__cplusplus)
-}
+} /* extern "C" */
#endif
ZSTD_memcpy(dst, src, 8);
#endif
}
-#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+#define COPY8(d,s) do { ZSTD_copy8(d,s); d+=8; s+=8; } while (0)
/* Need to use memmove here since the literal buffer can now be located within
the dst buffer. In circumstances where the op "catches up" to where the
ZSTD_memcpy(dst, copy16_buf, 16);
#endif
}
-#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
+#define COPY16(d,s) do { ZSTD_copy16(d,s); d+=16; s+=16; } while (0)
#define WILDCOPY_OVERLENGTH 32
#define WILDCOPY_VECLEN 16
if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
/* Handle short offset copies. */
do {
- COPY8(op, ip)
+ COPY8(op, ip);
} while (op < oend);
} else {
assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
/*! ZSTD_getcBlockSize() :
* Provides the size of compressed block from block header `src` */
-/* Used by: decompress, fullbench (does not get its definition from here) */
+/* Used by: decompress, fullbench */
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
blockProperties_t* bpPtr);
/*! ZSTD_decodeSeqHeaders() :
* decode sequence header from src */
-/* Used by: decompress, fullbench (does not get its definition from here) */
+/* Used by: zstd_decompress_block, fullbench */
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
const void* src, size_t srcSize);
#include "../common/error_private.h"
#define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64
-#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
+#include "../common/zstd_deps.h" /* ZSTD_memset */
#include "../common/bits.h" /* ZSTD_highbit32 */
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+ 4 /* bitCount initialized at 4 */
+ 2 /* first two symbols may use one additional bit each */) / 8)
- + 1 /* round up to whole nb bytes */
- + 2 /* additional two bytes for bitstream flush */;
+ + 1 /* round up to whole nb bytes */
+ + 2 /* additional two bytes for bitstream flush */;
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
}
/* Init */
remaining = tableSize+1; /* +1 for extra accuracy */
threshold = tableSize;
- nbBits = tableLog+1;
+ nbBits = (int)tableLog+1;
while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
if (previousIs0) {
}
while (symbol >= start+3) {
start+=3;
- bitStream += 3 << bitCount;
+ bitStream += 3U << bitCount;
bitCount += 2;
}
bitStream += (symbol-start) << bitCount;
count++; /* +1 for extra accuracy */
if (count>=threshold)
count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
- bitStream += count << bitCount;
+ bitStream += (U32)count << bitCount;
bitCount += nbBits;
bitCount -= (count<max);
previousIs0 = (count==1);
out[1] = (BYTE)(bitStream>>8);
out+= (bitCount+7) /8;
- return (out-ostart);
+ assert(out >= ostart);
+ return (size_t)(out-ostart);
}
}
}
+HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
+{
+ HUF_CTableHeader header;
+ ZSTD_memcpy(&header, ctable, sizeof(header));
+ return header;
+}
+
+static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
+{
+ HUF_CTableHeader header;
+ HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header));
+ ZSTD_memset(&header, 0, sizeof(header));
+ assert(tableLog < 256);
+ header.tableLog = (BYTE)tableLog;
+ assert(maxSymbolValue < 256);
+ header.maxSymbolValue = (BYTE)maxSymbolValue;
+ ZSTD_memcpy(ctable, &header, sizeof(header));
+}
+
typedef struct {
HUF_CompressWeightsWksp wksp;
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
+ assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
+ assert(HUF_readCTableHeader(CTable).tableLog == huffLog);
+
/* check conditions */
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
- CTable[0] = tableLog;
+ *maxSymbolValuePtr = nbSymbols - 1;
+
+ HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);
/* Prepare base value per rank */
{ U32 n, nextRankStart = 0;
{ U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
}
- *maxSymbolValuePtr = nbSymbols - 1;
return readSize;
}
{
const HUF_CElt* const ct = CTable + 1;
assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
+ if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue)
+ return 0;
return (U32)HUF_getNbBits(ct[symbolValue]);
}
HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
for (n=0; n<alphabetSize; n++)
HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
- CTable[0] = maxNbBits;
+
+ HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue);
}
size_t
}
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
- HUF_CElt const* ct = CTable + 1;
- int bad = 0;
- int s;
- for (s = 0; s <= (int)maxSymbolValue; ++s) {
- bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
- }
- return !bad;
+ HUF_CTableHeader header = HUF_readCTableHeader(CTable);
+ HUF_CElt const* ct = CTable + 1;
+ int bad = 0;
+ int s;
+
+ assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);
+
+ if (header.maxSymbolValue < maxSymbolValue)
+ return 0;
+
+ for (s = 0; s <= (int)maxSymbolValue; ++s) {
+ bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
+ }
+ return !bad;
}
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
const void* src, size_t srcSize,
const HUF_CElt* CTable)
{
- U32 const tableLog = (U32)CTable[0];
+ U32 const tableLog = HUF_readCTableHeader(CTable).tableLog;
HUF_CElt const* ct = CTable + 1;
const BYTE* ip = (const BYTE*) src;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
- BYTE* op = ostart;
HUF_CStream_t bitC;
/* init */
if (dstSize < 8) return 0; /* not enough space to compress */
- { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
+ { BYTE* op = ostart;
+ size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
if (HUF_isError(initErr)) return 0; }
if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
{ BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
- size_t maxBits, hSize, newSize;
+ size_t hSize, newSize;
const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
size_t optSize = ((size_t) ~0) - 1;
/* Search until size increases */
for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
- maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
- if (ERR_isError(maxBits)) continue;
- if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
+ { size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
+ if (ERR_isError(maxBits)) continue;
+
+ if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
- hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
+ hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
+ }
if (ERR_isError(hSize)) continue;
huffLog = (U32)maxBits;
DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
}
- /* Zero unused symbols in CTable, so we can check it for validity */
- {
- size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
- size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
- ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
- }
/* Write table description header */
{ CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
/* HUF_compress4X_repeat():
* compress input using 4 streams.
* consider skipping quickly
- * re-use an existing huffman compression table */
+ * reuse an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{
+ DEBUGLOG(3, "ZSTD_freeCCtx (address: %p)", (void*)cctx);
if (cctx==NULL) return 0; /* support free on NULL */
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
"not compatible with static CCtx");
return 0;
}
-#define BOUNDCHECK(cParam, val) { \
- RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
- parameter_outOfBound, "Param out of bounds"); \
-}
+#define BOUNDCHECK(cParam, val) \
+ do { \
+ RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
+ parameter_outOfBound, "Param out of bounds"); \
+ } while (0)
static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
#else
FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
CCtxParams->nbWorkers = value;
- return CCtxParams->nbWorkers;
+ return (size_t)(CCtxParams->nbWorkers);
#endif
case ZSTD_c_jobSize :
#else
FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
CCtxParams->overlapLog = value;
- return CCtxParams->overlapLog;
+ return (size_t)CCtxParams->overlapLog;
#endif
case ZSTD_c_rsyncable :
#else
FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
CCtxParams->rsyncable = value;
- return CCtxParams->rsyncable;
+ return (size_t)CCtxParams->rsyncable;
#endif
case ZSTD_c_enableDedicatedDictSearch :
return CCtxParams->ldmParams.hashRateLog;
case ZSTD_c_targetCBlockSize :
- if (value!=0) /* 0 ==> default */
+ if (value!=0) { /* 0 ==> default */
+ value = MAX(value, ZSTD_TARGETCBLOCKSIZE_MIN);
BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
+ }
CCtxParams->targetCBlockSize = (U32)value;
return CCtxParams->targetCBlockSize;
case ZSTD_c_validateSequences:
BOUNDCHECK(ZSTD_c_validateSequences, value);
CCtxParams->validateSequences = value;
- return CCtxParams->validateSequences;
+ return (size_t)CCtxParams->validateSequences;
case ZSTD_c_useBlockSplitter:
BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
case ZSTD_c_deterministicRefPrefix:
BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
CCtxParams->deterministicRefPrefix = !!value;
- return CCtxParams->deterministicRefPrefix;
+ return (size_t)CCtxParams->deterministicRefPrefix;
case ZSTD_c_prefetchCDictTables:
BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
case ZSTD_c_enableSeqProducerFallback:
BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);
CCtxParams->enableMatchFinderFallback = value;
- return CCtxParams->enableMatchFinderFallback;
+ return (size_t)CCtxParams->enableMatchFinderFallback;
case ZSTD_c_maxBlockSize:
if (value!=0) /* 0 ==> default */
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
"Reset parameters is only possible during init stage.");
ZSTD_clearAllDicts(cctx);
- ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx));
return ZSTD_CCtxParams_reset(&cctx->requestedParams);
}
return 0;
static ZSTD_compressionParameters
ZSTD_clampCParams(ZSTD_compressionParameters cParams)
{
-# define CLAMP_TYPE(cParam, val, type) { \
- ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \
- if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \
- else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
- }
+# define CLAMP_TYPE(cParam, val, type) \
+ do { \
+ ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \
+ if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \
+ else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
+ } while (0)
# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
CLAMP(ZSTD_c_windowLog, cParams.windowLog);
CLAMP(ZSTD_c_chainLog, cParams.chainLog);
const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
assert(ZSTD_checkCParams(cPar)==0);
+ /* Cascade the selected strategy down to the next-highest one built into
+ * this binary. */
+#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+ if (cPar.strategy == ZSTD_btultra2) {
+ cPar.strategy = ZSTD_btultra;
+ }
+ if (cPar.strategy == ZSTD_btultra) {
+ cPar.strategy = ZSTD_btopt;
+ }
+#endif
+#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
+ if (cPar.strategy == ZSTD_btopt) {
+ cPar.strategy = ZSTD_btlazy2;
+ }
+#endif
+#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
+ if (cPar.strategy == ZSTD_btlazy2) {
+ cPar.strategy = ZSTD_lazy2;
+ }
+#endif
+#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
+ if (cPar.strategy == ZSTD_lazy2) {
+ cPar.strategy = ZSTD_lazy;
+ }
+#endif
+#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
+ if (cPar.strategy == ZSTD_lazy) {
+ cPar.strategy = ZSTD_greedy;
+ }
+#endif
+#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
+ if (cPar.strategy == ZSTD_greedy) {
+ cPar.strategy = ZSTD_dfast;
+ }
+#endif
+#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
+ if (cPar.strategy == ZSTD_dfast) {
+ cPar.strategy = ZSTD_fast;
+ cPar.targetLength = 0;
+ }
+#endif
+
switch (mode) {
case ZSTD_cpm_unknown:
case ZSTD_cpm_noAttachDict:
+ ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
+ ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
+ ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
- + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
- + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+ + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_match_t))
+ + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
? ZSTD_cwksp_aligned_alloc_size(hSize)
: 0;
* be needed. However, we still allocate two 0-sized buffers, which can
* take space under ASAN. */
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
- &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
+ &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
}
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
&cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
- ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
+ ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
}
}
ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
- ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
- ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+ ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_match_t));
+ ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
}
ms->cParams = *cParams;
{ size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
size_t const blockSize = MIN(params->maxBlockSize, windowSize);
- size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useSequenceProducer);
+ size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, ZSTD_hasExtSeqProd(params));
size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
? ZSTD_compressBound(blockSize) + 1
: 0;
size_t const neededSpace =
ZSTD_estimateCCtxSize_usingCCtxParams_internal(
¶ms->cParams, ¶ms->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
- buffInSize, buffOutSize, pledgedSrcSize, params->useSequenceProducer, params->maxBlockSize);
- int resizeWorkspace;
+ buffInSize, buffOutSize, pledgedSrcSize, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
{ /* Check if workspace is large enough, alloc a new one if needed */
int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
- resizeWorkspace = workspaceTooSmall || workspaceWasteful;
+ int resizeWorkspace = workspaceTooSmall || workspaceWasteful;
DEBUGLOG(4, "Need %zu B workspace", neededSpace);
DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
}
/* reserve space for block-level external sequences */
- if (params->useSequenceProducer) {
+ if (ZSTD_hasExtSeqProd(params)) {
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
- zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
- zc->externalMatchCtx.seqBuffer =
+ zc->extSeqBufCapacity = maxNbExternalSeq;
+ zc->extSeqBuf =
(ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
}
assert(size < (1U<<31)); /* can be casted to int */
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
- /* To validate that the table re-use logic is sound, and that we don't
+ /* To validate that the table reuse logic is sound, and that we don't
* access table space that we haven't cleaned, we re-"poison" the table
* space every time we mark it dirty.
*
static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
{ ZSTD_compressBlock_fast /* default for 0 */,
ZSTD_compressBlock_fast,
- ZSTD_compressBlock_doubleFast,
- ZSTD_compressBlock_greedy,
- ZSTD_compressBlock_lazy,
- ZSTD_compressBlock_lazy2,
- ZSTD_compressBlock_btlazy2,
- ZSTD_compressBlock_btopt,
- ZSTD_compressBlock_btultra,
- ZSTD_compressBlock_btultra2 },
+ ZSTD_COMPRESSBLOCK_DOUBLEFAST,
+ ZSTD_COMPRESSBLOCK_GREEDY,
+ ZSTD_COMPRESSBLOCK_LAZY,
+ ZSTD_COMPRESSBLOCK_LAZY2,
+ ZSTD_COMPRESSBLOCK_BTLAZY2,
+ ZSTD_COMPRESSBLOCK_BTOPT,
+ ZSTD_COMPRESSBLOCK_BTULTRA,
+ ZSTD_COMPRESSBLOCK_BTULTRA2
+ },
{ ZSTD_compressBlock_fast_extDict /* default for 0 */,
ZSTD_compressBlock_fast_extDict,
- ZSTD_compressBlock_doubleFast_extDict,
- ZSTD_compressBlock_greedy_extDict,
- ZSTD_compressBlock_lazy_extDict,
- ZSTD_compressBlock_lazy2_extDict,
- ZSTD_compressBlock_btlazy2_extDict,
- ZSTD_compressBlock_btopt_extDict,
- ZSTD_compressBlock_btultra_extDict,
- ZSTD_compressBlock_btultra_extDict },
+ ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT,
+ ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT,
+ ZSTD_COMPRESSBLOCK_LAZY_EXTDICT,
+ ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT,
+ ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT,
+ ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT,
+ ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT,
+ ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT
+ },
{ ZSTD_compressBlock_fast_dictMatchState /* default for 0 */,
ZSTD_compressBlock_fast_dictMatchState,
- ZSTD_compressBlock_doubleFast_dictMatchState,
- ZSTD_compressBlock_greedy_dictMatchState,
- ZSTD_compressBlock_lazy_dictMatchState,
- ZSTD_compressBlock_lazy2_dictMatchState,
- ZSTD_compressBlock_btlazy2_dictMatchState,
- ZSTD_compressBlock_btopt_dictMatchState,
- ZSTD_compressBlock_btultra_dictMatchState,
- ZSTD_compressBlock_btultra_dictMatchState },
+ ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE,
+ ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE,
+ ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE,
+ ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE,
+ ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE,
+ ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE,
+ ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE,
+ ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE
+ },
{ NULL /* default for 0 */,
NULL,
NULL,
- ZSTD_compressBlock_greedy_dedicatedDictSearch,
- ZSTD_compressBlock_lazy_dedicatedDictSearch,
- ZSTD_compressBlock_lazy2_dedicatedDictSearch,
+ ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH,
+ ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH,
+ ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH,
NULL,
NULL,
NULL,
DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
- { ZSTD_compressBlock_greedy_row,
- ZSTD_compressBlock_lazy_row,
- ZSTD_compressBlock_lazy2_row },
- { ZSTD_compressBlock_greedy_extDict_row,
- ZSTD_compressBlock_lazy_extDict_row,
- ZSTD_compressBlock_lazy2_extDict_row },
- { ZSTD_compressBlock_greedy_dictMatchState_row,
- ZSTD_compressBlock_lazy_dictMatchState_row,
- ZSTD_compressBlock_lazy2_dictMatchState_row },
- { ZSTD_compressBlock_greedy_dedicatedDictSearch_row,
- ZSTD_compressBlock_lazy_dedicatedDictSearch_row,
- ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
+ {
+ ZSTD_COMPRESSBLOCK_GREEDY_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY2_ROW
+ },
+ {
+ ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW
+ },
+ {
+ ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW
+ },
+ {
+ ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW
+ }
};
DEBUGLOG(4, "Selecting a row-based matchfinder");
assert(useRowMatchFinder != ZSTD_ps_auto);
/* External matchfinder + LDM is technically possible, just not implemented yet.
* We need to revisit soon and implement it. */
RETURN_ERROR_IF(
- zc->appliedParams.useSequenceProducer,
+ ZSTD_hasExtSeqProd(&zc->appliedParams),
parameter_combination_unsupported,
"Long-distance matching with external sequence producer enabled is not currently supported."
);
/* External matchfinder + LDM is technically possible, just not implemented yet.
* We need to revisit soon and implement it. */
RETURN_ERROR_IF(
- zc->appliedParams.useSequenceProducer,
+ ZSTD_hasExtSeqProd(&zc->appliedParams),
parameter_combination_unsupported,
"Long-distance matching with external sequence producer enabled is not currently supported."
);
zc->appliedParams.useRowMatchFinder,
src, srcSize);
assert(ldmSeqStore.pos == ldmSeqStore.size);
- } else if (zc->appliedParams.useSequenceProducer) {
+ } else if (ZSTD_hasExtSeqProd(&zc->appliedParams)) {
assert(
- zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize)
+ zc->extSeqBufCapacity >= ZSTD_sequenceBound(srcSize)
);
- assert(zc->externalMatchCtx.mFinder != NULL);
+ assert(zc->appliedParams.extSeqProdFunc != NULL);
{ U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog;
- size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)(
- zc->externalMatchCtx.mState,
- zc->externalMatchCtx.seqBuffer,
- zc->externalMatchCtx.seqBufferCapacity,
+ size_t const nbExternalSeqs = (zc->appliedParams.extSeqProdFunc)(
+ zc->appliedParams.extSeqProdState,
+ zc->extSeqBuf,
+ zc->extSeqBufCapacity,
src, srcSize,
NULL, 0, /* dict and dictSize, currently not supported */
zc->appliedParams.compressionLevel,
);
size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult(
- zc->externalMatchCtx.seqBuffer,
+ zc->extSeqBuf,
nbExternalSeqs,
- zc->externalMatchCtx.seqBufferCapacity,
+ zc->extSeqBufCapacity,
srcSize
);
/* Return early if there is no error, since we don't need to worry about last literals */
if (!ZSTD_isError(nbPostProcessedSeqs)) {
ZSTD_sequencePosition seqPos = {0,0,0};
- size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs);
+ size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->extSeqBuf, nbPostProcessedSeqs);
RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!");
FORWARD_IF_ERROR(
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
zc, &seqPos,
- zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs,
+ zc->extSeqBuf, nbPostProcessedSeqs,
src, srcSize,
zc->appliedParams.searchForExternalRepcodes
),
}
/* Fallback to software matchfinder */
- { ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
- zc->appliedParams.useRowMatchFinder,
- dictMode);
+ { ZSTD_blockCompressor const blockCompressor =
+ ZSTD_selectBlockCompressor(
+ zc->appliedParams.cParams.strategy,
+ zc->appliedParams.useRowMatchFinder,
+ dictMode);
ms->ldmSeqStore = NULL;
DEBUGLOG(
5,
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
} }
} else { /* not long range mode and no external matchfinder */
- ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
- zc->appliedParams.useRowMatchFinder,
- dictMode);
+ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(
+ zc->appliedParams.cParams.strategy,
+ zc->appliedParams.useRowMatchFinder,
+ dictMode);
ms->ldmSeqStore = NULL;
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
}
return ZSTDbss_compress;
}
-static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
+static size_t ZSTD_copyBlockSequences(SeqCollector* seqCollector, const seqStore_t* seqStore, const U32 prevRepcodes[ZSTD_REP_NUM])
{
- const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
- const seqDef* seqStoreSeqs = seqStore->sequencesStart;
- size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
- size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
- size_t literalsRead = 0;
- size_t lastLLSize;
+ const seqDef* inSeqs = seqStore->sequencesStart;
+ const size_t nbInSequences = seqStore->sequences - inSeqs;
+ const size_t nbInLiterals = (size_t)(seqStore->lit - seqStore->litStart);
- ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
+ ZSTD_Sequence* outSeqs = seqCollector->seqIndex == 0 ? seqCollector->seqStart : seqCollector->seqStart + seqCollector->seqIndex;
+ const size_t nbOutSequences = nbInSequences + 1;
+ size_t nbOutLiterals = 0;
+ repcodes_t repcodes;
size_t i;
- repcodes_t updatedRepcodes;
- assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
- /* Ensure we have enough space for last literals "sequence" */
- assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
- ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
- for (i = 0; i < seqStoreSeqSize; ++i) {
- U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;
- outSeqs[i].litLength = seqStoreSeqs[i].litLength;
- outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
+ /* Bounds check that we have enough space for every input sequence
+ * and the block delimiter
+ */
+ assert(seqCollector->seqIndex <= seqCollector->maxSequences);
+ RETURN_ERROR_IF(
+ nbOutSequences > (size_t)(seqCollector->maxSequences - seqCollector->seqIndex),
+ dstSize_tooSmall,
+ "Not enough space to copy sequences");
+
+ ZSTD_memcpy(&repcodes, prevRepcodes, sizeof(repcodes));
+ for (i = 0; i < nbInSequences; ++i) {
+ U32 rawOffset;
+ outSeqs[i].litLength = inSeqs[i].litLength;
+ outSeqs[i].matchLength = inSeqs[i].mlBase + MINMATCH;
outSeqs[i].rep = 0;
+ /* Handle the possible single length >= 64K
+ * There can only be one because we add MINMATCH to every match length,
+ * and blocks are at most 128K.
+ */
if (i == seqStore->longLengthPos) {
if (seqStore->longLengthType == ZSTD_llt_literalLength) {
outSeqs[i].litLength += 0x10000;
}
}
- if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {
- /* Derive the correct offset corresponding to a repcode */
- outSeqs[i].rep = seqStoreSeqs[i].offBase;
+ /* Determine the raw offset given the offBase, which may be a repcode. */
+ if (OFFBASE_IS_REPCODE(inSeqs[i].offBase)) {
+ const U32 repcode = OFFBASE_TO_REPCODE(inSeqs[i].offBase);
+ assert(repcode > 0);
+ outSeqs[i].rep = repcode;
if (outSeqs[i].litLength != 0) {
- rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
+ rawOffset = repcodes.rep[repcode - 1];
} else {
- if (outSeqs[i].rep == 3) {
- rawOffset = updatedRepcodes.rep[0] - 1;
+ if (repcode == 3) {
+ assert(repcodes.rep[0] > 1);
+ rawOffset = repcodes.rep[0] - 1;
} else {
- rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
+ rawOffset = repcodes.rep[repcode];
}
}
+ } else {
+ rawOffset = OFFBASE_TO_OFFSET(inSeqs[i].offBase);
}
outSeqs[i].offset = rawOffset;
- /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
- so we provide seqStoreSeqs[i].offset - 1 */
- ZSTD_updateRep(updatedRepcodes.rep,
- seqStoreSeqs[i].offBase,
- seqStoreSeqs[i].litLength == 0);
- literalsRead += outSeqs[i].litLength;
+
+ /* Update repcode history for the sequence */
+ ZSTD_updateRep(repcodes.rep,
+ inSeqs[i].offBase,
+ inSeqs[i].litLength == 0);
+
+ nbOutLiterals += outSeqs[i].litLength;
}
/* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
* If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
* for the block boundary, according to the API.
*/
- assert(seqStoreLiteralsSize >= literalsRead);
- lastLLSize = seqStoreLiteralsSize - literalsRead;
- outSeqs[i].litLength = (U32)lastLLSize;
- outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
- seqStoreSeqSize++;
- zc->seqCollector.seqIndex += seqStoreSeqSize;
+ assert(nbInLiterals >= nbOutLiterals);
+ {
+ const size_t lastLLSize = nbInLiterals - nbOutLiterals;
+ outSeqs[nbInSequences].litLength = (U32)lastLLSize;
+ outSeqs[nbInSequences].matchLength = 0;
+ outSeqs[nbInSequences].offset = 0;
+ assert(nbOutSequences == nbInSequences + 1);
+ }
+ seqCollector->seqIndex += nbOutSequences;
+ assert(seqCollector->seqIndex <= seqCollector->maxSequences);
+
+ return 0;
}
size_t ZSTD_sequenceBound(size_t srcSize) {
- return (srcSize / ZSTD_MINMATCH_MIN) + 1;
+ const size_t maxNbSeq = (srcSize / ZSTD_MINMATCH_MIN) + 1;
+ const size_t maxNbDelims = (srcSize / ZSTD_BLOCKSIZE_MAX_MIN) + 1;
+ return maxNbSeq + maxNbDelims;
}
size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
const size_t dstCapacity = ZSTD_compressBound(srcSize);
void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
SeqCollector seqCollector;
+ {
+ int targetCBlockSize;
+ FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_targetCBlockSize, &targetCBlockSize), "");
+ RETURN_ERROR_IF(targetCBlockSize != 0, parameter_unsupported, "targetCBlockSize != 0");
+ }
+ {
+ int nbWorkers;
+ FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_nbWorkers, &nbWorkers), "");
+ RETURN_ERROR_IF(nbWorkers != 0, parameter_unsupported, "nbWorkers != 0");
+ }
RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
seqCollector.maxSequences = outSeqsSize;
zc->seqCollector = seqCollector;
- ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
- ZSTD_customFree(dst, ZSTD_defaultCMem);
+ {
+ const size_t ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
+ ZSTD_customFree(dst, ZSTD_defaultCMem);
+ FORWARD_IF_ERROR(ret, "ZSTD_compress2 failed");
+ }
+ assert(zc->seqCollector.seqIndex <= ZSTD_sequenceBound(srcSize));
return zc->seqCollector.seqIndex;
}
cSeqsSize = 1;
}
+ /* Sequence collection not supported when block splitting */
if (zc->seqCollector.collectSequences) {
- ZSTD_copyBlockSequences(zc);
+ FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, seqStore, dRepOriginal.rep), "copyBlockSequences failed");
ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
return 0;
}
if (bss == ZSTDbss_noCompress) {
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+ RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
{ const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
- if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
+ if (bss == ZSTDbss_noCompress) {
+ RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
+ cSize = 0;
+ goto out;
+ }
}
if (zc->seqCollector.collectSequences) {
- ZSTD_copyBlockSequences(zc);
+ FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, ZSTD_getSeqStore(zc), zc->blockState.prevCBlock->rep), "copyBlockSequences failed");
ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
return 0;
}
}
}
-size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
{
- RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
- "wrong cctx stage");
- RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable,
- parameter_unsupported,
- "incompatible with ldm");
+ assert(cctx->stage == ZSTDcs_init);
+ assert(nbSeq == 0 || cctx->appliedParams.ldmParams.enableLdm != ZSTD_ps_enable);
cctx->externSeqStore.seq = seq;
cctx->externSeqStore.size = nbSeq;
cctx->externSeqStore.capacity = nbSeq;
cctx->externSeqStore.pos = 0;
cctx->externSeqStore.posInSequence = 0;
- return 0;
}
ZSTD_fillHashTable(ms, iend, dtlm, tfp);
break;
case ZSTD_dfast:
+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp);
+#else
+ assert(0); /* shouldn't be called: cparams should've been adjusted. */
+#endif
break;
case ZSTD_greedy:
case ZSTD_lazy:
case ZSTD_lazy2:
+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR)
assert(srcSize >= HASH_READ_SIZE);
if (ms->dedicatedDictSearch) {
assert(ms->chainTable != NULL);
DEBUGLOG(4, "Using chain-based hash table for lazy dict");
}
}
+#else
+ assert(0); /* shouldn't be called: cparams should've been adjusted. */
+#endif
break;
case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
case ZSTD_btopt:
case ZSTD_btultra:
case ZSTD_btultra2:
+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
assert(srcSize >= HASH_READ_SIZE);
ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
+#else
+ assert(0); /* shouldn't be called: cparams should've been adjusted. */
+#endif
break;
default:
/* We only set the loaded table as valid if it contains all non-zero
* weights. Otherwise, we set it to check */
- if (!hasZeroWeights)
+ if (!hasZeroWeights && maxSymbolValue == 255)
bs->entropy.huf.repeatMode = HUF_repeat_valid;
RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
- RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
dictPtr += hufHeaderSize;
}
{
BYTE* const ostart = (BYTE*)dst;
BYTE* op = ostart;
- size_t fhSize = 0;
DEBUGLOG(4, "ZSTD_writeEpilogue");
RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
/* special case : empty frame */
if (cctx->stage == ZSTDcs_init) {
- fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
+ size_t fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
dstCapacity -= fhSize;
op += fhSize;
if (cctx->stage != ZSTDcs_ending) {
/* write one last empty block, make it the "last" block */
U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
- RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
- MEM_writeLE32(op, cBlockHeader24);
+ ZSTD_STATIC_ASSERT(ZSTD_BLOCKHEADERSIZE == 3);
+ RETURN_ERROR_IF(dstCapacity<3, dstSize_tooSmall, "no room for epilogue");
+ MEM_writeLE24(op, cBlockHeader24);
op += ZSTD_blockHeaderSize;
dstCapacity -= ZSTD_blockHeaderSize;
}
cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
customMem);
- if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+ if (!cdict || ZSTD_isError( ZSTD_initCDict_internal(cdict,
dict, dictSize,
dictLoadMethod, dictContentType,
cctxParams) )) {
if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) {
assert(input->pos >= zcs->stableIn_notConsumed);
input->pos -= zcs->stableIn_notConsumed;
- ip -= zcs->stableIn_notConsumed;
+ if (ip) ip -= zcs->stableIn_notConsumed;
zcs->stableIn_notConsumed = 0;
}
if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
#ifdef ZSTD_MULTITHREAD
/* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */
RETURN_ERROR_IF(
- params.useSequenceProducer == 1 && params.nbWorkers >= 1,
+ ZSTD_hasExtSeqProd(¶ms) && params.nbWorkers >= 1,
parameter_combination_unsupported,
"External sequence producer isn't supported with nbWorkers >= 1"
);
if (cctx->appliedParams.validateSequences) {
seqPos->posInSrc += litLength + matchLength;
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
- cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
+ cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)),
"Sequence validation failed");
}
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
if (cctx->appliedParams.validateSequences) {
seqPos->posInSrc += litLength + matchLength;
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
- cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
+ cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)),
"Sequence validation failed");
}
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
}
void ZSTD_registerSequenceProducer(
- ZSTD_CCtx* zc, void* mState,
- ZSTD_sequenceProducer_F* mFinder
+ ZSTD_CCtx* zc,
+ void* extSeqProdState,
+ ZSTD_sequenceProducer_F extSeqProdFunc
+) {
+ assert(zc != NULL);
+ ZSTD_CCtxParams_registerSequenceProducer(
+ &zc->requestedParams, extSeqProdState, extSeqProdFunc
+ );
+}
+
+void ZSTD_CCtxParams_registerSequenceProducer(
+ ZSTD_CCtx_params* params,
+ void* extSeqProdState,
+ ZSTD_sequenceProducer_F extSeqProdFunc
) {
- if (mFinder != NULL) {
- ZSTD_externalMatchCtx emctx;
- emctx.mState = mState;
- emctx.mFinder = mFinder;
- emctx.seqBuffer = NULL;
- emctx.seqBufferCapacity = 0;
- zc->externalMatchCtx = emctx;
- zc->requestedParams.useSequenceProducer = 1;
+ assert(params != NULL);
+ if (extSeqProdFunc != NULL) {
+ params->extSeqProdFunc = extSeqProdFunc;
+ params->extSeqProdState = extSeqProdState;
} else {
- ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx));
- zc->requestedParams.useSequenceProducer = 0;
+ params->extSeqProdFunc = NULL;
+ params->extSeqProdState = NULL;
}
}
It's not a big deal though : candidate will just be sorted again.
Additionally, candidate position 1 will be lost.
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
- The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
+ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
typedef struct {
- int price;
- U32 off;
- U32 mlen;
- U32 litlen;
- U32 rep[ZSTD_REP_NUM];
+ int price; /* price from beginning of segment to this position */
+ U32 off; /* offset of previous match */
+ U32 mlen; /* length of previous match */
+ U32 litlen; /* nb of literals since previous match */
+ U32 rep[ZSTD_REP_NUM]; /* offset history after previous match */
} ZSTD_optimal_t;
typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
+#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3)
typedef struct {
/* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
unsigned* litFreq; /* table of literals statistics, of size 256 */
unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
- ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
- ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
+ ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_SIZE */
+ ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */
U32 litSum; /* nb of literals */
U32 litLengthSum; /* nb of litLength codes */
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
- U64 hashSalt; /* For row-based matchFinder: salts the hash for re-use of tag table */
+ U64 hashSalt; /* For row-based matchFinder: salts the hash for reuse of tag table */
U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */
U32* hashTable;
* if the external matchfinder returns an error code. */
int enableMatchFinderFallback;
- /* Indicates whether an external matchfinder has been referenced.
- * Users can't set this externally.
- * It is set internally in ZSTD_registerSequenceProducer(). */
- int useSequenceProducer;
+ /* Parameters for the external sequence producer API.
+ * Users set these parameters through ZSTD_registerSequenceProducer().
+ * It is not possible to set these parameters individually through the public API. */
+ void* extSeqProdState;
+ ZSTD_sequenceProducer_F extSeqProdFunc;
/* Adjust the max block size*/
size_t maxBlockSize;
ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx;
-/* Context for block-level external matchfinder API */
-typedef struct {
- void* mState;
- ZSTD_sequenceProducer_F* mFinder;
- ZSTD_Sequence* seqBuffer;
- size_t seqBufferCapacity;
-} ZSTD_externalMatchCtx;
-
struct ZSTD_CCtx_s {
ZSTD_compressionStage_e stage;
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
/* Workspace for block splitter */
ZSTD_blockSplitCtx blockSplitCtx;
- /* Workspace for external matchfinder */
- ZSTD_externalMatchCtx externalMatchCtx;
+ /* Buffer for output from external sequence producer */
+ ZSTD_Sequence* extSeqBuf;
+ size_t extSeqBufCapacity;
};
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
* The least significant cycleLog bits of the indices must remain the same,
* which may be 0. Every index up to maxDist in the past must be valid.
*/
-MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
U32 maxDist, void const* src)
{
/* preemptive overflow correction:
* forget about the extDict. Handles overlap of the prefix and extDict.
* Returns non-zero if the segment is contiguous.
*/
-MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_window_update(ZSTD_window_t* window,
void const* src, size_t srcSize,
int forceNonContiguous)
{
* This cannot be used when long range matching is enabled.
* Zstd will use these sequences, and pass the literals to a secondary block
* compressor.
- * @return : An error code on failure.
* NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
* access and data corruption.
*/
-size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
/** ZSTD_cycleLog() :
* condition for correct operation : hashLog > 1 */
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
+/* ZSTD_hasExtSeqProd() :
+ * Returns 1 if an external sequence producer is registered, otherwise returns 0.
+ * Replaces the former `useSequenceProducer` flag: registration is now detected
+ * directly from the producer callback pointer (@extSeqProdFunc) stored in params. */
+MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) {
+ return params->extSeqProdFunc != NULL;
+}
/* ===============================================================
* Deprecated definitions that are still used internally to avoid
}
{ int const flags = bmi2 ? HUF_flags_bmi2 : 0;
- const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags)
- : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags);
+ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags)
+ : HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags);
op += cSize;
cLitSize += cSize;
if (cSize == 0 || ERR_isError(cSize)) {
switch(lhSize)
{
case 3: /* 2 - 2 - 10 - 10 */
- { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
+ { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc);
break;
}
}
*entropyWritten = 1;
DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
- return op-ostart;
+ return (size_t)(op-ostart);
}
static size_t
ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
- const seqDef* sequences, size_t nbSeq,
- size_t litSize, int lastSequence)
+ const seqDef* sequences, size_t nbSeqs,
+ size_t litSize, int lastSubBlock)
{
- const seqDef* const sstart = sequences;
- const seqDef* const send = sequences + nbSeq;
- const seqDef* sp = sstart;
size_t matchLengthSum = 0;
size_t litLengthSum = 0;
- (void)(litLengthSum); /* suppress unused variable warning on some environments */
- while (send-sp > 0) {
- ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
+ size_t n;
+ for (n=0; n<nbSeqs; n++) {
+ const ZSTD_sequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
litLengthSum += seqLen.litLength;
matchLengthSum += seqLen.matchLength;
- sp++;
}
- assert(litLengthSum <= litSize);
- if (!lastSequence) {
+ DEBUGLOG(5, "ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength",
+ (unsigned)nbSeqs, (const void*)sequences,
+ (unsigned)litLengthSum, (unsigned)matchLengthSum);
+ if (!lastSubBlock)
assert(litLengthSum == litSize);
- }
+ else
+ assert(litLengthSum <= litSize);
+ (void)litLengthSum;
return matchLengthSum + litSize;
}
/* Sequences Header */
RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
dstSize_tooSmall, "");
- if (nbSeq < 0x7F)
+ if (nbSeq < 128)
*op++ = (BYTE)nbSeq;
else if (nbSeq < LONGNBSEQ)
op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
else
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
if (nbSeq==0) {
- return op - ostart;
+ return (size_t)(op - ostart);
}
/* seqHead : flags for FSE encoding type */
}
{ size_t const bitstreamSize = ZSTD_encodeSequences(
- op, oend - op,
+ op, (size_t)(oend - op),
fseTables->matchlengthCTable, mlCode,
fseTables->offcodeCTable, ofCode,
fseTables->litlengthCTable, llCode,
#endif
*entropyWritten = 1;
- return op - ostart;
+ return (size_t)(op - ostart);
}
/** ZSTD_compressSubBlock() :
litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
{ size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
&entropyMetadata->hufMetadata, literals, litSize,
- op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
+ op, (size_t)(oend-op),
+ bmi2, writeLitEntropy, litEntropyWritten);
FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
if (cLitSize == 0) return 0;
op += cLitSize;
sequences, nbSeq,
llCode, mlCode, ofCode,
cctxParams,
- op, oend-op,
+ op, (size_t)(oend-op),
bmi2, writeSeqEntropy, seqEntropyWritten);
FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
if (cSeqSize == 0) return 0;
op += cSeqSize;
}
/* Write block header */
- { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
+ { size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
MEM_writeLE24(ostart, cBlockHeader24);
}
- return op-ostart;
+ return (size_t)(op-ostart);
}
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
return cSeqSizeEstimate + sequencesSectionHeaderSize;
}
-static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
+typedef struct {
+ size_t estLitSize;
+ size_t estBlockSize;
+} EstimatedBlockSize;
+static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
const BYTE* ofCodeTable,
const BYTE* llCodeTable,
const BYTE* mlCodeTable,
const ZSTD_entropyCTables_t* entropy,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize,
- int writeLitEntropy, int writeSeqEntropy) {
- size_t cSizeEstimate = 0;
- cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
- &entropy->huf, &entropyMetadata->hufMetadata,
- workspace, wkspSize, writeLitEntropy);
- cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+ int writeLitEntropy, int writeSeqEntropy)
+{
+ EstimatedBlockSize ebs;
+ ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize,
+ &entropy->huf, &entropyMetadata->hufMetadata,
+ workspace, wkspSize, writeLitEntropy);
+ ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
workspace, wkspSize, writeSeqEntropy);
- return cSizeEstimate + ZSTD_blockHeaderSize;
+ ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize;
+ return ebs;
}
static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
return 0;
}
+/* countLiterals() :
+ * Sums the literal lengths of @seqCount sequences starting at @sp.
+ * Lengths are read through ZSTD_getSequenceLength() with @seqStore,
+ * so any long-length corrections tracked by the seqStore are applied. */
+static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t seqCount)
+{
+ size_t n, total = 0;
+ assert(sp != NULL);
+ for (n=0; n<seqCount; n++) {
+ total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
+ }
+ DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
+ return total;
+}
+
+#define BYTESCALE 256
+
+/* sizeBlockSequences() :
+ * Decides how many sequences, starting from @sp, to pack into the next sub-block,
+ * accumulating estimated cost until @targetBudget is reached.
+ * All costs (@targetBudget, @avgLitCost, @avgSeqCost) are fixed-point values
+ * scaled by BYTESCALE (256), i.e. cost-in-bytes * 256.
+ * @firstSubBlock must be 0 or 1; when 1, a generous allowance for the entropy
+ * headers (written only into the first sub-block) is added to the budget.
+ * @return : nb of sequences selected, always >= 1 (the first sequence is always taken). */
+static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
+ size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
+ int firstSubBlock)
+{
+ size_t n, budget = 0, inSize=0;
+ /* entropy headers */
+ size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
+ assert(firstSubBlock==0 || firstSubBlock==1);
+ budget += headerSize;
+
+ /* first sequence => at least one sequence*/
+ budget += sp[0].litLength * avgLitCost + avgSeqCost;
+ if (budget > targetBudget) return 1;
+ inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
+
+ /* loop over sequences */
+ for (n=1; n<nbSeqs; n++) {
+ size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
+ budget += currentCost;
+ inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
+ /* stop when sub-block budget is reached */
+ if ( (budget > targetBudget)
+ /* though continue to expand until the sub-block is deemed compressible */
+ && (budget < inSize * BYTESCALE) )
+ break;
+ }
+
+ return n;
+}
+
/** ZSTD_compressSubBlock_multi() :
* Breaks super-block into multiple sub-blocks and compresses them.
- * Entropy will be written to the first block.
- * The following blocks will use repeat mode to compress.
- * All sub-blocks are compressed blocks (no raw or rle blocks).
- * @return : compressed size of the super block (which is multiple ZSTD blocks)
- * Or 0 if it failed to compress. */
+ * Entropy will be written into the first block.
+ * The following blocks use repeat_mode to compress.
+ * Sub-blocks are all compressed, except the last one when beneficial.
+ * @return : compressed size of the super block (which features multiple ZSTD blocks)
+ * or 0 if it failed to compress. */
static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
const ZSTD_compressedBlockState_t* prevCBlock,
ZSTD_compressedBlockState_t* nextCBlock,
{
const seqDef* const sstart = seqStorePtr->sequencesStart;
const seqDef* const send = seqStorePtr->sequences;
- const seqDef* sp = sstart;
+ const seqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */
+ size_t const nbSeqs = (size_t)(send - sstart);
const BYTE* const lstart = seqStorePtr->litStart;
const BYTE* const lend = seqStorePtr->lit;
const BYTE* lp = lstart;
+ size_t const nbLiterals = (size_t)(lend - lstart);
BYTE const* ip = (BYTE const*)src;
BYTE const* const iend = ip + srcSize;
BYTE* const ostart = (BYTE*)dst;
const BYTE* llCodePtr = seqStorePtr->llCode;
const BYTE* mlCodePtr = seqStorePtr->mlCode;
const BYTE* ofCodePtr = seqStorePtr->ofCode;
- size_t targetCBlockSize = cctxParams->targetCBlockSize;
- size_t litSize, seqCount;
- int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
+ size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
+ size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
+ int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
int writeSeqEntropy = 1;
- int lastSequence = 0;
-
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
- (unsigned)(lend-lp), (unsigned)(send-sstart));
-
- litSize = 0;
- seqCount = 0;
- do {
- size_t cBlockSizeEstimate = 0;
- if (sstart == send) {
- lastSequence = 1;
- } else {
- const seqDef* const sequence = sp + seqCount;
- lastSequence = sequence == send - 1;
- litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
- seqCount++;
- }
- if (lastSequence) {
- assert(lp <= lend);
- assert(litSize <= (size_t)(lend - lp));
- litSize = (size_t)(lend - lp);
+
+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
+ (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
+
+ /* let's start by a general estimation for the full block */
+ if (nbSeqs > 0) {
+ EstimatedBlockSize const ebs =
+ ZSTD_estimateSubBlockSize(lp, nbLiterals,
+ ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
+ &nextCBlock->entropy, entropyMetadata,
+ workspace, wkspSize,
+ writeLitEntropy, writeSeqEntropy);
+ /* quick estimation */
+ size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
+ size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
+ const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1);
+ size_t n, avgBlockBudget, blockBudgetSupp=0;
+ avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
+ DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
+ (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
+ (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
+ /* simplification: if estimates states that the full superblock doesn't compress, just bail out immediately
+ * this will result in the production of a single uncompressed block covering @srcSize.*/
+ if (ebs.estBlockSize > srcSize) return 0;
+
+ /* compress and write sub-blocks */
+ assert(nbSubBlocks>0);
+ for (n=0; n < nbSubBlocks-1; n++) {
+ /* determine nb of sequences for current sub-block + nbLiterals from next sequence */
+ size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp),
+ avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
+ /* if reached last sequence : break to last sub-block (simplification) */
+ assert(seqCount <= (size_t)(send-sp));
+ if (sp + seqCount == send) break;
+ assert(seqCount > 0);
+ /* compress sub-block */
+ { int litEntropyWritten = 0;
+ int seqEntropyWritten = 0;
+ size_t litSize = countLiterals(seqStorePtr, sp, seqCount);
+ const size_t decompressedSize =
+ ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0);
+ size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+ sp, seqCount,
+ lp, litSize,
+ llCodePtr, mlCodePtr, ofCodePtr,
+ cctxParams,
+ op, (size_t)(oend-op),
+ bmi2, writeLitEntropy, writeSeqEntropy,
+ &litEntropyWritten, &seqEntropyWritten,
+ 0);
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
+
+ /* check compressibility, update state components */
+ if (cSize > 0 && cSize < decompressedSize) {
+ DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
+ (unsigned)decompressedSize, (unsigned)cSize);
+ assert(ip + decompressedSize <= iend);
+ ip += decompressedSize;
+ lp += litSize;
+ op += cSize;
+ llCodePtr += seqCount;
+ mlCodePtr += seqCount;
+ ofCodePtr += seqCount;
+ /* Entropy only needs to be written once */
+ if (litEntropyWritten) {
+ writeLitEntropy = 0;
+ }
+ if (seqEntropyWritten) {
+ writeSeqEntropy = 0;
+ }
+ sp += seqCount;
+ blockBudgetSupp = 0;
+ } }
+ /* otherwise : do not compress yet, coalesce current sub-block with following one */
}
- /* I think there is an optimization opportunity here.
- * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
- * since it recalculates estimate from scratch.
- * For example, it would recount literal distribution and symbol codes every time.
- */
- cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
- &nextCBlock->entropy, entropyMetadata,
- workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
- if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
- int litEntropyWritten = 0;
- int seqEntropyWritten = 0;
- const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
- const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
- sp, seqCount,
- lp, litSize,
- llCodePtr, mlCodePtr, ofCodePtr,
- cctxParams,
- op, oend-op,
- bmi2, writeLitEntropy, writeSeqEntropy,
- &litEntropyWritten, &seqEntropyWritten,
- lastBlock && lastSequence);
- FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
- if (cSize > 0 && cSize < decompressedSize) {
- DEBUGLOG(5, "Committed the sub-block");
- assert(ip + decompressedSize <= iend);
- ip += decompressedSize;
- sp += seqCount;
- lp += litSize;
- op += cSize;
- llCodePtr += seqCount;
- mlCodePtr += seqCount;
- ofCodePtr += seqCount;
- litSize = 0;
- seqCount = 0;
- /* Entropy only needs to be written once */
- if (litEntropyWritten) {
- writeLitEntropy = 0;
- }
- if (seqEntropyWritten) {
- writeSeqEntropy = 0;
- }
+ } /* if (nbSeqs > 0) */
+
+ /* write last block */
+ DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
+ { int litEntropyWritten = 0;
+ int seqEntropyWritten = 0;
+ size_t litSize = (size_t)(lend - lp);
+ size_t seqCount = (size_t)(send - sp);
+ const size_t decompressedSize =
+ ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
+ size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+ sp, seqCount,
+ lp, litSize,
+ llCodePtr, mlCodePtr, ofCodePtr,
+ cctxParams,
+ op, (size_t)(oend-op),
+ bmi2, writeLitEntropy, writeSeqEntropy,
+ &litEntropyWritten, &seqEntropyWritten,
+ lastBlock);
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
+
+ /* update pointers, the nb of literals borrowed from next sequence must be preserved */
+ if (cSize > 0 && cSize < decompressedSize) {
+ DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes",
+ (unsigned)decompressedSize, (unsigned)cSize);
+ assert(ip + decompressedSize <= iend);
+ ip += decompressedSize;
+ lp += litSize;
+ op += cSize;
+ llCodePtr += seqCount;
+ mlCodePtr += seqCount;
+ ofCodePtr += seqCount;
+ /* Entropy only needs to be written once */
+ if (litEntropyWritten) {
+ writeLitEntropy = 0;
+ }
+ if (seqEntropyWritten) {
+ writeSeqEntropy = 0;
}
+ sp += seqCount;
}
- } while (!lastSequence);
+ }
+
+
if (writeLitEntropy) {
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
+ DEBUGLOG(5, "Literal entropy tables were never written");
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
}
if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
/* If we haven't written our entropy tables, then we've violated our contract and
* must emit an uncompressed block.
*/
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
+ DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block");
return 0;
}
+
if (ip < iend) {
- size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
+ /* some data left : last part of the block sent uncompressed */
+ size_t const rSize = (size_t)((iend - ip));
+ size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
+ DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
assert(cSize != 0);
op += cSize;
/* We have to regenerate the repcodes because we've skipped some sequences */
if (sp < send) {
- seqDef const* seq;
+ const seqDef* seq;
repcodes_t rep;
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
for (seq = sstart; seq < sp; ++seq) {
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
}
}
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
- return op-ostart;
+
+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
+ (unsigned)(op-ostart));
+ return (size_t)(op-ostart);
}
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
void* dst, size_t dstCapacity,
- void const* src, size_t srcSize,
- unsigned lastBlock) {
+ const void* src, size_t srcSize,
+ unsigned lastBlock)
+{
ZSTD_entropyCTablesMetadata_t entropyMetadata;
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
{
intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
(U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart);
+ (void)offset;
#if defined(ZSTD_MSAN_PRINT)
if(offset!=-1) {
__msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32);
/**
* Aligned on 64 bytes. These buffers have the special property that
- * their values remain constrained, allowing us to re-use them without
+ * their values remain constrained, allowing us to reuse them without
* memset()-ing them.
*/
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
- /* To validate that the table re-use logic is sound, and that we don't
+ /* To validate that the table reuse logic is sound, and that we don't
* access table space that we haven't cleaned, we re-"poison" the table
* space every time we mark it dirty.
* Since tableValidEnd space and initOnce space may overlap we don't poison
DEBUGLOG(4, "cwksp: clearing!");
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
- /* To validate that the context re-use logic is sound, and that we don't
+ /* To validate that the context reuse logic is sound, and that we don't
* access stuff that this compression hasn't initialized, we re-"poison"
- * the workspace except for the areas in which we expect memory re-use
+ * the workspace except for the areas in which we expect memory reuse
* without initialization (objects, valid tables area and init once
* memory). */
{
ZSTD_cwksp_assert_internal_consistency(ws);
}
+/* ZSTD_cwksp_sizeof() :
+ * Total capacity in bytes of the managed workspace buffer,
+ * i.e. the span [workspace, workspaceEnd). */
+MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
+ return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
+}
+
+/* ZSTD_cwksp_used() :
+ * Nb of bytes currently in use within the workspace :
+ * sum of the two allocated regions, [workspace, tableEnd) and [allocStart, workspaceEnd). */
+MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
+ return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
+ + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
+}
+
/**
* The provided workspace takes ownership of the buffer [start, start+size).
* Any existing values in the workspace are ignored (the previously managed
MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
void *ptr = ws->workspace;
DEBUGLOG(4, "cwksp: freeing workspace");
+#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
+ if (ptr != NULL && customMem.customFree != NULL) {
+ __msan_unpoison(ptr, ZSTD_cwksp_sizeof(ws));
+ }
+#endif
ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp));
ZSTD_customFree(ptr, customMem);
}
ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
}
-MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
- return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
-}
-
-MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
- return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
- + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
-}
-
MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
return ws->allocFailed;
}
#include "zstd_compress_internal.h"
#include "zstd_double_fast.h"
-static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
+
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
} }
}
-static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls /* template */)
FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
- PREFETCH_AREA(dictHashLong, hashTableBytes)
- PREFETCH_AREA(dictHashSmall, chainTableBytes)
+ PREFETCH_AREA(dictHashLong, hashTableBytes);
+ PREFETCH_AREA(dictHashSmall, chainTableBytes);
}
/* init */
}
-static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_doubleFast_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls /* template */)
return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
}
}
+
+#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
#include "../common/mem.h" /* U32 */
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
+
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp);
+
size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE ZSTD_compressBlock_doubleFast_dictMatchState
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT ZSTD_compressBlock_doubleFast_extDict
+#else
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST NULL
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL
+#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
#if defined (__cplusplus)
}
#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
#include "zstd_fast.h"
-static void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
} } } }
}
-static void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
*
* This is also the work we do at the beginning to enter the loop initially.
*/
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_compressBlock_fast_noDict_generic(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_fast_noDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls, U32 const hasStep)
}
FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_fast_dictMatchState_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
- PREFETCH_AREA(dictHashTable, hashTableBytes)
+ PREFETCH_AREA(dictHashTable, hashTableBytes);
}
/* init */
}
-static size_t ZSTD_compressBlock_fast_extDict_generic(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_fast_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
{
#include "zstd_lazy.h"
#include "../common/bits.h" /* ZSTD_countTrailingZeros64 */
+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
+
#define kLazySkippingStep 8
* Binary Tree search
***************************************/
-static void
-ZSTD_updateDUBT(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_updateDUBT(ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* iend,
U32 mls)
{
* sort one already inserted but unsorted position
* assumption : curr >= btlow == (curr - btmask)
* doesn't fail */
-static void
-ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
U32 curr, const BYTE* inputEnd,
U32 nbCompares, U32 btLow,
const ZSTD_dictMode_e dictMode)
}
-static size_t
-ZSTD_DUBT_findBetterDictMatch (
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_DUBT_findBetterDictMatch (
const ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
size_t* offsetPtr,
}
-static size_t
-ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
size_t* offBasePtr,
U32 const mls,
/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offBasePtr,
const U32 mls /* template */,
/* Update chains up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_insertAndFindFirstIndex_internal(
ZSTD_matchState_t* ms,
const ZSTD_compressionParameters* const cParams,
const BYTE* ip, U32 const mls, U32 const lazySkipping)
/* inlining is important to hardwire a hot branch (template emulation) */
FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_HcFindBestMatch(
ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit,
* Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
* but not beyond iLimit.
*/
-FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
U32 const rowLog, U32 const mls,
U32 idx, const BYTE* const iLimit)
{
* Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
* base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
*/
-FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
BYTE const* tagTable, BYTE const* base,
U32 idx, U32 const hashLog,
U32 const rowLog, U32 const mls,
/* ZSTD_row_update_internalImpl():
* Updates the hash table with positions starting from updateStartIdx until updateEndIdx.
*/
-FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
- U32 updateStartIdx, U32 const updateEndIdx,
- U32 const mls, U32 const rowLog,
- U32 const rowMask, U32 const useCache)
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
+ U32 updateStartIdx, U32 const updateEndIdx,
+ U32 const mls, U32 const rowLog,
+ U32 const rowMask, U32 const useCache)
{
U32* const hashTable = ms->hashTable;
BYTE* const tagTable = ms->tagTable;
* Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate.
* Skips sections of long matches as is necessary.
*/
-FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
- U32 const mls, U32 const rowLog,
- U32 const rowMask, U32 const useCache)
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
+ U32 const mls, U32 const rowLog,
+ U32 const rowMask, U32 const useCache)
{
U32 idx = ms->nextToUpdate;
const BYTE* const base = ms->window.base;
/* The high-level approach of the SIMD row based match finder is as follows:
* - Figure out where to insert the new entry:
- * - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag"
- * - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines
+ * - Generate a hash for the current input position and split it into one byte of tag and `rowHashLog` bits of index.
+ * - The hash is salted by a value that changes on every context reset, so when the same table is used
+ * we will avoid collisions that would otherwise slow us down by introducing phantom matches.
+ * - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and the index determines
* which row to insert into.
- * - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can
- * be considered as a circular buffer with a "head" index that resides in the tagTable.
- * - Also insert the "tag" into the equivalent row and position in the tagTable.
- * - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry.
- * The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively,
- * for alignment/performance reasons, leaving some bytes unused.
- * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and
+ * - Determine the correct position within the row to insert the entry into. Each row of 15 or 31 can
+ * be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16 or 32 bytes
+ * per row).
+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and
* generate a bitfield that we can cycle through to check the collisions in the hash table.
* - Pick the longest match.
+ * - Insert the tag into the equivalent row and position in the tagTable.
*/
FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_RowFindBestMatch(
ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit,
* Common parser - lazy strategy
*********************************/
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_compressBlock_lazy_generic(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_lazy_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize,
/* Return the last literals size */
return (size_t)(iend - anchor);
}
+#endif /* build exclusions */
-size_t ZSTD_compressBlock_btlazy2(
+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
}
-size_t ZSTD_compressBlock_lazy2(
+size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
}
-size_t ZSTD_compressBlock_lazy(
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
}
-size_t ZSTD_compressBlock_greedy(
+size_t ZSTD_compressBlock_greedy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
}
-size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
}
-size_t ZSTD_compressBlock_lazy2_dictMatchState(
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
}
+#endif
-size_t ZSTD_compressBlock_lazy_dictMatchState(
+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
}
-size_t ZSTD_compressBlock_greedy_dictMatchState(
+size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
}
-
-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
}
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+size_t ZSTD_compressBlock_lazy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
}
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
}
-/* Row-based matchfinder */
-size_t ZSTD_compressBlock_lazy2_row(
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
}
+#endif
-size_t ZSTD_compressBlock_lazy_row(
+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
}
-size_t ZSTD_compressBlock_greedy_row(
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
}
-size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
}
-size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy2_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
}
-size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
}
-
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
}
+#endif
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btlazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
}
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
}
+#endif
+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_lazy_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
/* Return the last literals size */
return (size_t)(iend - anchor);
}
+#endif /* build exclusions */
-
+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
}
-size_t ZSTD_compressBlock_lazy_extDict(
+size_t ZSTD_compressBlock_greedy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
-
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
}
+#endif
-size_t ZSTD_compressBlock_lazy2_extDict(
+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
}
-size_t ZSTD_compressBlock_btlazy2_extDict(
+size_t ZSTD_compressBlock_lazy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
}
+#endif
-size_t ZSTD_compressBlock_greedy_extDict_row(
+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
+
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
}
-size_t ZSTD_compressBlock_lazy_extDict_row(
+size_t ZSTD_compressBlock_lazy2_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
-
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
}
+#endif
-size_t ZSTD_compressBlock_lazy2_extDict_row(
+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
+
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
}
+#endif
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
+#endif
-size_t ZSTD_compressBlock_btlazy2(
+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2(
+size_t ZSTD_compressBlock_greedy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy(
+size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy(
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_row(
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_row(
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_row(
+size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+size_t ZSTD_compressBlock_greedy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_dictMatchState(
+
+#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy
+#define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row
+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE ZSTD_compressBlock_greedy_dictMatchState
+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row
+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH ZSTD_compressBlock_greedy_dedicatedDictSearch
+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row
+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict
+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row
+#else
+#define ZSTD_COMPRESSBLOCK_GREEDY NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL
+#endif
+
+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dictMatchState(
+size_t ZSTD_compressBlock_lazy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dictMatchState(
+size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+size_t ZSTD_compressBlock_lazy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+size_t ZSTD_compressBlock_lazy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+
+#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy
+#define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row
+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState
+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row
+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch
+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy_dedicatedDictSearch_row
+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT ZSTD_compressBlock_lazy_extDict
+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row
+#else
+#define ZSTD_COMPRESSBLOCK_LAZY NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL
+#endif
+
+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
+size_t ZSTD_compressBlock_lazy2_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_greedy_extDict(
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_extDict(
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_extDict_row(
+size_t ZSTD_compressBlock_lazy2_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_extDict_row(
+
+#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2
+#define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row
+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState
+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row
+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch
+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row
+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict
+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW ZSTD_compressBlock_lazy2_extDict_row
+#else
+#define ZSTD_COMPRESSBLOCK_LAZY2 NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW NULL
+#endif
+
+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btlazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_extDict_row(
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
+#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2
+#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState
+#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT ZSTD_compressBlock_btlazy2_extDict
+#else
+#define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL
+#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL
+#endif
+
#if defined (__cplusplus)
}
break;
case ZSTD_dfast:
+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
+#else
+ assert(0); /* shouldn't be called: cparams should've been adjusted. */
+#endif
break;
case ZSTD_greedy:
}
}
-static size_t ZSTD_ldm_generateSequences_internal(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_ldm_generateSequences_internal(
ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
ldmParams_t const* params, void const* src, size_t srcSize)
{
/* maybeSplitSequence updates rawSeqStore->pos */
rawSeq const sequence = maybeSplitSequence(rawSeqStore,
(U32)(iend - ip), minMatch);
- int i;
/* End signal */
if (sequence.offset == 0)
break;
/* Run the block compressor */
DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
{
+ int i;
size_t const newLitLength =
blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
ip += sequence.litLength;
#include "hist.h"
#include "zstd_opt.h"
+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_MAX_PRICE (1<<30)
const optState_t* const optPtr,
int optLevel)
{
+ DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
if (litLength == 0) return 0;
if (!ZSTD_compressedLiterals(optPtr))
/* Update hashTable3 up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
-static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
- U32* nextToUpdate3,
- const BYTE* const ip)
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
+ U32* nextToUpdate3,
+ const BYTE* const ip)
{
U32* const hashTable3 = ms->hashTable3;
U32 const hashLog3 = ms->hashLog3;
* @param ip assumed <= iend-8 .
* @param target The target of ZSTD_updateTree_internal() - we are filling to this position
* @return : nb of positions added */
-static U32 ZSTD_insertBt1(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_insertBt1(
const ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
U32 const target,
}
FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_updateTree_internal(
ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
const BYTE* const base = ms->window.base;
U32 const target = (U32)(ip - base);
U32 idx = ms->nextToUpdate;
- DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
+ DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
idx, target, dictMode);
while(idx < target) {
ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}
-FORCE_INLINE_TEMPLATE U32
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32
ZSTD_insertBtAndGetAllMatches (
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
ZSTD_matchState_t* ms,
U32 const ll0,
U32 const lengthToBeat);
-FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_btGetAllMatches_internal(
ZSTD_match_t* matches,
ZSTD_matchState_t* ms,
U32* nextToUpdate3,
* Optimal parser
*********************************/
-static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
-{
- return sol.litlen + sol.mlen;
-}
-
#if 0 /* debug */
static void
#endif
-FORCE_INLINE_TEMPLATE size_t
+#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
+#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
+#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
+
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
ZSTD_match_t* const matches = optStatePtr->matchTable;
- ZSTD_optimal_t lastSequence;
+ ZSTD_optimal_t lastStretch;
ZSTD_optLdm_t optLdm;
- ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
+ ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
U32 const ll0 = !litlen;
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
- (U32)(ip-istart), (U32)(iend - ip));
- if (!nbMatches) { ip++; continue; }
+ (U32)(ip-istart), (U32)(iend-ip));
+ if (!nbMatches) {
+ DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
+ ip++;
+ continue;
+ }
+
+ /* Match found: let's store this solution, and eventually find more candidates.
+ * During this forward pass, @opt is used to store stretches,
+ * defined as "a match followed by N literals".
+ * Note how this is different from a Sequence, which is "N literals followed by a match".
+ * Storing stretches allows us to store different match predecessors
+ * for each literal position part of a literals run. */
/* initialize opt[0] */
- { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
- opt[0].mlen = 0; /* means is_a_literal */
+ opt[0].mlen = 0; /* there are only literals so far */
opt[0].litlen = litlen;
- /* We don't need to include the actual price of the literals because
- * it is static for the duration of the forward pass, and is included
- * in every price. We include the literal length to avoid negative
- * prices when we subtract the previous literal length.
+ /* No need to include the actual price of the literals before the first match
+ * because it is static for the duration of the forward pass, and is included
+ * in every subsequent price. But, we include the literal length because
+ * the cost variation of litlen depends on the value of litlen.
*/
- opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
+ opt[0].price = LL_PRICE(litlen);
+ ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
+ ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
/* large match -> immediate encoding */
{ U32 const maxML = matches[nbMatches-1].len;
nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
if (maxML > sufficient_len) {
- lastSequence.litlen = litlen;
- lastSequence.mlen = maxML;
- lastSequence.off = maxOffBase;
- DEBUGLOG(6, "large match (%u>%u), immediate encoding",
+ lastStretch.litlen = 0;
+ lastStretch.mlen = maxML;
+ lastStretch.off = maxOffBase;
+ DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
maxML, sufficient_len);
cur = 0;
- last_pos = ZSTD_totalLen(lastSequence);
+ last_pos = maxML;
goto _shortestPath;
} }
/* set prices for first matches starting position == 0 */
assert(opt[0].price >= 0);
- { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
- U32 pos;
+ { U32 pos;
U32 matchNb;
for (pos = 1; pos < minMatch; pos++) {
- opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
+ opt[pos].price = ZSTD_MAX_PRICE;
+ opt[pos].mlen = 0;
+ opt[pos].litlen = litlen + pos;
}
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
U32 const offBase = matches[matchNb].off;
U32 const end = matches[matchNb].len;
for ( ; pos <= end ; pos++ ) {
- U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
- U32 const sequencePrice = literalsPrice + matchPrice;
+ int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
+ int const sequencePrice = opt[0].price + matchPrice;
DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
- pos, ZSTD_fCost((int)sequencePrice));
+ pos, ZSTD_fCost(sequencePrice));
opt[pos].mlen = pos;
opt[pos].off = offBase;
- opt[pos].litlen = litlen;
- opt[pos].price = (int)sequencePrice;
- } }
+ opt[pos].litlen = 0; /* end of match */
+ opt[pos].price = sequencePrice + LL_PRICE(0);
+ }
+ }
last_pos = pos-1;
+ opt[pos].price = ZSTD_MAX_PRICE;
}
}
/* check further positions */
for (cur = 1; cur <= last_pos; cur++) {
const BYTE* const inr = ip + cur;
- assert(cur < ZSTD_OPT_NUM);
- DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
+ assert(cur <= ZSTD_OPT_NUM);
+ DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
/* Fix current position with one literal if cheaper */
- { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
+ { U32 const litlen = opt[cur-1].litlen + 1;
int const price = opt[cur-1].price
- + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
- + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
- - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
+ + LIT_PRICE(ip+cur-1)
+ + LL_INCPRICE(litlen);
assert(price < 1000000000); /* overflow check */
if (price <= opt[cur].price) {
+ ZSTD_optimal_t const prevMatch = opt[cur];
DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
- opt[cur].mlen = 0;
- opt[cur].off = 0;
+ opt[cur] = opt[cur-1];
opt[cur].litlen = litlen;
opt[cur].price = price;
+ if ( (optLevel >= 1) /* additional check only for higher modes */
+ && (prevMatch.litlen == 0) /* replace a match */
+ && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
+ && LIKELY(ip + cur < iend)
+ ) {
+ /* check next position, in case it would be cheaper */
+ int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
+ int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
+ DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
+ cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
+ if ( (with1literal < withMoreLiterals)
+ && (with1literal < opt[cur+1].price) ) {
+ /* update offset history - before it disappears */
+ U32 const prev = cur - prevMatch.mlen;
+ repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
+ assert(cur >= prevMatch.mlen);
+ DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
+ ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
+ newReps.rep[0], newReps.rep[1], newReps.rep[2] );
+ opt[cur+1] = prevMatch; /* mlen & offbase */
+ ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
+ opt[cur+1].litlen = 1;
+ opt[cur+1].price = with1literal;
+ if (last_pos < cur+1) last_pos = cur+1;
+ }
+ }
} else {
- DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
- inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
- opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
+ DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
+ inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
}
}
- /* Set the repcodes of the current position. We must do it here
- * because we rely on the repcodes of the 2nd to last sequence being
- * correct to set the next chunks repcodes during the backward
- * traversal.
+ /* Offset history is not updated during match comparison.
+ * Do it here, now that the match is selected and confirmed.
*/
ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
assert(cur >= opt[cur].mlen);
- if (opt[cur].mlen != 0) {
+ if (opt[cur].litlen == 0) {
+ /* just finished a match => alter offset history */
U32 const prev = cur - opt[cur].mlen;
- repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
+ repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
- } else {
- ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
}
/* last match must start at a minimum distance of 8 from oend */
if ( (optLevel==0) /*static_test*/
&& (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
- DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
+ DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
}
assert(opt[cur].price >= 0);
- { U32 const ll0 = (opt[cur].mlen != 0);
- U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
- U32 const previousPrice = (U32)opt[cur].price;
- U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+ { U32 const ll0 = (opt[cur].litlen == 0);
+ int const previousPrice = opt[cur].price;
+ int const basePrice = previousPrice + LL_PRICE(0);
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
U32 matchNb;
continue;
}
- { U32 const maxML = matches[nbMatches-1].len;
- DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
- inr-istart, cur, nbMatches, maxML);
-
- if ( (maxML > sufficient_len)
- || (cur + maxML >= ZSTD_OPT_NUM) ) {
- lastSequence.mlen = maxML;
- lastSequence.off = matches[nbMatches-1].off;
- lastSequence.litlen = litlen;
- cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
- last_pos = cur + ZSTD_totalLen(lastSequence);
- if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
+ { U32 const longestML = matches[nbMatches-1].len;
+ DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
+ inr-istart, cur, nbMatches, longestML);
+
+ if ( (longestML > sufficient_len)
+ || (cur + longestML >= ZSTD_OPT_NUM)
+ || (ip + cur + longestML >= iend) ) {
+ lastStretch.mlen = longestML;
+ lastStretch.off = matches[nbMatches-1].off;
+ lastStretch.litlen = 0;
+ last_pos = cur + longestML;
goto _shortestPath;
} }
U32 mlen;
DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
- matchNb, matches[matchNb].off, lastML, litlen);
+ matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
U32 const pos = cur + mlen;
- int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
+ int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
if ((pos > last_pos) || (price < opt[pos].price)) {
DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
- while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
+ while (last_pos < pos) {
+ /* fill empty positions, for future comparisons */
+ last_pos++;
+ opt[last_pos].price = ZSTD_MAX_PRICE;
+ opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */
+ }
opt[pos].mlen = mlen;
opt[pos].off = offset;
- opt[pos].litlen = litlen;
+ opt[pos].litlen = 0;
opt[pos].price = price;
} else {
DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
}
} } }
+ opt[last_pos+1].price = ZSTD_MAX_PRICE;
} /* for (cur = 1; cur <= last_pos; cur++) */
- lastSequence = opt[last_pos];
- cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
- assert(cur < ZSTD_OPT_NUM); /* control overflow*/
+ lastStretch = opt[last_pos];
+ assert(cur >= lastStretch.mlen);
+ cur = last_pos - lastStretch.mlen;
_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
assert(opt[0].mlen == 0);
+ assert(last_pos >= lastStretch.mlen);
+ assert(cur == last_pos - lastStretch.mlen);
- /* Set the next chunk's repcodes based on the repcodes of the beginning
- * of the last match, and the last sequence. This avoids us having to
- * update them while traversing the sequences.
- */
- if (lastSequence.mlen != 0) {
- repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
- ZSTD_memcpy(rep, &reps, sizeof(reps));
+ if (lastStretch.mlen==0) {
+ /* no solution : all matches have been converted into literals */
+ assert(lastStretch.litlen == (ip - anchor) + last_pos);
+ ip += last_pos;
+ continue;
+ }
+ assert(lastStretch.off > 0);
+
+ /* Update offset history */
+ if (lastStretch.litlen == 0) {
+ /* finishing on a match : update offset history */
+ repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
+ ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
} else {
- ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
+ ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
+ assert(cur >= lastStretch.litlen);
+ cur -= lastStretch.litlen;
}
- { U32 const storeEnd = cur + 1;
+ /* Let's write the shortest path solution.
+ * It is stored in @opt in reverse order,
+ * starting from @storeEnd (==cur+2),
+ * effectively partially @opt overwriting.
+ * Content is changed too:
+ * - So far, @opt stored stretches, aka a match followed by literals
+ * - Now, it will store sequences, aka literals followed by a match
+ */
+ { U32 const storeEnd = cur + 2;
U32 storeStart = storeEnd;
- U32 seqPos = cur;
+ U32 stretchPos = cur;
DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
last_pos, cur); (void)last_pos;
- assert(storeEnd < ZSTD_OPT_NUM);
- DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
- storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
- opt[storeEnd] = lastSequence;
- while (seqPos > 0) {
- U32 const backDist = ZSTD_totalLen(opt[seqPos]);
+ assert(storeEnd < ZSTD_OPT_SIZE);
+ DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+ storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
+ if (lastStretch.litlen > 0) {
+ /* last "sequence" is unfinished: just a bunch of literals */
+ opt[storeEnd].litlen = lastStretch.litlen;
+ opt[storeEnd].mlen = 0;
+ storeStart = storeEnd-1;
+ opt[storeStart] = lastStretch;
+ } {
+ opt[storeEnd] = lastStretch; /* note: litlen will be fixed */
+ storeStart = storeEnd;
+ }
+ while (1) {
+ ZSTD_optimal_t nextStretch = opt[stretchPos];
+ opt[storeStart].litlen = nextStretch.litlen;
+ DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
+ opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
+ if (nextStretch.mlen == 0) {
+ /* reaching beginning of segment */
+ break;
+ }
storeStart--;
- DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
- seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
- opt[storeStart] = opt[seqPos];
- seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
+ opt[storeStart] = nextStretch; /* note: litlen will be fixed */
+ assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
+ stretchPos -= nextStretch.litlen + nextStretch.mlen;
}
/* save sequences */
- DEBUGLOG(6, "sending selected sequences into seqStore")
+ DEBUGLOG(6, "sending selected sequences into seqStore");
{ U32 storePos;
for (storePos=storeStart; storePos <= storeEnd; storePos++) {
U32 const llen = opt[storePos].litlen;
anchor += advance;
ip = anchor;
} }
+ DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
+
+ /* update all costs */
ZSTD_setBasePrices(optStatePtr, optLevel);
}
} /* while (ip < ilimit) */
/* Return the last literals size */
return (size_t)(iend - anchor);
}
+#endif /* build exclusions */
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
static size_t ZSTD_compressBlock_opt0(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
}
+#endif
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
static size_t ZSTD_compressBlock_opt2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
}
+#endif
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
DEBUGLOG(5, "ZSTD_compressBlock_btopt");
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}
+#endif
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
/* ZSTD_initStats_ultra():
* make a first compression pass, just to seed stats with more accurate starting values.
* only works on first block, with no dictionary and no ldm.
* this function cannot error out, its narrow contract must be respected.
*/
-static void
-ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
- seqStore_t* seqStore,
- U32 rep[ZSTD_REP_NUM],
- const void* src, size_t srcSize)
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+ seqStore_t* seqStore,
+ U32 rep[ZSTD_REP_NUM],
+ const void* src, size_t srcSize)
{
U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
* Consequently, this can only work if no data has been previously loaded in tables,
* aka, no dictionary, no prefix, no ldm preprocessing.
* The compression ratio gain is generally small (~0.5% on first block),
- ** the cost is 2x cpu time on first block. */
+ * the cost is 2x cpu time on first block. */
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
if ( (ms->opt.litLengthSum==0) /* first block */
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}
+#endif
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}
-size_t ZSTD_compressBlock_btultra_dictMatchState(
+size_t ZSTD_compressBlock_btopt_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
- return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}
+#endif
-size_t ZSTD_compressBlock_btopt_extDict(
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btultra_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
- return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_btultra_extDict(
{
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}
+#endif
/* note : no btultra2 variant for extDict nor dictMatchState,
* because btultra2 is not meant to work with dictionaries
#include "zstd_compress_internal.h"
+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
/* used in ZSTD_loadDictionaryContent() */
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
+#endif
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra(
+size_t ZSTD_compressBlock_btopt_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra2(
+size_t ZSTD_compressBlock_btopt_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
+#define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt
+#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE ZSTD_compressBlock_btopt_dictMatchState
+#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict
+#else
+#define ZSTD_COMPRESSBLOCK_BTOPT NULL
+#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL
+#endif
-size_t ZSTD_compressBlock_btopt_dictMatchState(
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btultra(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_btopt_extDict(
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
- void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
/* note : no btultra2 variant for extDict nor dictMatchState,
* because btultra2 is not meant to work with dictionaries
* and is only specific for the first block (no prefix) */
+size_t ZSTD_compressBlock_btultra2(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
+
+#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra
+#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState
+#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict
+#define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2
+#else
+#define ZSTD_COMPRESSBLOCK_BTULTRA NULL
+#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL
+#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL
+#endif
#if defined (__cplusplus)
}
#endif
-/* ====== Constants ====== */
-#define ZSTDMT_OVERLAPLOG_DEFAULT 0
-
-
/* ====== Dependencies ====== */
-#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
#include "../common/mem.h" /* MEM_STATIC */
#include "../common/pool.h" /* threadpool */
#include "../common/threading.h" /* mutex */
-#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
+#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
#include "zstd_ldm.h"
#include "zstdmt_compress.h"
# include <unistd.h>
# include <sys/times.h>
-# define DEBUG_PRINTHEX(l,p,n) { \
- unsigned debug_u; \
- for (debug_u=0; debug_u<(n); debug_u++) \
- RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
- RAWLOG(l, " \n"); \
-}
+# define DEBUG_PRINTHEX(l,p,n) \
+ do { \
+ unsigned debug_u; \
+ for (debug_u=0; debug_u<(n); debug_u++) \
+ RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+ RAWLOG(l, " \n"); \
+ } while (0)
static unsigned long long GetCurrentClockTimeMicroseconds(void)
{
} }
#define MUTEX_WAIT_TIME_DLEVEL 6
-#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
- if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
- unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
- ZSTD_pthread_mutex_lock(mutex); \
- { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
- unsigned long long const elapsedTime = (afterTime-beforeTime); \
- if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
- DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
- elapsedTime, #mutex); \
- } } \
- } else { \
- ZSTD_pthread_mutex_lock(mutex); \
- } \
-}
+#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) \
+ do { \
+ if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
+ unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
+ ZSTD_pthread_mutex_lock(mutex); \
+ { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+ unsigned long long const elapsedTime = (afterTime-beforeTime); \
+ if (elapsedTime > 1000) { \
+ /* or whatever threshold you like; I'm using 1 millisecond here */ \
+ DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, \
+ "Thread took %llu microseconds to acquire mutex %s \n", \
+ elapsedTime, #mutex); \
+ } } \
+ } else { \
+ ZSTD_pthread_mutex_lock(mutex); \
+ } \
+ } while (0)
#else
# define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
-# define DEBUG_PRINTHEX(l,p,n) {}
+# define DEBUG_PRINTHEX(l,p,n) do { } while (0)
#endif
unsigned totalBuffers;
unsigned nbBuffers;
ZSTD_customMem cMem;
- buffer_t bTable[1]; /* variable size */
+ buffer_t* buffers;
} ZSTDMT_bufferPool;
+static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
+{
+ DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
+ if (!bufPool) return; /* compatibility with free on NULL */
+ if (bufPool->buffers) {
+ unsigned u;
+ for (u=0; u<bufPool->totalBuffers; u++) {
+ DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->buffers[u].start);
+ ZSTD_customFree(bufPool->buffers[u].start, bufPool->cMem);
+ }
+ ZSTD_customFree(bufPool->buffers, bufPool->cMem);
+ }
+ ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
+ ZSTD_customFree(bufPool, bufPool->cMem);
+}
+
static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
{
- ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
- sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
+ ZSTDMT_bufferPool* const bufPool =
+ (ZSTDMT_bufferPool*)ZSTD_customCalloc(sizeof(ZSTDMT_bufferPool), cMem);
if (bufPool==NULL) return NULL;
if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
ZSTD_customFree(bufPool, cMem);
return NULL;
}
+ bufPool->buffers = (buffer_t*)ZSTD_customCalloc(maxNbBuffers * sizeof(buffer_t), cMem);
+ if (bufPool->buffers==NULL) {
+ ZSTDMT_freeBufferPool(bufPool);
+ return NULL;
+ }
bufPool->bufferSize = 64 KB;
bufPool->totalBuffers = maxNbBuffers;
bufPool->nbBuffers = 0;
return bufPool;
}
-static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
-{
- unsigned u;
- DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
- if (!bufPool) return; /* compatibility with free on NULL */
- for (u=0; u<bufPool->totalBuffers; u++) {
- DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
- ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
- }
- ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
- ZSTD_customFree(bufPool, bufPool->cMem);
-}
-
/* only works at initialization, not during compression */
static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
{
- size_t const poolSize = sizeof(*bufPool)
- + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+ size_t const poolSize = sizeof(*bufPool);
+ size_t const arraySize = bufPool->totalBuffers * sizeof(buffer_t);
unsigned u;
size_t totalBufferSize = 0;
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
for (u=0; u<bufPool->totalBuffers; u++)
- totalBufferSize += bufPool->bTable[u].capacity;
+ totalBufferSize += bufPool->buffers[u].capacity;
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
- return poolSize + totalBufferSize;
+ return poolSize + arraySize + totalBufferSize;
}
/* ZSTDMT_setBufferSize() :
DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
if (bufPool->nbBuffers) { /* try to use an existing buffer */
- buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
+ buffer_t const buf = bufPool->buffers[--(bufPool->nbBuffers)];
size_t const availBufferSize = buf.capacity;
- bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
+ bufPool->buffers[bufPool->nbBuffers] = g_nullBuffer;
if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
/* large enough, but not too much */
DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
if (buf.start == NULL) return; /* compatible with release on NULL */
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
if (bufPool->nbBuffers < bufPool->totalBuffers) {
- bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */
+ bufPool->buffers[bufPool->nbBuffers++] = buf; /* stored for later use */
DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
(U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
return;
}
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
- /* Reached bufferPool capacity (should not happen) */
+ /* Reached bufferPool capacity (note: should not happen) */
DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
ZSTD_customFree(buf.start, bufPool->cMem);
}
int totalCCtx;
int availCCtx;
ZSTD_customMem cMem;
- ZSTD_CCtx* cctx[1]; /* variable size */
+ ZSTD_CCtx** cctxs;
} ZSTDMT_CCtxPool;
-/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
+/* note : all CCtx borrowed from the pool must be reverted back to the pool _before_ freeing the pool */
static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
{
- int cid;
- for (cid=0; cid<pool->totalCCtx; cid++)
- ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
+ if (!pool) return;
ZSTD_pthread_mutex_destroy(&pool->poolMutex);
+ if (pool->cctxs) {
+ int cid;
+ for (cid=0; cid<pool->totalCCtx; cid++)
+ ZSTD_freeCCtx(pool->cctxs[cid]); /* free compatible with NULL */
+ ZSTD_customFree(pool->cctxs, pool->cMem);
+ }
ZSTD_customFree(pool, pool->cMem);
}
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
ZSTD_customMem cMem)
{
- ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc(
- sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
+ ZSTDMT_CCtxPool* const cctxPool =
+ (ZSTDMT_CCtxPool*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtxPool), cMem);
assert(nbWorkers > 0);
if (!cctxPool) return NULL;
if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
ZSTD_customFree(cctxPool, cMem);
return NULL;
}
- cctxPool->cMem = cMem;
cctxPool->totalCCtx = nbWorkers;
+ cctxPool->cctxs = (ZSTD_CCtx**)ZSTD_customCalloc(nbWorkers * sizeof(ZSTD_CCtx*), cMem);
+ if (!cctxPool->cctxs) {
+ ZSTDMT_freeCCtxPool(cctxPool);
+ return NULL;
+ }
+ cctxPool->cMem = cMem;
+ cctxPool->cctxs[0] = ZSTD_createCCtx_advanced(cMem);
+ if (!cctxPool->cctxs[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
- cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
- if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
return cctxPool;
}
{
ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
{ unsigned const nbWorkers = cctxPool->totalCCtx;
- size_t const poolSize = sizeof(*cctxPool)
- + (nbWorkers-1) * sizeof(ZSTD_CCtx*);
- unsigned u;
+ size_t const poolSize = sizeof(*cctxPool);
+ size_t const arraySize = cctxPool->totalCCtx * sizeof(ZSTD_CCtx*);
size_t totalCCtxSize = 0;
+ unsigned u;
for (u=0; u<nbWorkers; u++) {
- totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
+ totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctxs[u]);
}
ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
assert(nbWorkers > 0);
- return poolSize + totalCCtxSize;
+ return poolSize + arraySize + totalCCtxSize;
}
}
ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
if (cctxPool->availCCtx) {
cctxPool->availCCtx--;
- { ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
+ { ZSTD_CCtx* const cctx = cctxPool->cctxs[cctxPool->availCCtx];
ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
return cctx;
} }
if (cctx==NULL) return; /* compatibility with release on NULL */
ZSTD_pthread_mutex_lock(&pool->poolMutex);
if (pool->availCCtx < pool->totalCCtx)
- pool->cctx[pool->availCCtx++] = cctx;
+ pool->cctxs[pool->availCCtx++] = cctx;
else {
/* pool overflow : should not happen, since totalCCtx==nbWorkers */
DEBUGLOG(4, "CCtx pool overflow : free cctx");
ZSTD_pthread_mutex_unlock(&serialState->mutex);
if (seqStore.size > 0) {
- size_t const err = ZSTD_referenceExternalSequences(
- jobCCtx, seqStore.seq, seqStore.size);
+ ZSTD_referenceExternalSequences(jobCCtx, seqStore.seq, seqStore.size);
assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
- assert(!ZSTD_isError(err));
- (void)err;
}
}
unsigned frameChecksumNeeded; /* used only by mtctx */
} ZSTDMT_jobDescription;
-#define JOB_ERROR(e) { \
- ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \
- job->cSize = e; \
- ZSTD_pthread_mutex_unlock(&job->job_mutex); \
- goto _endJob; \
-}
+#define JOB_ERROR(e) \
+ do { \
+ ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \
+ job->cSize = e; \
+ ZSTD_pthread_mutex_unlock(&job->job_mutex); \
+ goto _endJob; \
+ } while (0)
/* ZSTDMT_compressionJob() is a POOL_function type */
static void ZSTDMT_compressionJob(void* jobDescription)
{ unsigned jobNb;
unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
- mtctx->doneJobID, lastJobNb, mtctx->jobReady)
+ mtctx->doneJobID, lastJobNb, mtctx->jobReady);
for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
unsigned const wJobID = jobNb & mtctx->jobIDMask;
ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID];
* Macros
****************************************************************/
+#ifdef HUF_DISABLE_FAST_DECODE
+# define HUF_ENABLE_FAST_DECODE 0
+#else
+# define HUF_ENABLE_FAST_DECODE 1
+#endif
+
/* These two optional macros force the use one way or another of the two
* Huffman decompression implementations. You can't force in both directions
* at the same time.
* op [in/out] - The output pointers, must be updated to reflect what is written.
* bits [in/out] - The bitstream containers, must be updated to reflect the current state.
* dt [in] - The decoding table.
- * ilimit [in] - The input limit, stop when any input pointer is below ilimit.
+ * ilowest [in] - The beginning of the valid range of the input. Decoders may read
+ * down to this pointer. It may be below iend[0].
* oend [in] - The end of the output stream. op[3] must not cross oend.
* iend [in] - The end of each input stream. ip[i] may cross iend[i],
- * as long as it is above ilimit, but that indicates corruption.
+ * as long as it is above ilowest, but that indicates corruption.
*/
typedef struct {
BYTE const* ip[4];
BYTE* op[4];
U64 bits[4];
void const* dt;
- BYTE const* ilimit;
+ BYTE const* ilowest;
BYTE* oend;
BYTE const* iend[4];
} HUF_DecompressFastArgs;
void const* dt = DTable + 1;
U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
- const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
+ const BYTE* const istart = (const BYTE*)src;
- BYTE* const oend = (BYTE*)dst + dstSize;
+ BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
/* The fast decoding loop assumes 64-bit little-endian.
* This condition is false on x32.
if (!MEM_isLittleEndian() || MEM_32bits())
return 0;
+ /* Avoid nullptr addition */
+ if (dstSize == 0)
+ return 0;
+ assert(dst != NULL);
+
/* strict minimum : jump table + 1 byte per stream */
if (srcSize < 10)
return ERROR(corruption_detected);
/* Read the jump table. */
{
- const BYTE* const istart = (const BYTE*)src;
size_t const length1 = MEM_readLE16(istart);
size_t const length2 = MEM_readLE16(istart+2);
size_t const length3 = MEM_readLE16(istart+4);
/* HUF_initFastDStream() requires this, and this small of an input
* won't benefit from the ASM loop anyways.
- * length1 must be >= 16 so that ip[0] >= ilimit before the loop
- * starts.
*/
- if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
+ if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8)
return 0;
if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */
}
args->bits[2] = HUF_initFastDStream(args->ip[2]);
args->bits[3] = HUF_initFastDStream(args->ip[3]);
- /* If ip[] >= ilimit, it is guaranteed to be safe to
- * reload bits[]. It may be beyond its section, but is
- * guaranteed to be valid (>= istart).
- */
- args->ilimit = ilimit;
+ /* The decoders must be sure to never read beyond ilowest.
+ * This is lower than iend[0], but allowing decoders to read
+ * down to ilowest can allow an extra iteration or two in the
+ * fast loop.
+ */
+ args->ilowest = istart;
args->oend = oend;
args->dt = dt;
assert(sizeof(size_t) == 8);
bit->bitContainer = MEM_readLEST(args->ip[stream]);
bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
- bit->start = (const char*)args->iend[0];
+ bit->start = (const char*)args->ilowest;
bit->limitPtr = bit->start + sizeof(size_t);
bit->ptr = (const char*)args->ip[stream];
return 0;
}
+/* Calls X(N) for each stream 0, 1, 2, 3. */
+#define HUF_4X_FOR_EACH_STREAM(X) \
+ do { \
+ X(0); \
+ X(1); \
+ X(2); \
+ X(3); \
+ } while (0)
+
+/* Calls X(N, var) for each stream 0, 1, 2, 3. */
+#define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \
+ do { \
+ X(0, (var)); \
+ X(1, (var)); \
+ X(2, (var)); \
+ X(3, (var)); \
+ } while (0)
+
#ifndef HUF_FORCE_DECOMPRESS_X2
}
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
- *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
+ do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0)
-#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
- if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
- HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
+ do { \
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
+ } while (0)
-#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
- if (MEM_64bits()) \
- HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
+ do { \
+ if (MEM_64bits()) \
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
+ } while (0)
HINT_INLINE size_t
HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
const HUF_DTable* DTable)
{
BYTE* op = (BYTE*)dst;
- BYTE* const oend = op + dstSize;
+ BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize);
const void* dtPtr = DTable + 1;
const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
BIT_DStream_t bitD;
{
/* Check */
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+ if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
{ const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
- if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
+ assert(dstSize >= 6); /* validated above */
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
BYTE* op[4];
U16 const* const dtable = (U16 const*)args->dt;
BYTE* const oend = args->oend;
- BYTE const* const ilimit = args->ilimit;
+ BYTE const* const ilowest = args->ilowest;
/* Copy the arguments to local variables */
ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
for (;;) {
BYTE* olimit;
int stream;
- int symbol;
/* Assert loop preconditions */
#ifndef NDEBUG
for (stream = 0; stream < 4; ++stream) {
assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
- assert(ip[stream] >= ilimit);
+ assert(ip[stream] >= ilowest);
}
#endif
/* Compute olimit */
/* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
* per stream.
*/
- size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
+ size_t const iiters = (size_t)(ip[0] - ilowest) / 7;
/* We can safely run iters iterations before running bounds checks */
size_t const iters = MIN(oiters, iiters);
size_t const symbols = iters * 5;
*/
olimit = op[3] + symbols;
- /* Exit fast decoding loop once we get close to the end. */
- if (op[3] + 20 > olimit)
+ /* Exit fast decoding loop once we reach the end. */
+ if (op[3] == olimit)
break;
/* Exit the decoding loop if any input pointer has crossed the
}
#endif
+#define HUF_4X1_DECODE_SYMBOL(_stream, _symbol) \
+ do { \
+ int const index = (int)(bits[(_stream)] >> 53); \
+ int const entry = (int)dtable[index]; \
+ bits[(_stream)] <<= (entry & 0x3F); \
+ op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \
+ } while (0)
+
+#define HUF_4X1_RELOAD_STREAM(_stream) \
+ do { \
+ int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
+ int const nbBits = ctz & 7; \
+ int const nbBytes = ctz >> 3; \
+ op[(_stream)] += 5; \
+ ip[(_stream)] -= nbBytes; \
+ bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \
+ bits[(_stream)] <<= nbBits; \
+ } while (0)
+
+ /* Manually unroll the loop because compilers don't consistently
+ * unroll the inner loops, which destroys performance.
+ */
do {
/* Decode 5 symbols in each of the 4 streams */
- for (symbol = 0; symbol < 5; ++symbol) {
- for (stream = 0; stream < 4; ++stream) {
- int const index = (int)(bits[stream] >> 53);
- int const entry = (int)dtable[index];
- bits[stream] <<= (entry & 63);
- op[stream][symbol] = (BYTE)((entry >> 8) & 0xFF);
- }
- }
- /* Reload the bitstreams */
- for (stream = 0; stream < 4; ++stream) {
- int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
- int const nbBits = ctz & 7;
- int const nbBytes = ctz >> 3;
- op[stream] += 5;
- ip[stream] -= nbBytes;
- bits[stream] = MEM_read64(ip[stream]) | 1;
- bits[stream] <<= nbBits;
- }
+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0);
+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1);
+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2);
+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3);
+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4);
+
+ /* Reload each of the 4 the bitstreams */
+ HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM);
} while (op[3] < olimit);
+
+#undef HUF_4X1_DECODE_SYMBOL
+#undef HUF_4X1_RELOAD_STREAM
}
_out:
HUF_DecompressFastLoopFn loopFn)
{
void const* dt = DTable + 1;
- const BYTE* const iend = (const BYTE*)cSrc + 6;
- BYTE* const oend = (BYTE*)dst + dstSize;
+ BYTE const* const ilowest = (BYTE const*)cSrc;
+ BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
HUF_DecompressFastArgs args;
{ size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
return 0;
}
- assert(args.ip[0] >= args.ilimit);
+ assert(args.ip[0] >= args.ilowest);
loopFn(&args);
- /* Our loop guarantees that ip[] >= ilimit and that we haven't
+ /* Our loop guarantees that ip[] >= ilowest and that we haven't
* overwritten any op[].
*/
- assert(args.ip[0] >= iend);
- assert(args.ip[1] >= iend);
- assert(args.ip[2] >= iend);
- assert(args.ip[3] >= iend);
+ assert(args.ip[0] >= ilowest);
+ assert(args.ip[0] >= ilowest);
+ assert(args.ip[1] >= ilowest);
+ assert(args.ip[2] >= ilowest);
+ assert(args.ip[3] >= ilowest);
assert(args.op[3] <= oend);
- (void)iend;
+
+ assert(ilowest == args.ilowest);
+ assert(ilowest + 6 == args.iend[0]);
+ (void)ilowest;
/* finish bit streams one by one. */
{ size_t const segmentSize = (dstSize+3) / 4;
}
#endif
- if (!(flags & HUF_flags_disableFast)) {
+ if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
if (ret != 0)
return ret;
}
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
- ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+ do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0)
-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
- if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
- ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+ do { \
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
+ } while (0)
-#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
- if (MEM_64bits()) \
- ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+ do { \
+ if (MEM_64bits()) \
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
+ } while (0)
HINT_INLINE size_t
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
/* decode */
{ BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
+ BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, dstSize);
const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
const HUF_DTable* DTable)
{
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+ if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
{ const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
- if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
+ assert(dstSize >= 6 /* validated above */);
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
BYTE* op[4];
BYTE* oend[4];
HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
- BYTE const* const ilimit = args->ilimit;
+ BYTE const* const ilowest = args->ilowest;
/* Copy the arguments to local registers. */
ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
for (;;) {
BYTE* olimit;
int stream;
- int symbol;
/* Assert loop preconditions */
#ifndef NDEBUG
for (stream = 0; stream < 4; ++stream) {
assert(op[stream] <= oend[stream]);
- assert(ip[stream] >= ilimit);
+ assert(ip[stream] >= ilowest);
}
#endif
/* Compute olimit */
* We also know that each input pointer is >= ip[0]. So we can run
* iters loops before running out of input.
*/
- size_t iters = (size_t)(ip[0] - ilimit) / 7;
+ size_t iters = (size_t)(ip[0] - ilowest) / 7;
/* Each iteration can produce up to 10 bytes of output per stream.
* Each output stream my advance at different rates. So take the
* minimum number of safe iterations among all the output streams.
*/
olimit = op[3] + (iters * 5);
- /* Exit the fast decoding loop if we are too close to the end. */
- if (op[3] + 10 > olimit)
+ /* Exit the fast decoding loop once we reach the end. */
+ if (op[3] == olimit)
break;
/* Exit the decoding loop if any input pointer has crossed the
}
#endif
+#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3) \
+ do { \
+ if ((_decode3) || (_stream) != 3) { \
+ int const index = (int)(bits[(_stream)] >> 53); \
+ HUF_DEltX2 const entry = dtable[index]; \
+ MEM_write16(op[(_stream)], entry.sequence); \
+ bits[(_stream)] <<= (entry.nbBits) & 0x3F; \
+ op[(_stream)] += (entry.length); \
+ } \
+ } while (0)
+
+#define HUF_4X2_RELOAD_STREAM(_stream) \
+ do { \
+ HUF_4X2_DECODE_SYMBOL(3, 1); \
+ { \
+ int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
+ int const nbBits = ctz & 7; \
+ int const nbBytes = ctz >> 3; \
+ ip[(_stream)] -= nbBytes; \
+ bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \
+ bits[(_stream)] <<= nbBits; \
+ } \
+ } while (0)
+
+ /* Manually unroll the loop because compilers don't consistently
+ * unroll the inner loops, which destroys performance.
+ */
do {
- /* Do 5 table lookups for each of the first 3 streams */
- for (symbol = 0; symbol < 5; ++symbol) {
- for (stream = 0; stream < 3; ++stream) {
- int const index = (int)(bits[stream] >> 53);
- HUF_DEltX2 const entry = dtable[index];
- MEM_write16(op[stream], entry.sequence);
- bits[stream] <<= (entry.nbBits);
- op[stream] += (entry.length);
- }
- }
- /* Do 1 table lookup from the final stream */
- {
- int const index = (int)(bits[3] >> 53);
- HUF_DEltX2 const entry = dtable[index];
- MEM_write16(op[3], entry.sequence);
- bits[3] <<= (entry.nbBits);
- op[3] += (entry.length);
- }
- /* Do 4 table lookups from the final stream & reload bitstreams */
- for (stream = 0; stream < 4; ++stream) {
- /* Do a table lookup from the final stream.
- * This is interleaved with the reloading to reduce register
- * pressure. This shouldn't be necessary, but compilers can
- * struggle with codegen with high register pressure.
- */
- {
- int const index = (int)(bits[3] >> 53);
- HUF_DEltX2 const entry = dtable[index];
- MEM_write16(op[3], entry.sequence);
- bits[3] <<= (entry.nbBits);
- op[3] += (entry.length);
- }
- /* Reload the bistreams. The final bitstream must be reloaded
- * after the 5th symbol was decoded.
- */
- {
- int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
- int const nbBits = ctz & 7;
- int const nbBytes = ctz >> 3;
- ip[stream] -= nbBytes;
- bits[stream] = MEM_read64(ip[stream]) | 1;
- bits[stream] <<= nbBits;
- }
- }
+ /* Decode 5 symbols from each of the first 3 streams.
+ * The final stream will be decoded during the reload phase
+ * to reduce register pressure.
+ */
+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
+ HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
+
+ /* Decode one symbol from the final stream */
+ HUF_4X2_DECODE_SYMBOL(3, 1);
+
+ /* Decode 4 symbols from the final stream & reload bitstreams.
+ * The final stream is reloaded last, meaning that all 5 symbols
+ * are decoded from the final stream before it is reloaded.
+ */
+ HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM);
} while (op[3] < olimit);
}
+#undef HUF_4X2_DECODE_SYMBOL
+#undef HUF_4X2_RELOAD_STREAM
+
_out:
/* Save the final values of each of the state variables back to args. */
const HUF_DTable* DTable,
HUF_DecompressFastLoopFn loopFn) {
void const* dt = DTable + 1;
- const BYTE* const iend = (const BYTE*)cSrc + 6;
- BYTE* const oend = (BYTE*)dst + dstSize;
+ const BYTE* const ilowest = (const BYTE*)cSrc;
+ BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
HUF_DecompressFastArgs args;
{
size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
return 0;
}
- assert(args.ip[0] >= args.ilimit);
+ assert(args.ip[0] >= args.ilowest);
loopFn(&args);
/* note : op4 already verified within main loop */
- assert(args.ip[0] >= iend);
- assert(args.ip[1] >= iend);
- assert(args.ip[2] >= iend);
- assert(args.ip[3] >= iend);
+ assert(args.ip[0] >= ilowest);
+ assert(args.ip[1] >= ilowest);
+ assert(args.ip[2] >= ilowest);
+ assert(args.ip[3] >= ilowest);
assert(args.op[3] <= oend);
- (void)iend;
+
+ assert(ilowest == args.ilowest);
+ assert(ilowest + 6 == args.iend[0]);
+ (void)ilowest;
/* finish bitStreams one by one */
{
}
#endif
- if (!(flags & HUF_flags_disableFast)) {
+ if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
if (ret != 0)
return ret;
#include "../common/portability_macros.h"
+#if defined(__ELF__) && defined(__GNUC__)
/* Stack marking
* ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart
*/
-#if defined(__ELF__) && defined(__GNUC__)
.section .note.GNU-stack,"",%progbits
+
+#if defined(__aarch64__)
+/* Mark that this assembly supports BTI & PAC, because it is empty for aarch64.
+ * See: https://github.com/facebook/zstd/issues/3841
+ * See: https://gcc.godbolt.org/z/sqr5T4ffK
+ * See: https://lore.kernel.org/linux-arm-kernel/20200429211641.9279-8-broonie@kernel.org/
+ * See: https://reviews.llvm.org/D62609
+ */
+.pushsection .note.gnu.property, "a"
+.p2align 3
+.long 4 /* size of the name - "GNU\0" */
+.long 0x10 /* size of descriptor */
+.long 0x5 /* NT_GNU_PROPERTY_TYPE_0 */
+.asciz "GNU"
+.long 0xc0000000 /* pr_type - GNU_PROPERTY_AARCH64_FEATURE_1_AND */
+.long 4 /* pr_datasz - 4 bytes */
+.long 3 /* pr_data - GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC */
+.p2align 3 /* pr_padding - bring everything to 8 byte alignment */
+.popsection
+#endif
+
#endif
#if ZSTD_ENABLE_ASM_X86_64_BMI2
movq 88(%rax), %bits3
movq 96(%rax), %dtable
push %rax /* argument */
- push 104(%rax) /* ilimit */
+ push 104(%rax) /* ilowest */
push 112(%rax) /* oend */
push %olimit /* olimit space */
shrq $2, %r15
movq %ip0, %rax /* rax = ip0 */
- movq 40(%rsp), %rdx /* rdx = ilimit */
- subq %rdx, %rax /* rax = ip0 - ilimit */
- movq %rax, %rbx /* rbx = ip0 - ilimit */
+ movq 40(%rsp), %rdx /* rdx = ilowest */
+ subq %rdx, %rax /* rax = ip0 - ilowest */
+ movq %rax, %rbx /* rbx = ip0 - ilowest */
- /* rdx = (ip0 - ilimit) / 7 */
+ /* rdx = (ip0 - ilowest) / 7 */
movabsq $2635249153387078803, %rdx
mulq %rdx
subq %rdx, %rbx
/* If (op3 + 20 > olimit) */
movq %op3, %rax /* rax = op3 */
- addq $20, %rax /* rax = op3 + 20 */
- cmpq %rax, %olimit /* op3 + 20 > olimit */
- jb .L_4X1_exit
+ cmpq %rax, %olimit /* op3 == olimit */
+ je .L_4X1_exit
/* If (ip1 < ip0) go to exit */
cmpq %ip0, %ip1
/* Restore stack (oend & olimit) */
pop %rax /* olimit */
pop %rax /* oend */
- pop %rax /* ilimit */
+ pop %rax /* ilowest */
pop %rax /* arg */
/* Save ip / op / bits */
movq 96(%rax), %dtable
push %rax /* argument */
push %rax /* olimit */
- push 104(%rax) /* ilimit */
+ push 104(%rax) /* ilowest */
movq 112(%rax), %rax
push %rax /* oend3 */
/* We can consume up to 7 input bytes each iteration. */
movq %ip0, %rax /* rax = ip0 */
- movq 40(%rsp), %rdx /* rdx = ilimit */
- subq %rdx, %rax /* rax = ip0 - ilimit */
- movq %rax, %r15 /* r15 = ip0 - ilimit */
+ movq 40(%rsp), %rdx /* rdx = ilowest */
+ subq %rdx, %rax /* rax = ip0 - ilowest */
+ movq %rax, %r15 /* r15 = ip0 - ilowest */
/* rdx = rax / 7 */
movabsq $2635249153387078803, %rdx
addq %r15, %rdx
shrq $2, %rdx
- /* r15 = (ip0 - ilimit) / 7 */
+ /* r15 = (ip0 - ilowest) / 7 */
movq %rdx, %r15
/* r15 = min(r15, min(oend0 - op0, oend1 - op1, oend2 - op2, oend3 - op3) / 10) */
/* If (op3 + 10 > olimit) */
movq %op3, %rax /* rax = op3 */
- addq $10, %rax /* rax = op3 + 10 */
- cmpq %rax, %olimit /* op3 + 10 > olimit */
- jb .L_4X2_exit
+ cmpq %rax, %olimit /* op3 == olimit */
+ je .L_4X2_exit
/* If (ip1 < ip0) go to exit */
cmpq %ip0, %ip1
pop %rax /* oend1 */
pop %rax /* oend2 */
pop %rax /* oend3 */
- pop %rax /* ilimit */
+ pop %rax /* ilowest */
pop %rax /* olimit */
pop %rax /* arg */
/*-*******************************************************
* Dependencies
*********************************************************/
-#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
+#include "../common/error_private.h"
+#include "../common/zstd_internal.h" /* blockProperties_t */
#include "../common/mem.h" /* low level memory routines */
+#include "../common/bits.h" /* ZSTD_highbit32 */
#define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h"
#include "../common/huf.h"
#include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */
-#include "../common/zstd_internal.h" /* blockProperties_t */
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
#include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */
-#include "../common/bits.h" /* ZSTD_highbit32 */
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
# include "../legacy/zstd_legacy.h"
dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
dctx->disableHufAsm = 0;
+ dctx->maxBlockSizeParam = 0;
}
static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
#endif
dctx->noForwardProgress = 0;
dctx->oversizedDuration = 0;
+ dctx->isFrameDecompression = 1;
#if DYNAMIC_BMI2
dctx->bmi2 = ZSTD_cpuSupportsBmi2();
#endif
return frameSizeInfo;
}
-static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
+static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize, ZSTD_format_e format)
{
ZSTD_frameSizeInfo frameSizeInfo;
ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
- if (ZSTD_isLegacy(src, srcSize))
+ if (format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize))
return ZSTD_findFrameSizeInfoLegacy(src, srcSize);
#endif
- if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
+ if (format == ZSTD_f_zstd1 && (srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
&& (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
ZSTD_frameHeader zfh;
/* Extract Frame Header */
- { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
+ { size_t const ret = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, format);
if (ZSTD_isError(ret))
return ZSTD_errorFrameSizeInfo(ret);
if (ret > 0)
}
}
+static size_t ZSTD_findFrameCompressedSize_advanced(const void *src, size_t srcSize, ZSTD_format_e format) {
+ ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, format);
+ return frameSizeInfo.compressedSize;
+}
+
/** ZSTD_findFrameCompressedSize() :
- * compatible with legacy mode
- * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
- * `srcSize` must be at least as large as the frame contained
- * @return : the compressed size of the frame starting at `src` */
+ * See docs in zstd.h
+ * Note: compatible with legacy mode */
size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
{
- ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
- return frameSizeInfo.compressedSize;
+ return ZSTD_findFrameCompressedSize_advanced(src, srcSize, ZSTD_f_zstd1);
}
/** ZSTD_decompressBound() :
unsigned long long bound = 0;
/* Iterate over each frame */
while (srcSize > 0) {
- ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+ ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
size_t const compressedSize = frameSizeInfo.compressedSize;
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
/* Iterate over each frame */
while (srcSize > 0) {
- ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+ ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
size_t const compressedSize = frameSizeInfo.compressedSize;
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
ZSTD_frameHeader zfh;
ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
}
+ /* Shrink the blockSizeMax if enabled */
+ if (dctx->maxBlockSizeParam != 0)
+ dctx->fParams.blockSizeMax = MIN(dctx->fParams.blockSizeMax, (unsigned)dctx->maxBlockSizeParam);
+
/* Loop on each block */
while (1) {
BYTE* oBlockEnd = oend;
switch(blockProperties.blockType)
{
case bt_compressed:
- decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming);
+ assert(dctx->isFrameDecompression == 1);
+ decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, not_streaming);
break;
case bt_raw :
/* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */
default:
RETURN_ERROR(corruption_detected, "invalid block type");
}
-
- if (ZSTD_isError(decodedSize)) return decodedSize;
- if (dctx->validateChecksum)
+ FORWARD_IF_ERROR(decodedSize, "Block decompression failure");
+ DEBUGLOG(5, "Decompressed block of dSize = %u", (unsigned)decodedSize);
+ if (dctx->validateChecksum) {
XXH64_update(&dctx->xxhState, op, decodedSize);
- if (decodedSize != 0)
+ }
+ if (decodedSize) /* support dst = NULL,0 */ {
op += decodedSize;
+ }
assert(ip != NULL);
ip += cBlockSize;
remainingSrcSize -= cBlockSize;
return (size_t)(op-ostart);
}
-static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict, size_t dictSize,
while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
- if (ZSTD_isLegacy(src, srcSize)) {
+ if (dctx->format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) {
size_t decodedSize;
size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
if (ZSTD_isError(frameSize)) return frameSize;
decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
if (ZSTD_isError(decodedSize)) return decodedSize;
+ {
+ unsigned long long const expectedSize = ZSTD_getFrameContentSize(src, srcSize);
+ RETURN_ERROR_IF(expectedSize == ZSTD_CONTENTSIZE_ERROR, corruption_detected, "Corrupted frame header!");
+ if (expectedSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+ RETURN_ERROR_IF(expectedSize != decodedSize, corruption_detected,
+ "Frame header size does not match decoded size!");
+ }
+ }
+
assert(decodedSize <= dstCapacity);
dst = (BYTE*)dst + decodedSize;
dstCapacity -= decodedSize;
}
#endif
- if (srcSize >= 4) {
+ if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) {
U32 const magicNumber = MEM_readLE32(src);
DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
{
case bt_compressed:
DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
- rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming);
+ assert(dctx->isFrameDecompression == 1);
+ rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, is_streaming);
dctx->expected = 0; /* Streaming not supported */
break;
case bt_raw :
case ZSTDds_decodeSkippableHeader:
assert(src != NULL);
assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
+ assert(dctx->format != ZSTD_f_zstd1_magicless);
ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */
dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
dctx->stage = ZSTDds_skipFrame;
dctx->litEntropy = dctx->fseEntropy = 0;
dctx->dictID = 0;
dctx->bType = bt_reserved;
+ dctx->isFrameDecompression = 1;
ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
dctx->LLTptr = dctx->entropy.LLTable;
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
+ case ZSTD_d_maxBlockSize:
+ bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;
+ bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
+ return bounds;
default:;
}
case ZSTD_d_disableHuffmanAssembly:
*value = (int)dctx->disableHufAsm;
return 0;
+ case ZSTD_d_maxBlockSize:
+ *value = dctx->maxBlockSizeParam;
+ return 0;
default:;
}
RETURN_ERROR(parameter_unsupported, "");
CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value);
dctx->disableHufAsm = value != 0;
return 0;
+ case ZSTD_d_maxBlockSize:
+ if (value != 0) CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value);
+ dctx->maxBlockSizeParam = value;
+ return 0;
default:;
}
RETURN_ERROR(parameter_unsupported, "");
|| (reset == ZSTD_reset_session_and_parameters) ) {
dctx->streamStage = zdss_init;
dctx->noForwardProgress = 0;
+ dctx->isFrameDecompression = 1;
}
if ( (reset == ZSTD_reset_parameters)
|| (reset == ZSTD_reset_session_and_parameters) ) {
return ZSTD_sizeof_DCtx(dctx);
}
-size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
+static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, unsigned long long frameContentSize, size_t blockSizeMax)
{
- size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
- /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/
- unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2);
+ size_t const blockSize = MIN((size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax);
+ /* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block
+ * ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing
+ * the block at the beginning of the output buffer, and maintain a full window.
+ *
+ * We need another blockSize worth of buffer so that we can store split
+ * literals at the end of the block without overwriting the extDict window.
+ */
+ unsigned long long const neededRBSize = windowSize + (blockSize * 2) + (WILDCOPY_OVERLENGTH * 2);
unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
size_t const minRBSize = (size_t) neededSize;
RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
return minRBSize;
}
+size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
+{
+ return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, ZSTD_BLOCKSIZE_MAX);
+}
+
size_t ZSTD_estimateDStreamSize(size_t windowSize)
{
size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
&& zds->fParams.frameType != ZSTD_skippableFrame
&& (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
- size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart));
+ size_t const cSize = ZSTD_findFrameCompressedSize_advanced(istart, (size_t)(iend-istart), zds->format);
if (cSize <= (size_t)(iend-istart)) {
/* shortcut : using single-pass mode */
size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
if (ZSTD_isError(decompressedSize)) return decompressedSize;
- DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
+ DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()");
assert(istart != NULL);
ip = istart + cSize;
op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */
DEBUGLOG(4, "Consume header");
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
- if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
+ if (zds->format == ZSTD_f_zstd1
+ && (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
zds->stage = ZSTDds_skipFrame;
} else {
zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
frameParameter_windowTooLarge, "");
+ if (zds->maxBlockSizeParam != 0)
+ zds->fParams.blockSizeMax = MIN(zds->fParams.blockSizeMax, (unsigned)zds->maxBlockSizeParam);
/* Adapt buffer sizes to frame header instructions */
{ size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
- ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
+ ? ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, zds->fParams.frameContentSize, zds->fParams.blockSizeMax)
: 0;
ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);
* Block decoding
***************************************************************/
+static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx)
+{
+ size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX;
+ assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX);
+ return blockSizeMax;
+}
+
/*! ZSTD_getcBlockSize() :
* Provides the size of compressed block from block header `src` */
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
{
- if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
- {
- /* room for litbuffer to fit without read faulting */
- dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
+ size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
+ assert(litSize <= blockSizeMax);
+ assert(dctx->isFrameDecompression || streaming == not_streaming);
+ assert(expectedWriteSize <= blockSizeMax);
+ if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) {
+ /* If we aren't streaming, we can just put the literals after the output
+ * of the current block. We don't need to worry about overwriting the
+ * extDict of our window, because it doesn't exist.
+ * So if we have space after the end of the block, just put it there.
+ */
+ dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH;
dctx->litBufferEnd = dctx->litBuffer + litSize;
dctx->litBufferLocation = ZSTD_in_dst;
- }
- else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
- {
- /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
+ } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) {
+ /* Literals fit entirely within the extra buffer, put them there to avoid
+ * having to split the literals.
+ */
+ dctx->litBuffer = dctx->litExtraBuffer;
+ dctx->litBufferEnd = dctx->litBuffer + litSize;
+ dctx->litBufferLocation = ZSTD_not_in_dst;
+ } else {
+ assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE);
+ /* Literals must be split between the output block and the extra lit
+ * buffer. We fill the extra lit buffer with the tail of the literals,
+ * and put the rest of the literals at the end of the block, with
+ * WILDCOPY_OVERLENGTH of buffer room to allow for overreads.
+ * This MUST not write more than our maxBlockSize beyond dst, because in
+ * streaming mode, that could overwrite part of our extDict window.
+ */
if (splitImmediately) {
/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
- }
- else {
+ } else {
/* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
}
dctx->litBufferLocation = ZSTD_split;
- }
- else
- {
- /* fits entirely within litExtraBuffer, so no split is necessary */
- dctx->litBuffer = dctx->litExtraBuffer;
- dctx->litBufferEnd = dctx->litBuffer + litSize;
- dctx->litBufferLocation = ZSTD_not_in_dst;
+ assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize);
}
}
-/* Hidden declaration for fullbench */
-size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
- const void* src, size_t srcSize,
- void* dst, size_t dstCapacity, const streaming_operation streaming);
/*! ZSTD_decodeLiteralsBlock() :
* Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
* in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
*
* @return : nb of bytes read from src (< srcSize )
* note : symbol not declared but exposed for fullbench */
-size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
void* dst, size_t dstCapacity, const streaming_operation streaming)
{
{ const BYTE* const istart = (const BYTE*) src;
symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
+ size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
switch(litEncType)
{
U32 const lhlCode = (istart[0] >> 2) & 3;
U32 const lhc = MEM_readLE32(istart);
size_t hufSuccess;
- size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
+ size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
int const flags = 0
| (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
| (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
break;
}
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+ RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
if (!singleStream)
RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
"Not enough literals (%zu) for the 4-streams mode (min %u)",
}
if (dctx->litBufferLocation == ZSTD_split)
{
+ assert(litSize > ZSTD_LITBUFFEREXTRASIZE);
ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
+ assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax);
}
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
case set_basic:
{ size_t litSize, lhSize;
U32 const lhlCode = ((istart[0]) >> 2) & 3;
- size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
+ size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
switch(lhlCode)
{
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
}
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
+ RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
case set_rle:
{ U32 const lhlCode = ((istart[0]) >> 2) & 3;
size_t litSize, lhSize;
- size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
+ size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
switch(lhlCode)
{
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
break;
}
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+ RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
if (dctx->litBufferLocation == ZSTD_split)
}
}
+/* Hidden declaration for fullbench */
+size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
+ const void* src, size_t srcSize,
+ void* dst, size_t dstCapacity);
+size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
+ const void* src, size_t srcSize,
+ void* dst, size_t dstCapacity)
+{
+ dctx->isFrameDecompression = 0;
+ return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming);
+}
+
/* Default FSE distribution tables.
* These are pre-calculated FSE decoding tables using default distributions as defined in specification :
* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
/* SeqHead */
nbSeq = *ip++;
- if (!nbSeq) {
- *nbSeqPtr=0;
- RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
- return 1;
- }
if (nbSeq > 0x7F) {
if (nbSeq == 0xFF) {
RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
}
*nbSeqPtr = nbSeq;
+ if (nbSeq == 0) {
+ /* No sequence : section ends immediately */
+ RETURN_ERROR_IF(ip != iend, corruption_detected,
+ "extraneous data present in the Sequences section");
+ return (size_t)(ip - istart);
+ }
+
/* FSE table descriptors */
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
+ RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
/* ZSTD_safecopyDstBeforeSrc():
* This version allows overlap with dst before src, or handles the non-overlap case with dst after src
* Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
-static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
+static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) {
ptrdiff_t const diff = op - ip;
BYTE* const oend = op + length;
* to be optimized for many small sequences, since those fall into ZSTD_execSequence().
*/
FORCE_NOINLINE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_execSequenceEnd(BYTE* op,
BYTE* const oend, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit,
* This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
*/
FORCE_NOINLINE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit,
}
HINT_INLINE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_execSequence(BYTE* op,
BYTE* const oend, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit,
}
HINT_INLINE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit,
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
+/**
+ * ZSTD_decodeSequence():
+ * @p longOffsets : tells the decoder to reload more bit while decoding large offsets
+ * only used in 32-bit mode
+ * @return : Sequence (litL + matchL + offset)
+ */
FORCE_INLINE_TEMPLATE seq_t
-ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq)
{
seq_t seq;
/*
- * ZSTD_seqSymbol is a structure with a total of 64 bits wide. So it can be
- * loaded in one operation and extracted its fields by simply shifting or
- * bit-extracting on aarch64.
+ * ZSTD_seqSymbol is a 64 bits wide structure.
+ * It can be loaded in one operation
+ * and its fields extracted by simply shifting or bit-extracting on aarch64.
* GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
* operations that cause performance drop. This can be avoided by using this
* ZSTD_memcpy hack.
} else {
offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
+ temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
seqState->prevOffset[1] = seqState->prevOffset[0];
seqState->prevOffset[0] = offset = temp;
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
- ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
- ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
- ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
+ if (!isLastSeq) {
+ /* don't update FSE state for last Sequence */
+ ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
+ ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
+ ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
+ BIT_reloadDStream(&seqState->DStream);
+ }
}
return seq;
}
-#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+#if DEBUGLEVEL >= 1
+static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
{
size_t const windowSize = dctx->fParams.windowSize;
/* No dictionary used. */
/* Dictionary is active. */
return 1;
}
+#endif
-MEM_STATIC void ZSTD_assertValidSequence(
+static void ZSTD_assertValidSequence(
ZSTD_DCtx const* dctx,
BYTE const* op, BYTE const* oend,
seq_t const seq,
BYTE const* prefixStart, BYTE const* virtualStart)
{
#if DEBUGLEVEL >= 1
- size_t const windowSize = dctx->fParams.windowSize;
- size_t const sequenceSize = seq.litLength + seq.matchLength;
- BYTE const* const oLitEnd = op + seq.litLength;
- DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
- (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
- assert(op <= oend);
- assert((size_t)(oend - op) >= sequenceSize);
- assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
- if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
- size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
- /* Offset must be within the dictionary. */
- assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
- assert(seq.offset <= windowSize + dictSize);
- } else {
- /* Offset must be within our window. */
- assert(seq.offset <= windowSize);
+ if (dctx->isFrameDecompression) {
+ size_t const windowSize = dctx->fParams.windowSize;
+ size_t const sequenceSize = seq.litLength + seq.matchLength;
+ BYTE const* const oLitEnd = op + seq.litLength;
+ DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+ assert(op <= oend);
+ assert((size_t)(oend - op) >= sequenceSize);
+ assert(sequenceSize <= ZSTD_blockSizeMax(dctx));
+ if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
+ size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
+ /* Offset must be within the dictionary. */
+ assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
+ assert(seq.offset <= windowSize + dictSize);
+ } else {
+ /* Offset must be within our window. */
+ assert(seq.offset <= windowSize);
+ }
}
#else
(void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset,
- const int frame)
+ const ZSTD_longOffset_e isLongOffset)
{
const BYTE* ip = (const BYTE*)seqStart;
const BYTE* const iend = ip + seqSize;
BYTE* const ostart = (BYTE*)dst;
- BYTE* const oend = ostart + maxDstSize;
+ BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
BYTE* op = ostart;
const BYTE* litPtr = dctx->litPtr;
const BYTE* litBufferEnd = dctx->litBufferEnd;
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
- DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
- (void)frame;
+ DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq);
- /* Regen sequences */
+ /* Literals are split between internal buffer & output buffer */
if (nbSeq) {
seqState_t seqState;
dctx->fseEntropy = 1;
BIT_DStream_completed < BIT_DStream_overflow);
/* decompress without overrunning litPtr begins */
- {
- seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+ { seq_t sequence = {0,0,0}; /* some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one iteration) */
/* Align the decompression loop to 32 + 16 bytes.
*
* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
#endif
/* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
- for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
- size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+ for ( ; nbSeq; nbSeq--) {
+ sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
+ if (litPtr + sequence.litLength > dctx->litBufferEnd) break;
+ { size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
- assert(!ZSTD_isError(oneSeqSize));
- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+ assert(!ZSTD_isError(oneSeqSize));
+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
- if (UNLIKELY(ZSTD_isError(oneSeqSize)))
- return oneSeqSize;
- DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
- op += oneSeqSize;
- if (UNLIKELY(!--nbSeq))
- break;
- BIT_reloadDStream(&(seqState.DStream));
- sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
- }
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+ return oneSeqSize;
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+ op += oneSeqSize;
+ } }
+ DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)");
/* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
if (nbSeq > 0) {
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
- if (leftoverLit)
- {
+ DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength);
+ if (leftoverLit) {
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
sequence.litLength -= leftoverLit;
litPtr = dctx->litExtraBuffer;
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
dctx->litBufferLocation = ZSTD_not_in_dst;
- {
- size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize));
- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
return oneSeqSize;
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
op += oneSeqSize;
- if (--nbSeq)
- BIT_reloadDStream(&(seqState.DStream));
}
+ nbSeq--;
}
}
- if (nbSeq > 0) /* there is remaining lit from extra buffer */
- {
+ if (nbSeq > 0) {
+ /* there is remaining lit from extra buffer */
#if defined(__GNUC__) && defined(__x86_64__)
__asm__(".p2align 6");
# endif
#endif
- for (; ; ) {
- seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+ for ( ; nbSeq ; nbSeq--) {
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize));
- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
return oneSeqSize;
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
op += oneSeqSize;
- if (UNLIKELY(!--nbSeq))
- break;
- BIT_reloadDStream(&(seqState.DStream));
}
}
/* check if reached exact end */
DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
RETURN_ERROR_IF(nbSeq, corruption_detected, "");
- RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+ DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed);
+ RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
}
/* last literal segment */
- if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
- {
- size_t const lastLLSize = litBufferEnd - litPtr;
+ if (dctx->litBufferLocation == ZSTD_split) {
+ /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
+ size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
+ DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize);
RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
if (op != NULL) {
ZSTD_memmove(op, litPtr, lastLLSize);
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
dctx->litBufferLocation = ZSTD_not_in_dst;
}
- { size_t const lastLLSize = litBufferEnd - litPtr;
+ /* copy last literals from internal buffer */
+ { size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
+ DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize);
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
if (op != NULL) {
ZSTD_memcpy(op, litPtr, lastLLSize);
op += lastLLSize;
- }
- }
+ } }
- return op-ostart;
+ DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
+ return (size_t)(op - ostart);
}
FORCE_INLINE_TEMPLATE size_t
ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset,
- const int frame)
+ const ZSTD_longOffset_e isLongOffset)
{
const BYTE* ip = (const BYTE*)seqStart;
const BYTE* const iend = ip + seqSize;
BYTE* const ostart = (BYTE*)dst;
- BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
+ BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer;
BYTE* op = ostart;
const BYTE* litPtr = dctx->litPtr;
const BYTE* const litEnd = litPtr + dctx->litSize;
const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
- (void)frame;
/* Regen sequences */
if (nbSeq) {
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
assert(dst != NULL);
- ZSTD_STATIC_ASSERT(
- BIT_DStream_unfinished < BIT_DStream_completed &&
- BIT_DStream_endOfBuffer < BIT_DStream_completed &&
- BIT_DStream_completed < BIT_DStream_overflow);
-
#if defined(__GNUC__) && defined(__x86_64__)
__asm__(".p2align 6");
__asm__("nop");
# endif
#endif
- for ( ; ; ) {
- seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+ for ( ; nbSeq ; nbSeq--) {
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize));
- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
return oneSeqSize;
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
op += oneSeqSize;
- if (UNLIKELY(!--nbSeq))
- break;
- BIT_reloadDStream(&(seqState.DStream));
}
/* check if reached exact end */
- DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
- RETURN_ERROR_IF(nbSeq, corruption_detected, "");
- RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+ assert(nbSeq == 0);
+ RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
}
/* last literal segment */
- { size_t const lastLLSize = litEnd - litPtr;
+ { size_t const lastLLSize = (size_t)(litEnd - litPtr);
+ DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize);
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
if (op != NULL) {
ZSTD_memcpy(op, litPtr, lastLLSize);
op += lastLLSize;
- }
- }
+ } }
- return op-ostart;
+ DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
+ return (size_t)(op - ostart);
}
static size_t
ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset,
- const int frame)
+ const ZSTD_longOffset_e isLongOffset)
{
- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
static size_t
ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset,
- const int frame)
+ const ZSTD_longOffset_e isLongOffset)
{
- return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
+FORCE_INLINE_TEMPLATE
+
+size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
const BYTE* const prefixStart, const BYTE* const dictEnd)
{
prefetchPos += sequence.litLength;
{ const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
- const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
- * No consequence though : memory address is only used for prefetching, not for dereferencing */
+ /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+ * No consequence though : memory address is only used for prefetching, not for dereferencing */
+ const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset);
PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
}
return prefetchPos + sequence.matchLength;
ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset,
- const int frame)
+ const ZSTD_longOffset_e isLongOffset)
{
const BYTE* ip = (const BYTE*)seqStart;
const BYTE* const iend = ip + seqSize;
BYTE* const ostart = (BYTE*)dst;
- BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
+ BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
BYTE* op = ostart;
const BYTE* litPtr = dctx->litPtr;
const BYTE* litBufferEnd = dctx->litBufferEnd;
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
- (void)frame;
/* Regen sequences */
if (nbSeq) {
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
/* prepare in advance */
- for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
- seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+ for (seqNb=0; seqNb<seqAdvance; seqNb++) {
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
sequences[seqNb] = sequence;
}
- RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
/* decompress without stomping litBuffer */
- for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
- seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
- size_t oneSeqSize;
+ for (; seqNb < nbSeq; seqNb++) {
+ seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
- if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
- {
+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) {
/* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
if (leftoverLit)
litPtr = dctx->litExtraBuffer;
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
dctx->litBufferLocation = ZSTD_not_in_dst;
- oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
- assert(!ZSTD_isError(oneSeqSize));
- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+ assert(!ZSTD_isError(oneSeqSize));
+ ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
#endif
- if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
- prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
- sequences[seqNb & STORED_SEQS_MASK] = sequence;
- op += oneSeqSize;
- }
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+ sequences[seqNb & STORED_SEQS_MASK] = sequence;
+ op += oneSeqSize;
+ } }
else
{
/* lit buffer is either wholly contained in first or second split, or not split at all*/
- oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
+ size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize));
- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+ ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
#endif
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
op += oneSeqSize;
}
}
- RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
+ RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
/* finish queue */
seqNb -= seqAdvance;
for ( ; seqNb<nbSeq ; seqNb++) {
seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
- if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
- {
+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) {
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
- if (leftoverLit)
- {
+ if (leftoverLit) {
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
sequence->litLength -= leftoverLit;
litPtr = dctx->litExtraBuffer;
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
dctx->litBufferLocation = ZSTD_not_in_dst;
- {
- size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize));
- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+ ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
#endif
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
op += oneSeqSize;
ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize));
- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+ ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
#endif
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
op += oneSeqSize;
}
/* last literal segment */
- if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */
- {
+ if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */
size_t const lastLLSize = litBufferEnd - litPtr;
RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
if (op != NULL) {
}
}
- return op-ostart;
+ return (size_t)(op - ostart);
}
static size_t
ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset,
- const int frame)
+ const ZSTD_longOffset_e isLongOffset)
{
- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset,
- const int frame)
+ const ZSTD_longOffset_e isLongOffset)
{
- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
static BMI2_TARGET_ATTRIBUTE size_t
DONT_VECTORIZE
ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset,
- const int frame)
+ const ZSTD_longOffset_e isLongOffset)
{
- return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset,
- const int frame)
+ const ZSTD_longOffset_e isLongOffset)
{
- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset,
- const int frame);
+ const ZSTD_longOffset_e isLongOffset);
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static size_t
ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset,
- const int frame)
+ const ZSTD_longOffset_e isLongOffset)
{
DEBUGLOG(5, "ZSTD_decompressSequences");
#if DYNAMIC_BMI2
if (ZSTD_DCtx_get_bmi2(dctx)) {
- return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
#endif
- return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
static size_t
ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset,
- const int frame)
+ const ZSTD_longOffset_e isLongOffset)
{
DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
#if DYNAMIC_BMI2
if (ZSTD_DCtx_get_bmi2(dctx)) {
- return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+ return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
#endif
- return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+ return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset,
- const int frame)
+ const ZSTD_longOffset_e isLongOffset)
{
DEBUGLOG(5, "ZSTD_decompressSequencesLong");
#if DYNAMIC_BMI2
if (ZSTD_DCtx_get_bmi2(dctx)) {
- return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
#endif
- return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
size_t
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
- const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
+ const void* src, size_t srcSize, const streaming_operation streaming)
{ /* blockType == blockCompressed */
const BYTE* ip = (const BYTE*)src;
- DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
+ DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize);
/* Note : the wording of the specification
- * allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX.
+ * allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx).
* This generally does not happen, as it makes little sense,
* since an uncompressed block would feature same size and have no decompression cost.
* Also, note that decoder from reference libzstd before < v1.5.4
* would consider this edge case as an error.
- * As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX
+ * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx)
* for broader compatibility with the deployed ecosystem of zstd decoders */
- RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
+ RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, "");
/* Decode literals section */
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
/* Compute the maximum block size, which must also work when !frame and fParams are unset.
* Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
*/
- size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX));
- size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart);
+ size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx));
+ size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart);
/* isLongOffset must be true if there are long offsets.
* Offsets are long if they are larger than ZSTD_maxShortOffset().
* We don't expect that to be the case in 64-bit mode.
{
#endif
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
- return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
#endif
}
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
/* else */
if (dctx->litBufferLocation == ZSTD_split)
- return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+ return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
else
- return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
#endif
}
}
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
{
if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
const void* src, size_t srcSize)
{
size_t dSize;
+ dctx->isFrameDecompression = 0;
ZSTD_checkContinuity(dctx, dst, dstCapacity);
- dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
+ dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming);
+ FORWARD_IF_ERROR(dSize, "");
dctx->previousDstEnd = (char*)dst + dSize;
return dSize;
}
*/
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
- const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
+ const void* src, size_t srcSize, const streaming_operation streaming);
/* ZSTD_buildFSETable() :
* generate FSE decoding table for one symbol (ll, ml or off)
size_t litSize;
size_t rleSize;
size_t staticSize;
+ int isFrameDecompression;
#if DYNAMIC_BMI2 != 0
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
#endif
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
int disableHufAsm;
+ int maxBlockSizeParam;
/* streaming */
ZSTD_dStreamStage streamStage;
#endif
#include "../common/mem.h" /* read */
-#include "../common/pool.h"
-#include "../common/threading.h"
+#include "../common/pool.h" /* POOL_ctx */
+#include "../common/threading.h" /* ZSTD_pthread_mutex_t */
#include "../common/zstd_internal.h" /* includes zstd.h */
#include "../common/bits.h" /* ZSTD_highbit32 */
#include "../zdict.h"
#undef LOCALDISPLAYUPDATE
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
if (displayLevel >= l) { \
- if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
+ if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
g_time = clock(); \
DISPLAY(__VA_ARGS__); \
} \
* Returns the first pointer in [first, last) whose element does not compare
* less than value. If no such element exists it returns last.
*/
-static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
+static const size_t *COVER_lower_bound(const size_t* first, const size_t* last,
size_t value) {
- size_t count = last - first;
+ size_t count = (size_t)(last - first);
+ assert(last >= first);
while (count != 0) {
size_t step = count / 2;
const size_t *ptr = first;
*/
static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples,
- unsigned d, double splitPoint) {
+ unsigned d, double splitPoint)
+{
const BYTE *const samples = (const BYTE *)samplesBuffer;
const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
/* Split samples into testing and training sets */
return tail;
}
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
+ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover(
void *dictBuffer, size_t dictBufferCapacity,
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
ZDICT_cover_params_t parameters)
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
* If this dictionary is the best so far save it and its parameters.
*/
-void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
- COVER_dictSelection_t selection) {
+void COVER_best_finish(COVER_best_t* best,
+ ZDICT_cover_params_t parameters,
+ COVER_dictSelection_t selection)
+{
void* dict = selection.dictContent;
size_t compressedSize = selection.totalCompressedSize;
size_t dictSize = selection.dictSize;
size_t largestCompressed = 0;
BYTE* customDictContentEnd = customDictContent + dictContentSize;
- BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
- BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
+ BYTE* largestDictbuffer = (BYTE*)malloc(dictBufferCapacity);
+ BYTE* candidateDictBuffer = (BYTE*)malloc(dictBufferCapacity);
double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
if (!largestDictbuffer || !candidateDictBuffer) {
free(freqs);
}
-ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
+ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover(
void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples,
ZDICT_cover_params_t* parameters)
# define ZDICT_STATIC_LINKING_ONLY
#endif
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memset */
-#include <time.h> /* clock */
-#include "../common/mem.h" /* read */
-#include "../common/pool.h"
-#include "../common/threading.h"
-#include "../common/zstd_internal.h" /* includes zstd.h */
+#include "../common/threading.h" /* ZSTD_pthread_mutex_t */
+#include "../common/mem.h" /* U32, BYTE */
#include "../zdict.h"
/**
}
-ZDICTLIB_API size_t
+ZDICTLIB_STATIC_API size_t
ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples,
}
-ZDICTLIB_API size_t
+ZDICTLIB_STATIC_API size_t
ZDICT_optimizeTrainFromBuffer_fastCover(
void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer,
* Console display
***************************************/
#undef DISPLAY
-#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
+#define DISPLAY(...) do { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } while (0)
#undef DISPLAYLEVEL
-#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
+#define DISPLAYLEVEL(l, ...) do { if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } } while (0) /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
# undef DISPLAYUPDATE
-# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
- if (ZDICT_clockSpan(displayClock) > refreshRate) \
- { displayClock = clock(); DISPLAY(__VA_ARGS__); \
- if (notificationLevel>=4) fflush(stderr); } }
+# define DISPLAYUPDATE(l, ...) \
+ do { \
+ if (notificationLevel>=l) { \
+ if (ZDICT_clockSpan(displayClock) > refreshRate) { \
+ displayClock = clock(); \
+ DISPLAY(__VA_ARGS__); \
+ } \
+ if (notificationLevel>=4) fflush(stderr); \
+ } \
+ } while (0)
/* init */
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
const void* dict,size_t dictSize)
{
U32 const version = ZSTD_isLegacy(src, compressedSize);
+ char x;
+ /* Avoid passing NULL to legacy decoding. */
+ if (dst == NULL) {
+ assert(dstCapacity == 0);
+ dst = &x;
+ }
+ if (src == NULL) {
+ assert(compressedSize == 0);
+ src = &x;
+ }
+ if (dict == NULL) {
+ assert(dictSize == 0);
+ dict = &x;
+ }
(void)dst; (void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */
switch(version)
{
MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion,
const void* dict, size_t dictSize)
{
+ char x;
+ /* Avoid passing NULL to legacy decoding. */
+ if (dict == NULL) {
+ assert(dictSize == 0);
+ dict = &x;
+ }
DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion);
if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion);
switch(newVersion)
MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
+ static char x;
+ /* Avoid passing NULL to legacy decoding. */
+ if (output->dst == NULL) {
+ assert(output->size == 0);
+ output->dst = &x;
+ }
+ if (input->src == NULL) {
+ assert(input->size == 0);
+ input->src = &x;
+ }
DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version);
switch(version)
{
******************************************/
#include <stddef.h> /* size_t, ptrdiff_t */
#include "zstd_v01.h"
+#include "../common/compiler.h"
#include "../common/error_private.h"
}
ctx->phase = 1;
ctx->expected = ZSTD_blockHeaderSize;
+ if (ZSTDv01_isError(rSize)) return rSize;
ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
return rSize;
}
#include <stddef.h> /* size_t, ptrdiff_t */
#include "zstd_v02.h"
+#include "../common/compiler.h"
#include "../common/error_private.h"
#include <string.h> /* memcpy */
-/******************************************
-* Compiler-specific
-******************************************/
-#if defined(__GNUC__)
-# define MEM_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define MEM_STATIC static inline
-#elif defined(_MSC_VER)
-# define MEM_STATIC static __inline
-#else
-# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
/****************************************************************
* Basic Types
*****************************************************************/
* Streaming functions
***************************************/
-typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+typedef struct ZSTDv02_Dctx_s ZSTD_DCtx;
/*
Use above functions alternatively.
/* *************************************************************
* Decompression section
***************************************************************/
-struct ZSTD_DCtx_s
+struct ZSTDv02_Dctx_s
{
U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
}
ctx->phase = 1;
ctx->expected = ZSTD_blockHeaderSize;
+ if (ZSTD_isError(rSize)) return rSize;
ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
return rSize;
}
#include <stddef.h> /* size_t, ptrdiff_t */
#include "zstd_v03.h"
+#include "../common/compiler.h"
#include "../common/error_private.h"
#include <string.h> /* memcpy */
-/******************************************
-* Compiler-specific
-******************************************/
-#if defined(__GNUC__)
-# define MEM_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define MEM_STATIC static inline
-#elif defined(_MSC_VER)
-# define MEM_STATIC static __inline
-#else
-# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
/****************************************************************
* Basic Types
*****************************************************************/
* Streaming functions
***************************************/
-typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+typedef struct ZSTDv03_Dctx_s ZSTD_DCtx;
/*
Use above functions alternatively.
/* *************************************************************
* Decompression section
***************************************************************/
-struct ZSTD_DCtx_s
+struct ZSTDv03_Dctx_s
{
U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
}
ctx->phase = 1;
ctx->expected = ZSTD_blockHeaderSize;
+ if (ZSTD_isError(rSize)) return rSize;
ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
return rSize;
}
#include <string.h> /* memcpy */
#include "zstd_v04.h"
+#include "../common/compiler.h"
#include "../common/error_private.h"
# include <stdlib.h> /* _byteswap_ulong */
# include <intrin.h> /* _byteswap_* */
#endif
-#if defined(__GNUC__)
-# define MEM_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define MEM_STATIC static inline
-#elif defined(_MSC_VER)
-# define MEM_STATIC static __inline
-#else
-# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
/****************************************************************
}
ctx->stage = ZSTDds_decodeBlockHeader;
ctx->expected = ZSTD_blockHeaderSize;
+ if (ZSTD_isError(rSize)) return rSize;
ctx->previousDstEnd = (char*)dst + rSize;
return rSize;
}
unsigned ZBUFFv04_isError(size_t errorCode) { return ERR_isError(errorCode); }
const char* ZBUFFv04_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
-size_t ZBUFFv04_recommendedDInSize() { return BLOCKSIZE + 3; }
-size_t ZBUFFv04_recommendedDOutSize() { return BLOCKSIZE; }
+size_t ZBUFFv04_recommendedDInSize(void) { return BLOCKSIZE + 3; }
+size_t ZBUFFv04_recommendedDOutSize(void) { return BLOCKSIZE; }
}
dctx->stage = ZSTDv05ds_decodeBlockHeader;
dctx->expected = ZSTDv05_blockHeaderSize;
+ if (ZSTDv05_isError(rSize)) return rSize;
dctx->previousDstEnd = (char*)dst + rSize;
return rSize;
}
#include <stddef.h> /* size_t, ptrdiff_t */
#include <string.h> /* memcpy */
#include <stdlib.h> /* malloc, free, qsort */
+#include "../common/compiler.h"
#include "../common/error_private.h"
# include <stdlib.h> /* _byteswap_ulong */
# include <intrin.h> /* _byteswap_* */
#endif
-#if defined(__GNUC__)
-# define MEM_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define MEM_STATIC static inline
-#elif defined(_MSC_VER)
-# define MEM_STATIC static __inline
-#else
-# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
/*-**************************************************************
}
dctx->stage = ZSTDds_decodeBlockHeader;
dctx->expected = ZSTDv06_blockHeaderSize;
+ if (ZSTDv06_isError(rSize)) return rSize;
dctx->previousDstEnd = (char*)dst + rSize;
return rSize;
}
#define HUFv07_STATIC_LINKING_ONLY /* HUFv07_TABLELOG_ABSOLUTEMAX */
#define ZSTDv07_STATIC_LINKING_ONLY
+#include "../common/compiler.h"
#include "../common/error_private.h"
# include <stdlib.h> /* _byteswap_ulong */
# include <intrin.h> /* _byteswap_* */
#endif
-#if defined(__GNUC__)
-# define MEM_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define MEM_STATIC static inline
-#elif defined(_MSC_VER)
-# define MEM_STATIC static __inline
-#else
-# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
/*-**************************************************************
}
dctx->stage = ZSTDds_decodeBlockHeader;
dctx->expected = ZSTDv07_blockHeaderSize;
- dctx->previousDstEnd = (char*)dst + rSize;
if (ZSTDv07_isError(rSize)) return rSize;
+ dctx->previousDstEnd = (char*)dst + rSize;
if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
return rSize;
}
# You may select, at your option, one of the above-listed licenses.
# ################################################################
+# This included Makefile provides the following variables :
+# LIB_SRCDIR, LIB_BINDIR
+
+# Ensure the file is not included twice
+# Note : must be included after setting the default target
+ifndef LIBZSTD_MK_INCLUDED
+LIBZSTD_MK_INCLUDED := 1
+
##################################################################
# Input Variables
##################################################################
-# Zstd lib directory
-LIBZSTD ?= ./
+# By default, library's directory is same as this included makefile
+LIB_SRCDIR ?= $(dir $(realpath $(lastword $(MAKEFILE_LIST))))
+LIB_BINDIR ?= $(LIBSRC_DIR)
# ZSTD_LIB_MINIFY is a helper variable that
# configures a bunch of other variables to space-optimized defaults.
# Assembly support
ZSTD_NO_ASM ?= 0
+ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP ?= 0
+ZSTD_LIB_EXCLUDE_COMPRESSORS_GREEDY_AND_UP ?= 0
+
##################################################################
# libzstd helpers
##################################################################
NUM_SYMBOL := \#
# define silent mode as default (verbose mode with V=1 or VERBOSE=1)
+# Note : must be defined _after_ the default target
$(V)$(VERBOSE).SILENT:
# When cross-compiling from linux to windows,
TARGET_SYSTEM ?= $(OS)
# Version numbers
-LIBVER_SRC := $(LIBZSTD)/zstd.h
+LIBVER_SRC := $(LIB_SRCDIR)/zstd.h
LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)`
LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)`
LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)`
endif
GREP = grep $(GREP_OPTIONS)
-ZSTD_COMMON_FILES := $(sort $(wildcard $(LIBZSTD)/common/*.c))
-ZSTD_COMPRESS_FILES := $(sort $(wildcard $(LIBZSTD)/compress/*.c))
-ZSTD_DECOMPRESS_FILES := $(sort $(wildcard $(LIBZSTD)/decompress/*.c))
-ZSTD_DICTBUILDER_FILES := $(sort $(wildcard $(LIBZSTD)/dictBuilder/*.c))
-ZSTD_DEPRECATED_FILES := $(sort $(wildcard $(LIBZSTD)/deprecated/*.c))
+ZSTD_COMMON_FILES := $(sort $(wildcard $(LIB_SRCDIR)/common/*.c))
+ZSTD_COMPRESS_FILES := $(sort $(wildcard $(LIB_SRCDIR)/compress/*.c))
+ZSTD_DECOMPRESS_FILES := $(sort $(wildcard $(LIB_SRCDIR)/decompress/*.c))
+ZSTD_DICTBUILDER_FILES := $(sort $(wildcard $(LIB_SRCDIR)/dictBuilder/*.c))
+ZSTD_DEPRECATED_FILES := $(sort $(wildcard $(LIB_SRCDIR)/deprecated/*.c))
ZSTD_LEGACY_FILES :=
-ZSTD_DECOMPRESS_AMD64_ASM_FILES := $(sort $(wildcard $(LIBZSTD)/decompress/*_amd64.S))
+ZSTD_DECOMPRESS_AMD64_ASM_FILES := $(sort $(wildcard $(LIB_SRCDIR)/decompress/*_amd64.S))
ifneq ($(ZSTD_NO_ASM), 0)
CPPFLAGS += -DZSTD_DISABLE_ASM
CFLAGS += -DZSTD_LEGACY_MULTITHREADED_API
endif
+ifneq ($(ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP), 0)
+ CFLAGS += -DZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+else
+ifneq ($(ZSTD_LIB_EXCLUDE_COMPRESSORS_GREEDY_AND_UP), 0)
+ CFLAGS += -DZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR -DZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+endif
+endif
+
ifneq ($(ZSTD_LEGACY_SUPPORT), 0)
ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0)
- ZSTD_LEGACY_FILES += $(shell ls $(LIBZSTD)/legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
+ ZSTD_LEGACY_FILES += $(shell ls $(LIB_SRCDIR)/legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
endif
endif
CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
endif
endif # BUILD_DIR
-ZSTD_SUBDIR := $(LIBZSTD)/common $(LIBZSTD)/compress $(LIBZSTD)/decompress $(LIBZSTD)/dictBuilder $(LIBZSTD)/legacy $(LIBZSTD)/deprecated
+ZSTD_SUBDIR := $(LIB_SRCDIR)/common $(LIB_SRCDIR)/compress $(LIB_SRCDIR)/decompress $(LIB_SRCDIR)/dictBuilder $(LIB_SRCDIR)/legacy $(LIB_SRCDIR)/deprecated
vpath %.c $(ZSTD_SUBDIR)
vpath %.S $(ZSTD_SUBDIR)
+
+endif # LIBZSTD_MK_INCLUDED
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 5
-#define ZSTD_VERSION_RELEASE 5
+#define ZSTD_VERSION_RELEASE 6
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
/*! ZSTD_versionNumber() :
* for example to size a static array on stack.
* Will produce constant value 0 if srcSize too large.
*/
-#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00LLU : 0xFF00FF00U)
+#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U)
#define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
/* ZSTD_isError() :
/*= Compression context
* When compressing many times,
* it is recommended to allocate a context just once,
- * and re-use it for each successive compression operation.
+ * and reuse it for each successive compression operation.
* This will make workload friendlier for system's memory.
* Note : re-using context is just a speed / resource optimization.
* It doesn't change the compression ratio, which remains identical.
/*! ZSTD_compressCCtx() :
* Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
- * Important : in order to behave similarly to `ZSTD_compress()`,
- * this function compresses at requested compression level,
- * __ignoring any other parameter__ .
+ * Important : in order to mirror `ZSTD_compress()` behavior,
+ * this function compresses at the requested compression level,
+ * __ignoring any other advanced parameter__ .
* If any advanced parameter was set using the advanced API,
* they will all be reset. Only `compressionLevel` remains.
*/
/*= Decompression context
* When decompressing many times,
* it is recommended to allocate a context only once,
- * and re-use it for each successive compression operation.
+ * and reuse it for each successive compression operation.
* This will make workload friendlier for system's memory.
* Use one context per thread for parallel execution. */
typedef struct ZSTD_DCtx_s ZSTD_DCtx;
/*! ZSTD_decompressDCtx() :
* Same as ZSTD_decompress(),
* requires an allocated ZSTD_DCtx.
- * Compatible with sticky parameters.
+ * Compatible with sticky parameters (see below).
*/
ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
* using ZSTD_CCtx_set*() functions.
* Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
* "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
- * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
+ * __They do not apply to one-shot variants such as ZSTD_compressCCtx()__ .
*
* It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
*
* This API supersedes all other "advanced" API entry points in the experimental section.
- * In the future, we expect to remove from experimental API entry points which are redundant with this API.
+ * In the future, we expect to remove API entry points from experimental which are redundant with this API.
*/
* The higher the value of selected strategy, the more complex it is,
* resulting in stronger and slower compression.
* Special: value 0 means "use default strategy". */
+
+ ZSTD_c_targetCBlockSize=130, /* v1.5.6+
+ * Attempts to fit compressed block size into approximately targetCBlockSize.
+ * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX.
+ * Note that it's not a guarantee, just a convergence target (default:0).
+ * No target when targetCBlockSize == 0.
+ * This is helpful in low bandwidth streaming environments to improve end-to-end latency,
+ * when a client can make use of partial documents (a prominent example being Chrome).
+ * Note: this parameter is stable since v1.5.6.
+ * It was present as an experimental parameter in earlier versions,
+ * but using it with earlier library versions is not recommended
+ * due to massive performance regressions.
+ */
/* LDM mode parameters */
ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
* This parameter is designed to improve compression ratio
* ZSTD_c_forceMaxWindow
* ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode
- * ZSTD_c_targetCBlockSize
* ZSTD_c_srcSizeHint
* ZSTD_c_enableDedicatedDictSearch
* ZSTD_c_stableInBuffer
ZSTD_c_experimentalParam3=1000,
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
- ZSTD_c_experimentalParam6=1003,
+ /* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */
ZSTD_c_experimentalParam7=1004,
ZSTD_c_experimentalParam8=1005,
ZSTD_c_experimentalParam9=1006,
/*! ZSTD_compress2() :
* Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
+ * (note that this entry point doesn't even expose a compression level parameter).
* ZSTD_compress2() always starts a new frame.
* Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
* - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
* ZSTD_d_forceIgnoreChecksum
* ZSTD_d_refMultipleDDicts
* ZSTD_d_disableHuffmanAssembly
+ * ZSTD_d_maxBlockSize
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly
*/
ZSTD_d_experimentalParam2=1001,
ZSTD_d_experimentalParam3=1002,
ZSTD_d_experimentalParam4=1003,
- ZSTD_d_experimentalParam5=1004
+ ZSTD_d_experimentalParam5=1004,
+ ZSTD_d_experimentalParam6=1005
} ZSTD_dParameter;
* A ZSTD_CStream object is required to track streaming operation.
* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
* ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
-* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
+* It is recommended to reuse ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
*
* For parallel execution, use one separate ZSTD_CStream per thread.
*
* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
*
* Parameters are sticky : when starting a new compression on the same context,
-* it will re-use the same sticky parameters as previous compression session.
+* it will reuse the same sticky parameters as previous compression session.
* When in doubt, it's recommended to fully initialize the context before usage.
* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
* only ZSTD_e_end or ZSTD_e_flush operations are allowed.
* Before starting a new compression job, or changing compression parameters,
* it is required to fully flush internal buffers.
+ * - note: if an operation ends with an error, it may leave @cctx in an undefined state.
+ * Therefore, it's UB to invoke ZSTD_compressStream2() or ZSTD_compressStream() on such a state.
+ * In order to be re-employed after an error, a state must be reset,
+ * which can be done explicitly (ZSTD_CCtx_reset()),
+ * or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx())
*/
ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
ZSTD_outBuffer* output,
*
* A ZSTD_DStream object is required to track streaming operations.
* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
-* ZSTD_DStream objects can be re-used multiple times.
+* ZSTD_DStream objects can be reused multiple times.
*
* Use ZSTD_initDStream() to start a new decompression operation.
* @return : recommended first input size
* @return : 0 when a frame is completely decoded and fully flushed,
* or an error code, which can be tested using ZSTD_isError(),
* or any other value > 0, which means there is some decoding or flushing to do to complete current frame.
+ *
+ * Note: when an operation returns with an error code, the @zds state may be left in an undefined state.
+ * It's UB to invoke `ZSTD_decompressStream()` on such a state.
+ * In order to re-use such a state, it must be first reset,
+ * which can be done explicitly (`ZSTD_DCtx_reset()`),
+ * or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`)
*/
ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
*
* This API allows dictionaries to be used with ZSTD_compress2(),
* ZSTD_compressStream2(), and ZSTD_decompressDCtx().
- * Dictionaries are sticky, they remain valid when same context is re-used,
+ * Dictionaries are sticky, they remain valid when same context is reused,
* they only reset when the context is reset
* with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters.
* In contrast, Prefixes are single-use.
#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
/* Advanced parameter bounds */
-#define ZSTD_TARGETCBLOCKSIZE_MIN 64
+#define ZSTD_TARGETCBLOCKSIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
#define ZSTD_SRCSIZEHINT_MIN 0
#define ZSTD_SRCSIZEHINT_MAX INT_MAX
ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize);
/*! ZSTD_generateSequences() :
+ * WARNING: This function is meant for debugging and informational purposes ONLY!
+ * Its implementation is flawed, and it will be deleted in a future version.
+ * It is not guaranteed to succeed, as there are several cases where it will give
+ * up and fail. You should NOT use this function in production code.
+ *
+ * This function is deprecated, and will be removed in a future version.
+ *
* Generate sequences using ZSTD_compress2(), given a source buffer.
*
+ * @param zc The compression context to be used for ZSTD_compress2(). Set any
+ * compression parameters you need on this context.
+ * @param outSeqs The output sequences buffer of size @p outSeqsSize
+ * @param outSeqsSize The size of the output sequences buffer.
+ * ZSTD_sequenceBound(srcSize) is an upper bound on the number
+ * of sequences that can be generated.
+ * @param src The source buffer to generate sequences from of size @p srcSize.
+ * @param srcSize The size of the source buffer.
+ *
* Each block will end with a dummy sequence
* with offset == 0, matchLength == 0, and litLength == length of last literals.
* litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
* simply acts as a block delimiter.
*
- * @zc can be used to insert custom compression params.
- * This function invokes ZSTD_compress2().
- *
- * The output of this function can be fed into ZSTD_compressSequences() with CCtx
- * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
- * @return : number of sequences generated
+ * @returns The number of sequences generated, necessarily less than
+ * ZSTD_sequenceBound(srcSize), or an error code that can be checked
+ * with ZSTD_isError().
*/
-
+ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()")
ZSTDLIB_STATIC_API size_t
-ZSTD_generateSequences( ZSTD_CCtx* zc,
- ZSTD_Sequence* outSeqs, size_t outSeqsSize,
- const void* src, size_t srcSize);
+ZSTD_generateSequences(ZSTD_CCtx* zc,
+ ZSTD_Sequence* outSeqs, size_t outSeqsSize,
+ const void* src, size_t srcSize);
/*! ZSTD_mergeBlockDelimiters() :
* Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
/*! ZSTD_estimate*() :
* These functions make it possible to estimate memory usage
* of a future {D,C}Ctx, before its creation.
+ * This is useful in combination with ZSTD_initStatic(),
+ * which makes it possible to employ a static buffer for ZSTD_CCtx* state.
*
* ZSTD_estimateCCtxSize() will provide a memory budget large enough
- * for any compression level up to selected one.
- * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
- * does not include space for a window buffer.
- * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
+ * to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2()
+ * associated with any compression level up to max specified one.
* The estimate will assume the input may be arbitrarily large,
* which is the worst case.
*
+ * Note that the size estimation is specific for one-shot compression,
+ * it is not valid for streaming (see ZSTD_estimateCStreamSize*())
+ * nor other potential ways of using a ZSTD_CCtx* state.
+ *
* When srcSize can be bound by a known and rather "small" value,
- * this fact can be used to provide a tighter estimation
- * because the CCtx compression context will need less memory.
- * This tighter estimation can be provided by more advanced functions
+ * this knowledge can be used to provide a tighter budget estimation
+ * because the ZSTD_CCtx* state will need less memory for small inputs.
+ * This tighter estimation can be provided by employing more advanced functions
* ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
* and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
* Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
*
* Note : only single-threaded compression is supported.
* ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
- *
- * Note 2 : ZSTD_estimateCCtxSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
- * Size estimates assume that no external sequence producer is registered.
*/
-ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel);
ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void);
/*! ZSTD_estimateCStreamSize() :
- * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
- * It will also consider src size to be arbitrarily "large", which is worst case.
+ * ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression
+ * using any compression level up to the max specified one.
+ * It will also consider src size to be arbitrarily "large", which is a worst case scenario.
* If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
* ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
* ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
* Note : CStream size estimation is only correct for single-threaded compression.
- * ZSTD_DStream memory budget depends on window Size.
+ * ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
+ * Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
+ * Size estimates assume that no external sequence producer is registered.
+ *
+ * ZSTD_DStream memory budget depends on frame's window Size.
* This information can be passed manually, using ZSTD_estimateDStreamSize,
* or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
+ * Any frame requesting a window size larger than max specified one will be rejected.
* Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
* an internal ?Dict will be created, which additional size is not estimated here.
* In this case, get total size by adding ZSTD_estimate?DictSize
- * Note 2 : only single-threaded compression is supported.
- * ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
- * Note 3 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
- * Size estimates assume that no external sequence producer is registered.
*/
-ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel);
ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
-ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize);
ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
/*! ZSTD_estimate?DictSize() :
*/
#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
-/* Tries to fit compressed block size to be around targetCBlockSize.
- * No target when targetCBlockSize == 0.
- * There is no guarantee on compressed block size (default:0) */
-#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
-
/* User's best guess of source size.
* Hint is not valid when srcSizeHint == 0.
* There is no guarantee that hint is close to actual source size,
*/
#define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5
+/* ZSTD_d_maxBlockSize
+ * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
+ * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default.
+ *
+ * Forces the decompressor to reject blocks whose content size is
+ * larger than the configured maxBlockSize. When maxBlockSize is
+ * larger than the windowSize, the windowSize is used instead.
+ * This saves memory on the decoder when you know all blocks are small.
+ *
+ * This option is typically used in conjunction with ZSTD_c_maxBlockSize.
+ *
+ * WARNING: This causes the decoder to reject otherwise valid frames
+ * that have block sizes larger than the configured maxBlockSize.
+ */
+#define ZSTD_d_maxBlockSize ZSTD_d_experimentalParam6
+
/*! ZSTD_DCtx_setFormat() :
* This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
* explicitly specified.
*
* start a new frame, using same parameters from previous frame.
- * This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
+ * This is typically useful to skip dictionary loading stage, since it will reuse it in-place.
* Note that zcs must be init at least once before using ZSTD_resetCStream().
* If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
* If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
*
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
*
- * re-use decompression parameters from previous init; saves dictionary loading
+ * reuse decompression parameters from previous init; saves dictionary loading
*/
ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions")
ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
#define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1))
-typedef size_t ZSTD_sequenceProducer_F (
+typedef size_t (*ZSTD_sequenceProducer_F) (
void* sequenceProducerState,
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
const void* src, size_t srcSize,
ZSTD_registerSequenceProducer(
ZSTD_CCtx* cctx,
void* sequenceProducerState,
- ZSTD_sequenceProducer_F* sequenceProducer
+ ZSTD_sequenceProducer_F sequenceProducer
+);
+
+/*! ZSTD_CCtxParams_registerSequenceProducer() :
+ * Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params.
+ * This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(),
+ * which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx().
+ *
+ * If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx()
+ * is required, then this function is for you. Otherwise, you probably don't need it.
+ *
+ * See tests/zstreamtest.c for example usage. */
+ZSTDLIB_STATIC_API void
+ZSTD_CCtxParams_registerSequenceProducer(
+ ZSTD_CCtx_params* params,
+ void* sequenceProducerState,
+ ZSTD_sequenceProducer_F sequenceProducer
);
A ZSTD_CCtx object is required to track streaming operations.
Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
- ZSTD_CCtx object can be re-used multiple times within successive compression operations.
+ ZSTD_CCtx object can be reused multiple times within successive compression operations.
Start by initializing a context.
Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
- `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
+ `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again.
*/
/*===== Buffer-less streaming compression functions =====*/
A ZSTD_DCtx object is required to track streaming operations.
Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
- A ZSTD_DCtx object can be re-used multiple times.
+ A ZSTD_DCtx object can be reused multiple times.
First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
zstd-nolegacy
zstd-dictBuilder
zstd-dll
+zstd_arm64
+zstd_x64
# Object files
*.o
# zstd-decompress : decompressor-only version of zstd
# ##########################################################################
-.PHONY: default
-default: zstd-release
+# default target (when running `make` with no argument)
+zstd-release:
-LIBZSTD := ../lib
-
-include $(LIBZSTD)/libzstd.mk
+LIBZSTD_MK_DIR = ../lib
+include $(LIBZSTD_MK_DIR)/libzstd.mk
ifeq ($(shell $(CC) -v 2>&1 | $(GREP) -c "gcc version "), 1)
ALIGN_LOOP = -falign-loops=32
## zstd-dll: zstd executable linked to dynamic library libzstd (must have same version)
.PHONY: zstd-dll
-zstd-dll : LDFLAGS+= -L$(LIBZSTD)
+zstd-dll : LDFLAGS+= -L$(LIB_BINDIR)
zstd-dll : LDLIBS += -lzstd
zstd-dll : ZSTDLIB_LOCAL_SRC = xxhash.c pool.c threading.c
zstd-dll : zstd
#-----------------------------------------------------------------------------
# make install is validated only for Linux, macOS, BSD, Hurd and Solaris targets
#-----------------------------------------------------------------------------
-ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku AIX))
+ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku AIX MSYS_NT CYGWIN_NT))
HAVE_COLORNEVER = $(shell echo a | egrep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
EGREP_OPTIONS ?=
BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
unsigned nbLoops)
{
- size_t dstSize = 0;
nbLoops += !nbLoops; /* minimum nbLoops is 1 */
/* init */
} }
/* benchmark */
- { UTIL_time_t const clockStart = UTIL_getTime();
+ { size_t dstSize = 0;
+ UTIL_time_t const clockStart = UTIL_getTime();
unsigned loopNb, blockNb;
if (p.initFn != NULL) p.initFn(p.initPayload);
for (loopNb = 0; loopNb < nbLoops; loopNb++) {
--- /dev/null
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* **************************************
+ * Tuning parameters
+ ****************************************/
+#ifndef BMK_TIMETEST_DEFAULT_S /* default minimum time per test */
+# define BMK_TIMETEST_DEFAULT_S 3
+#endif
+
+/* *************************************
+ * Includes
+ ***************************************/
+/* this must be included first */
+#include "platform.h" /* Large Files support, compiler specifics */
+
+/* then following system includes */
+#include <assert.h> /* assert */
+#include <errno.h>
+#include <stdio.h> /* fprintf, fopen */
+#include <stdlib.h> /* malloc, free */
+#include <string.h> /* memset, strerror */
+#include "util.h" /* UTIL_getFileSize, UTIL_sleep */
+#include "../lib/common/mem.h"
+#include "benchfn.h"
+#include "timefn.h" /* UTIL_time_t */
+#ifndef ZSTD_STATIC_LINKING_ONLY
+# define ZSTD_STATIC_LINKING_ONLY
+#endif
+#include "../lib/zstd.h"
+#include "datagen.h" /* RDG_genBuffer */
+#include "lorem.h" /* LOREM_genBuffer */
+#ifndef XXH_INLINE_ALL
+# define XXH_INLINE_ALL
+#endif
+#include "../lib/common/xxhash.h"
+#include "../lib/zstd_errors.h"
+#include "benchzstd.h"
+
+/* *************************************
+ * Constants
+ ***************************************/
+#ifndef ZSTD_GIT_COMMIT
+# define ZSTD_GIT_COMMIT_STRING ""
+#else
+# define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT)
+#endif
+
+#define TIMELOOP_MICROSEC (1 * 1000000ULL) /* 1 second */
+#define TIMELOOP_NANOSEC (1 * 1000000000ULL) /* 1 second */
+#define ACTIVEPERIOD_MICROSEC (70 * TIMELOOP_MICROSEC) /* 70 seconds */
+#define COOLPERIOD_SEC 10
+
+#define KB *(1 << 10)
+#define MB *(1 << 20)
+#define GB *(1U << 30)
+
+#define BMK_RUNTEST_DEFAULT_MS 1000
+
+static const size_t maxMemory = (sizeof(size_t) == 4)
+ ?
+ /* 32-bit */ (2 GB - 64 MB)
+ :
+ /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t) * 8) - 31));
+
+/* *************************************
+ * console display
+ ***************************************/
+#define DISPLAY(...) \
+ { \
+ fprintf(stderr, __VA_ARGS__); \
+ fflush(NULL); \
+ }
+#define DISPLAYLEVEL(l, ...) \
+ if (displayLevel >= l) { \
+ DISPLAY(__VA_ARGS__); \
+ }
+/* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : +
+ * progression; 4 : + information */
+#define OUTPUT(...) \
+ { \
+ fprintf(stdout, __VA_ARGS__); \
+ fflush(NULL); \
+ }
+#define OUTPUTLEVEL(l, ...) \
+ if (displayLevel >= l) { \
+ OUTPUT(__VA_ARGS__); \
+ }
+
+/* *************************************
+ * Exceptions
+ ***************************************/
+#ifndef DEBUG
+# define DEBUG 0
+#endif
+#define DEBUGOUTPUT(...) \
+ { \
+ if (DEBUG) \
+ DISPLAY(__VA_ARGS__); \
+ }
+
+#define RETURN_ERROR_INT(errorNum, ...) \
+ { \
+ DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
+ DISPLAYLEVEL(1, "Error %i : ", errorNum); \
+ DISPLAYLEVEL(1, __VA_ARGS__); \
+ DISPLAYLEVEL(1, " \n"); \
+ return errorNum; \
+ }
+
+#define CHECK_Z(zf) \
+ { \
+ size_t const zerr = zf; \
+ if (ZSTD_isError(zerr)) { \
+ DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
+ DISPLAY("Error : "); \
+ DISPLAY("%s failed : %s", #zf, ZSTD_getErrorName(zerr)); \
+ DISPLAY(" \n"); \
+ exit(1); \
+ } \
+ }
+
+#define RETURN_ERROR(errorNum, retType, ...) \
+ { \
+ retType r; \
+ memset(&r, 0, sizeof(retType)); \
+ DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
+ DISPLAYLEVEL(1, "Error %i : ", errorNum); \
+ DISPLAYLEVEL(1, __VA_ARGS__); \
+ DISPLAYLEVEL(1, " \n"); \
+ r.tag = errorNum; \
+ return r; \
+ }
+
+/* replacement for snprintf(), which is not supported by C89
+ * sprintf() would be the supported one, but it's labelled unsafe,
+ * so some modern static analyzer will flag it as such, making it unusable.
+ * formatString_u() replaces snprintf() for the specific case where there are only %u arguments */
+static int formatString_u(char* buffer, size_t buffer_size, const char* formatString, unsigned int value)
+{
+ size_t written = 0;
+ int i;
+ assert(value <= 100);
+
+ for (i = 0; formatString[i] != '\0' && written < buffer_size - 1; ++i) {
+ if (formatString[i] != '%') {
+ buffer[written++] = formatString[i];
+ continue;
+ }
+
+ if (formatString[++i] == 'u') {
+ /* Handle single digit */
+ if (value < 10) {
+ buffer[written++] = '0' + (char)value;
+ } else if (value < 100) {
+ /* Handle two digits */
+ if (written >= buffer_size - 2) {
+ return -1; /* buffer overflow */
+ }
+ buffer[written++] = '0' + (char)(value / 10);
+ buffer[written++] = '0' + (char)(value % 10);
+ } else { /* 100 */
+ if (written >= buffer_size - 3) {
+ return -1; /* buffer overflow */
+ }
+ buffer[written++] = '1';
+ buffer[written++] = '0';
+ buffer[written++] = '0';
+ }
+ } else if (formatString[i] == '%') { /* Check for escaped percent sign */
+ buffer[written++] = '%';
+ } else {
+ return -1; /* unsupported format */
+ }
+ }
+
+ if (written < buffer_size) {
+ buffer[written] = '\0';
+ } else {
+ buffer[0] = '\0'; /* Handle truncation */
+ }
+
+ return (int)written;
+}
+
+/* *************************************
+ * Benchmark Parameters
+ ***************************************/
+
+BMK_advancedParams_t BMK_initAdvancedParams(void)
+{
+ BMK_advancedParams_t const res = {
+ BMK_both, /* mode */
+ BMK_TIMETEST_DEFAULT_S, /* nbSeconds */
+ 0, /* blockSize */
+ 0, /* targetCBlockSize */
+ 0, /* nbWorkers */
+ 0, /* realTime */
+ 0, /* additionalParam */
+ 0, /* ldmFlag */
+ 0, /* ldmMinMatch */
+ 0, /* ldmHashLog */
+ 0, /* ldmBuckSizeLog */
+ 0, /* ldmHashRateLog */
+ ZSTD_ps_auto, /* literalCompressionMode */
+ 0 /* useRowMatchFinder */
+ };
+ return res;
+}
+
+/* ********************************************************
+ * Bench functions
+ **********************************************************/
+typedef struct {
+ const void* srcPtr;
+ size_t srcSize;
+ void* cPtr;
+ size_t cRoom;
+ size_t cSize;
+ void* resPtr;
+ size_t resSize;
+} blockParam_t;
+
+#undef MIN
+#undef MAX
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+static void BMK_initCCtx(
+ ZSTD_CCtx* ctx,
+ const void* dictBuffer,
+ size_t dictBufferSize,
+ int cLevel,
+ const ZSTD_compressionParameters* comprParams,
+ const BMK_advancedParams_t* adv)
+{
+ ZSTD_CCtx_reset(ctx, ZSTD_reset_session_and_parameters);
+ if (adv->nbWorkers == 1) {
+ CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, 0));
+ } else {
+ CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers));
+ }
+ CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel));
+ CHECK_Z(ZSTD_CCtx_setParameter(
+ ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder));
+ CHECK_Z(ZSTD_CCtx_setParameter(
+ ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag));
+ CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch));
+ CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog));
+ CHECK_Z(ZSTD_CCtx_setParameter(
+ ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog));
+ CHECK_Z(ZSTD_CCtx_setParameter(
+ ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog));
+ CHECK_Z(ZSTD_CCtx_setParameter(
+ ctx, ZSTD_c_windowLog, (int)comprParams->windowLog));
+ CHECK_Z(ZSTD_CCtx_setParameter(
+ ctx, ZSTD_c_hashLog, (int)comprParams->hashLog));
+ CHECK_Z(ZSTD_CCtx_setParameter(
+ ctx, ZSTD_c_chainLog, (int)comprParams->chainLog));
+ CHECK_Z(ZSTD_CCtx_setParameter(
+ ctx, ZSTD_c_searchLog, (int)comprParams->searchLog));
+ CHECK_Z(ZSTD_CCtx_setParameter(
+ ctx, ZSTD_c_minMatch, (int)comprParams->minMatch));
+ CHECK_Z(ZSTD_CCtx_setParameter(
+ ctx, ZSTD_c_targetLength, (int)comprParams->targetLength));
+ CHECK_Z(ZSTD_CCtx_setParameter(
+ ctx,
+ ZSTD_c_literalCompressionMode,
+ (int)adv->literalCompressionMode));
+ CHECK_Z(ZSTD_CCtx_setParameter(
+ ctx, ZSTD_c_strategy, (int)comprParams->strategy));
+ CHECK_Z(ZSTD_CCtx_setParameter(
+ ctx, ZSTD_c_targetCBlockSize, (int)adv->targetCBlockSize));
+ CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize));
+}
+
+static void
+BMK_initDCtx(ZSTD_DCtx* dctx, const void* dictBuffer, size_t dictBufferSize)
+{
+ CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
+ CHECK_Z(ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize));
+}
+
+typedef struct {
+ ZSTD_CCtx* cctx;
+ const void* dictBuffer;
+ size_t dictBufferSize;
+ int cLevel;
+ const ZSTD_compressionParameters* comprParams;
+ const BMK_advancedParams_t* adv;
+} BMK_initCCtxArgs;
+
+static size_t local_initCCtx(void* payload)
+{
+ BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload;
+ BMK_initCCtx(
+ ag->cctx,
+ ag->dictBuffer,
+ ag->dictBufferSize,
+ ag->cLevel,
+ ag->comprParams,
+ ag->adv);
+ return 0;
+}
+
+typedef struct {
+ ZSTD_DCtx* dctx;
+ const void* dictBuffer;
+ size_t dictBufferSize;
+} BMK_initDCtxArgs;
+
+static size_t local_initDCtx(void* payload)
+{
+ BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload;
+ BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize);
+ return 0;
+}
+
+/* `addArgs` is the context */
+static size_t local_defaultCompress(
+ const void* srcBuffer,
+ size_t srcSize,
+ void* dstBuffer,
+ size_t dstSize,
+ void* addArgs)
+{
+ ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs;
+ return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize);
+}
+
+/* `addArgs` is the context */
+static size_t local_defaultDecompress(
+ const void* srcBuffer,
+ size_t srcSize,
+ void* dstBuffer,
+ size_t dstCapacity,
+ void* addArgs)
+{
+ size_t moreToFlush = 1;
+ ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs;
+ ZSTD_inBuffer in;
+ ZSTD_outBuffer out;
+ in.src = srcBuffer;
+ in.size = srcSize;
+ in.pos = 0;
+ out.dst = dstBuffer;
+ out.size = dstCapacity;
+ out.pos = 0;
+ while (moreToFlush) {
+ if (out.pos == out.size) {
+ return (size_t)-ZSTD_error_dstSize_tooSmall;
+ }
+ moreToFlush = ZSTD_decompressStream(dctx, &out, &in);
+ if (ZSTD_isError(moreToFlush)) {
+ return moreToFlush;
+ }
+ }
+ return out.pos;
+}
+
+/* ================================================================= */
+/* Benchmark Zstandard, mem-to-mem scenarios */
+/* ================================================================= */
+
+int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome)
+{
+ return outcome.tag == 0;
+}
+
+BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome)
+{
+ assert(outcome.tag == 0);
+ return outcome.internal_never_use_directly;
+}
+
+static BMK_benchOutcome_t BMK_benchOutcome_error(void)
+{
+ BMK_benchOutcome_t b;
+ memset(&b, 0, sizeof(b));
+ b.tag = 1;
+ return b;
+}
+
+static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(
+ BMK_benchResult_t result)
+{
+ BMK_benchOutcome_t b;
+ b.tag = 0;
+ b.internal_never_use_directly = result;
+ return b;
+}
+
+/* benchMem with no allocation */
+static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
+ const void** srcPtrs,
+ size_t* srcSizes,
+ void** cPtrs,
+ size_t* cCapacities,
+ size_t* cSizes,
+ void** resPtrs,
+ size_t* resSizes,
+ void** resultBufferPtr,
+ void* compressedBuffer,
+ size_t maxCompressedSize,
+ BMK_timedFnState_t* timeStateCompress,
+ BMK_timedFnState_t* timeStateDecompress,
+
+ const void* srcBuffer,
+ size_t srcSize,
+ const size_t* fileSizes,
+ unsigned nbFiles,
+ const int cLevel,
+ const ZSTD_compressionParameters* comprParams,
+ const void* dictBuffer,
+ size_t dictBufferSize,
+ ZSTD_CCtx* cctx,
+ ZSTD_DCtx* dctx,
+ int displayLevel,
+ const char* displayName,
+ const BMK_advancedParams_t* adv)
+{
+ size_t const blockSize =
+ ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly))
+ ? adv->blockSize
+ : srcSize)
+ + (!srcSize); /* avoid div by 0 */
+ BMK_benchResult_t benchResult;
+ size_t const loadedCompressedSize = srcSize;
+ size_t cSize = 0;
+ double ratio = 0.;
+ U32 nbBlocks;
+
+ assert(cctx != NULL);
+ assert(dctx != NULL);
+
+ /* init */
+ memset(&benchResult, 0, sizeof(benchResult));
+ if (strlen(displayName) > 17)
+ displayName +=
+ strlen(displayName) - 17; /* display last 17 characters */
+ if (adv->mode == BMK_decodeOnly) {
+ /* benchmark only decompression : source must be already compressed */
+ const char* srcPtr = (const char*)srcBuffer;
+ U64 totalDSize64 = 0;
+ U32 fileNb;
+ for (fileNb = 0; fileNb < nbFiles; fileNb++) {
+ U64 const fSize64 =
+ ZSTD_findDecompressedSize(srcPtr, fileSizes[fileNb]);
+ if (fSize64 == ZSTD_CONTENTSIZE_UNKNOWN) {
+ RETURN_ERROR(
+ 32,
+ BMK_benchOutcome_t,
+ "Decompressed size cannot be determined: cannot benchmark");
+ }
+ if (fSize64 == ZSTD_CONTENTSIZE_ERROR) {
+ RETURN_ERROR(
+ 32,
+ BMK_benchOutcome_t,
+ "Error while trying to assess decompressed size: data may be invalid");
+ }
+ totalDSize64 += fSize64;
+ srcPtr += fileSizes[fileNb];
+ }
+ {
+ size_t const decodedSize = (size_t)totalDSize64;
+ assert((U64)decodedSize == totalDSize64); /* check overflow */
+ free(*resultBufferPtr);
+ if (totalDSize64 > decodedSize) { /* size_t overflow */
+ RETURN_ERROR(
+ 32,
+ BMK_benchOutcome_t,
+ "decompressed size is too large for local system");
+ }
+ *resultBufferPtr = malloc(decodedSize);
+ if (!(*resultBufferPtr)) {
+ RETURN_ERROR(
+ 33,
+ BMK_benchOutcome_t,
+ "allocation error: not enough memory");
+ }
+ cSize = srcSize;
+ srcSize = decodedSize;
+ ratio = (double)srcSize / (double)cSize;
+ }
+ }
+
+ /* Init data blocks */
+ {
+ const char* srcPtr = (const char*)srcBuffer;
+ char* cPtr = (char*)compressedBuffer;
+ char* resPtr = (char*)(*resultBufferPtr);
+ U32 fileNb;
+ for (nbBlocks = 0, fileNb = 0; fileNb < nbFiles; fileNb++) {
+ size_t remaining = fileSizes[fileNb];
+ U32 const nbBlocksforThisFile = (adv->mode == BMK_decodeOnly)
+ ? 1
+ : (U32)((remaining + (blockSize - 1)) / blockSize);
+ U32 const blockEnd = nbBlocks + nbBlocksforThisFile;
+ for (; nbBlocks < blockEnd; nbBlocks++) {
+ size_t const thisBlockSize = MIN(remaining, blockSize);
+ srcPtrs[nbBlocks] = srcPtr;
+ srcSizes[nbBlocks] = thisBlockSize;
+ cPtrs[nbBlocks] = cPtr;
+ cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly)
+ ? thisBlockSize
+ : ZSTD_compressBound(thisBlockSize);
+ resPtrs[nbBlocks] = resPtr;
+ resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly)
+ ? (size_t)ZSTD_findDecompressedSize(
+ srcPtr, thisBlockSize)
+ : thisBlockSize;
+ srcPtr += thisBlockSize;
+ cPtr += cCapacities[nbBlocks];
+ resPtr += thisBlockSize;
+ remaining -= thisBlockSize;
+ if (adv->mode == BMK_decodeOnly) {
+ cSizes[nbBlocks] = thisBlockSize;
+ benchResult.cSize = thisBlockSize;
+ }
+ }
+ }
+ }
+
+ /* warming up `compressedBuffer` */
+ if (adv->mode == BMK_decodeOnly) {
+ memcpy(compressedBuffer, srcBuffer, loadedCompressedSize);
+ } else {
+ RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);
+ }
+
+ if (!UTIL_support_MT_measurements() && adv->nbWorkers > 1) {
+ OUTPUTLEVEL(
+ 2,
+ "Warning : time measurements may be incorrect in multithreading mode... \n")
+ }
+
+ /* Bench */
+ {
+ U64 const crcOrig = (adv->mode == BMK_decodeOnly)
+ ? 0
+ : XXH64(srcBuffer, srcSize, 0);
+#define NB_MARKS 4
+ const char* marks[NB_MARKS] = { " |", " /", " =", " \\" };
+ U32 markNb = 0;
+ int compressionCompleted = (adv->mode == BMK_decodeOnly);
+ int decompressionCompleted = (adv->mode == BMK_compressOnly);
+ BMK_benchParams_t cbp, dbp;
+ BMK_initCCtxArgs cctxprep;
+ BMK_initDCtxArgs dctxprep;
+
+ cbp.benchFn = local_defaultCompress; /* ZSTD_compress2 */
+ cbp.benchPayload = cctx;
+ cbp.initFn = local_initCCtx; /* BMK_initCCtx */
+ cbp.initPayload = &cctxprep;
+ cbp.errorFn = ZSTD_isError;
+ cbp.blockCount = nbBlocks;
+ cbp.srcBuffers = srcPtrs;
+ cbp.srcSizes = srcSizes;
+ cbp.dstBuffers = cPtrs;
+ cbp.dstCapacities = cCapacities;
+ cbp.blockResults = cSizes;
+
+ cctxprep.cctx = cctx;
+ cctxprep.dictBuffer = dictBuffer;
+ cctxprep.dictBufferSize = dictBufferSize;
+ cctxprep.cLevel = cLevel;
+ cctxprep.comprParams = comprParams;
+ cctxprep.adv = adv;
+
+ dbp.benchFn = local_defaultDecompress;
+ dbp.benchPayload = dctx;
+ dbp.initFn = local_initDCtx;
+ dbp.initPayload = &dctxprep;
+ dbp.errorFn = ZSTD_isError;
+ dbp.blockCount = nbBlocks;
+ dbp.srcBuffers = (const void* const*)cPtrs;
+ dbp.srcSizes = cSizes;
+ dbp.dstBuffers = resPtrs;
+ dbp.dstCapacities = resSizes;
+ dbp.blockResults = NULL;
+
+ dctxprep.dctx = dctx;
+ dctxprep.dictBuffer = dictBuffer;
+ dctxprep.dictBufferSize = dictBufferSize;
+
+ OUTPUTLEVEL(2, "\r%70s\r", ""); /* blank line */
+ assert(srcSize < UINT_MAX);
+ OUTPUTLEVEL(
+ 2,
+ "%2s-%-17.17s :%10u -> \r",
+ marks[markNb],
+ displayName,
+ (unsigned)srcSize);
+
+ while (!(compressionCompleted && decompressionCompleted)) {
+ if (!compressionCompleted) {
+ BMK_runOutcome_t const cOutcome =
+ BMK_benchTimedFn(timeStateCompress, cbp);
+
+ if (!BMK_isSuccessful_runOutcome(cOutcome)) {
+ RETURN_ERROR(30, BMK_benchOutcome_t, "compression error");
+ }
+
+ {
+ BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome);
+ cSize = cResult.sumOfReturn;
+ ratio = (double)srcSize / (double)cSize;
+ {
+ BMK_benchResult_t newResult;
+ newResult.cSpeed =
+ (U64)((double)srcSize * TIMELOOP_NANOSEC
+ / cResult.nanoSecPerRun);
+ benchResult.cSize = cSize;
+ if (newResult.cSpeed > benchResult.cSpeed)
+ benchResult.cSpeed = newResult.cSpeed;
+ }
+ }
+
+ {
+ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
+ assert(cSize < UINT_MAX);
+ OUTPUTLEVEL(
+ 2,
+ "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s \r",
+ marks[markNb],
+ displayName,
+ (unsigned)srcSize,
+ (unsigned)cSize,
+ ratioAccuracy,
+ ratio,
+ benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1,
+ (double)benchResult.cSpeed / MB_UNIT);
+ }
+ compressionCompleted =
+ BMK_isCompleted_TimedFn(timeStateCompress);
+ }
+
+ if (!decompressionCompleted) {
+ BMK_runOutcome_t const dOutcome =
+ BMK_benchTimedFn(timeStateDecompress, dbp);
+
+ if (!BMK_isSuccessful_runOutcome(dOutcome)) {
+ RETURN_ERROR(30, BMK_benchOutcome_t, "decompression error");
+ }
+
+ {
+ BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome);
+ U64 const newDSpeed =
+ (U64)((double)srcSize * TIMELOOP_NANOSEC
+ / dResult.nanoSecPerRun);
+ if (newDSpeed > benchResult.dSpeed)
+ benchResult.dSpeed = newDSpeed;
+ }
+
+ {
+ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
+ OUTPUTLEVEL(
+ 2,
+ "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s, %6.1f MB/s\r",
+ marks[markNb],
+ displayName,
+ (unsigned)srcSize,
+ (unsigned)cSize,
+ ratioAccuracy,
+ ratio,
+ benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1,
+ (double)benchResult.cSpeed / MB_UNIT,
+ (double)benchResult.dSpeed / MB_UNIT);
+ }
+ decompressionCompleted =
+ BMK_isCompleted_TimedFn(timeStateDecompress);
+ }
+ markNb = (markNb + 1) % NB_MARKS;
+ } /* while (!(compressionCompleted && decompressionCompleted)) */
+
+ /* CRC Checking */
+ {
+ const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr);
+ U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
+ if ((adv->mode == BMK_both) && (crcOrig != crcCheck)) {
+ size_t u;
+ DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n",
+ displayName,
+ (unsigned)crcOrig,
+ (unsigned)crcCheck);
+ for (u = 0; u < srcSize; u++) {
+ if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) {
+ unsigned segNb, bNb, pos;
+ size_t bacc = 0;
+ DISPLAY("Decoding error at pos %u ", (unsigned)u);
+ for (segNb = 0; segNb < nbBlocks; segNb++) {
+ if (bacc + srcSizes[segNb] > u)
+ break;
+ bacc += srcSizes[segNb];
+ }
+ pos = (U32)(u - bacc);
+ bNb = pos / (128 KB);
+ DISPLAY("(sample %u, block %u, pos %u) \n",
+ segNb,
+ bNb,
+ pos);
+ {
+ size_t const lowest = (u > 5) ? 5 : u;
+ size_t n;
+ DISPLAY("origin: ");
+ for (n = lowest; n > 0; n--)
+ DISPLAY("%02X ",
+ ((const BYTE*)srcBuffer)[u - n]);
+ DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]);
+ for (n = 1; n < 3; n++)
+ DISPLAY("%02X ",
+ ((const BYTE*)srcBuffer)[u + n]);
+ DISPLAY(" \n");
+ DISPLAY("decode: ");
+ for (n = lowest; n > 0; n--)
+ DISPLAY("%02X ", resultBuffer[u - n]);
+ DISPLAY(" :%02X: ", resultBuffer[u]);
+ for (n = 1; n < 3; n++)
+ DISPLAY("%02X ", resultBuffer[u + n]);
+ DISPLAY(" \n");
+ }
+ break;
+ }
+ if (u == srcSize - 1) { /* should never happen */
+ DISPLAY("no difference detected\n");
+ }
+ } /* for (u=0; u<srcSize; u++) */
+ } /* if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) */
+ } /* CRC Checking */
+
+ if (displayLevel
+ == 1) { /* hidden display mode -q, used by python speed benchmark */
+ double const cSpeed = (double)benchResult.cSpeed / MB_UNIT;
+ double const dSpeed = (double)benchResult.dSpeed / MB_UNIT;
+ if (adv->additionalParam) {
+ OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n",
+ cLevel,
+ (int)cSize,
+ ratio,
+ cSpeed,
+ dSpeed,
+ displayName,
+ adv->additionalParam);
+ } else {
+ OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n",
+ cLevel,
+ (int)cSize,
+ ratio,
+ cSpeed,
+ dSpeed,
+ displayName);
+ }
+ }
+
+ OUTPUTLEVEL(2, "%2i#\n", cLevel);
+ } /* Bench */
+
+ benchResult.cMem =
+ (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx);
+ return BMK_benchOutcome_setValidResult(benchResult);
+}
+
+BMK_benchOutcome_t BMK_benchMemAdvanced(
+ const void* srcBuffer,
+ size_t srcSize,
+ void* dstBuffer,
+ size_t dstCapacity,
+ const size_t* fileSizes,
+ unsigned nbFiles,
+ int cLevel,
+ const ZSTD_compressionParameters* comprParams,
+ const void* dictBuffer,
+ size_t dictBufferSize,
+ int displayLevel,
+ const char* displayName,
+ const BMK_advancedParams_t* adv)
+
+{
+ int const dstParamsError =
+ !dstBuffer ^ !dstCapacity; /* must be both NULL or none */
+
+ size_t const blockSize =
+ ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly))
+ ? adv->blockSize
+ : srcSize)
+ + (!srcSize) /* avoid div by 0 */;
+ U32 const maxNbBlocks =
+ (U32)((srcSize + (blockSize - 1)) / blockSize) + nbFiles;
+
+ /* these are the blockTable parameters, just split up */
+ const void** const srcPtrs =
+ (const void**)malloc(maxNbBlocks * sizeof(void*));
+ size_t* const srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
+
+ void** const cPtrs = (void**)malloc(maxNbBlocks * sizeof(void*));
+ size_t* const cSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
+ size_t* const cCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
+
+ void** const resPtrs = (void**)malloc(maxNbBlocks * sizeof(void*));
+ size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
+
+ BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(
+ adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
+ BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(
+ adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
+
+ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+ ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+
+ const size_t maxCompressedSize = dstCapacity
+ ? dstCapacity
+ : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);
+
+ void* const internalDstBuffer =
+ dstBuffer ? NULL : malloc(maxCompressedSize);
+ void* const compressedBuffer = dstBuffer ? dstBuffer : internalDstBuffer;
+
+ BMK_benchOutcome_t outcome =
+ BMK_benchOutcome_error(); /* error by default */
+
+ void* resultBuffer = srcSize ? malloc(srcSize) : NULL;
+
+ int const allocationincomplete = !srcPtrs || !srcSizes || !cPtrs || !cSizes
+ || !cCapacities || !resPtrs || !resSizes || !timeStateCompress
+ || !timeStateDecompress || !cctx || !dctx || !compressedBuffer
+ || !resultBuffer;
+
+ if (!allocationincomplete && !dstParamsError) {
+ outcome = BMK_benchMemAdvancedNoAlloc(
+ srcPtrs,
+ srcSizes,
+ cPtrs,
+ cCapacities,
+ cSizes,
+ resPtrs,
+ resSizes,
+ &resultBuffer,
+ compressedBuffer,
+ maxCompressedSize,
+ timeStateCompress,
+ timeStateDecompress,
+ srcBuffer,
+ srcSize,
+ fileSizes,
+ nbFiles,
+ cLevel,
+ comprParams,
+ dictBuffer,
+ dictBufferSize,
+ cctx,
+ dctx,
+ displayLevel,
+ displayName,
+ adv);
+ }
+
+ /* clean up */
+ BMK_freeTimedFnState(timeStateCompress);
+ BMK_freeTimedFnState(timeStateDecompress);
+
+ ZSTD_freeCCtx(cctx);
+ ZSTD_freeDCtx(dctx);
+
+ free(internalDstBuffer);
+ free(resultBuffer);
+
+ free((void*)srcPtrs);
+ free(srcSizes);
+ free(cPtrs);
+ free(cSizes);
+ free(cCapacities);
+ free(resPtrs);
+ free(resSizes);
+
+ if (allocationincomplete) {
+ RETURN_ERROR(
+ 31, BMK_benchOutcome_t, "allocation error : not enough memory");
+ }
+
+ if (dstParamsError) {
+ RETURN_ERROR(32, BMK_benchOutcome_t, "Dst parameters not coherent");
+ }
+ return outcome;
+}
+
+BMK_benchOutcome_t BMK_benchMem(
+ const void* srcBuffer,
+ size_t srcSize,
+ const size_t* fileSizes,
+ unsigned nbFiles,
+ int cLevel,
+ const ZSTD_compressionParameters* comprParams,
+ const void* dictBuffer,
+ size_t dictBufferSize,
+ int displayLevel,
+ const char* displayName)
+{
+ BMK_advancedParams_t const adv = BMK_initAdvancedParams();
+ return BMK_benchMemAdvanced(
+ srcBuffer,
+ srcSize,
+ NULL,
+ 0,
+ fileSizes,
+ nbFiles,
+ cLevel,
+ comprParams,
+ dictBuffer,
+ dictBufferSize,
+ displayLevel,
+ displayName,
+ &adv);
+}
+
+static BMK_benchOutcome_t BMK_benchCLevel(
+ const void* srcBuffer,
+ size_t benchedSize,
+ const size_t* fileSizes,
+ unsigned nbFiles,
+ int cLevel,
+ const ZSTD_compressionParameters* comprParams,
+ const void* dictBuffer,
+ size_t dictBufferSize,
+ int displayLevel,
+ const char* displayName,
+ BMK_advancedParams_t const* const adv)
+{
+ const char* pch = strrchr(displayName, '\\'); /* Windows */
+ if (!pch)
+ pch = strrchr(displayName, '/'); /* Linux */
+ if (pch)
+ displayName = pch + 1;
+
+ if (adv->realTime) {
+ DISPLAYLEVEL(2, "Note : switching to real-time priority \n");
+ SET_REALTIME_PRIORITY;
+ }
+
+ if (displayLevel == 1 && !adv->additionalParam) /* --quiet mode */
+ OUTPUT("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n",
+ ZSTD_VERSION_STRING,
+ ZSTD_GIT_COMMIT_STRING,
+ (unsigned)benchedSize,
+ adv->nbSeconds,
+ (unsigned)(adv->blockSize >> 10));
+
+ return BMK_benchMemAdvanced(
+ srcBuffer,
+ benchedSize,
+ NULL,
+ 0,
+ fileSizes,
+ nbFiles,
+ cLevel,
+ comprParams,
+ dictBuffer,
+ dictBufferSize,
+ displayLevel,
+ displayName,
+ adv);
+}
+
+int BMK_syntheticTest(
+ int cLevel,
+ double compressibility,
+ const ZSTD_compressionParameters* compressionParams,
+ int displayLevel,
+ const BMK_advancedParams_t* adv)
+{
+ char nameBuff[20] = { 0 };
+ const char* name = nameBuff;
+ size_t const benchedSize = adv->blockSize ? adv->blockSize : 10000000;
+ void* srcBuffer;
+ BMK_benchOutcome_t res;
+
+ if (cLevel > ZSTD_maxCLevel()) {
+ DISPLAYLEVEL(1, "Invalid Compression Level");
+ return 15;
+ }
+
+ /* Memory allocation */
+ srcBuffer = malloc(benchedSize);
+ if (!srcBuffer) {
+ DISPLAYLEVEL(1, "allocation error : not enough memory");
+ return 16;
+ }
+
+ /* Fill input buffer */
+ if (compressibility < 0.0) {
+ LOREM_genBuffer(srcBuffer, benchedSize, 0);
+ name = "Lorem ipsum";
+ } else {
+ RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
+ formatString_u(
+ nameBuff,
+ sizeof(nameBuff),
+ "Synthetic %u%%",
+ (unsigned)(compressibility * 100));
+ }
+
+ /* Bench */
+ res = BMK_benchCLevel(
+ srcBuffer,
+ benchedSize,
+ &benchedSize /* ? */,
+ 1 /* ? */,
+ cLevel,
+ compressionParams,
+ NULL,
+ 0, /* dictionary */
+ displayLevel,
+ name,
+ adv);
+
+ /* clean up */
+ free(srcBuffer);
+
+ return !BMK_isSuccessful_benchOutcome(res);
+}
+
+static size_t BMK_findMaxMem(U64 requiredMem)
+{
+ size_t const step = 64 MB;
+ BYTE* testmem = NULL;
+
+ requiredMem = (((requiredMem >> 26) + 1) << 26);
+ requiredMem += step;
+ if (requiredMem > maxMemory)
+ requiredMem = maxMemory;
+
+ do {
+ testmem = (BYTE*)malloc((size_t)requiredMem);
+ requiredMem -= step;
+ } while (!testmem && requiredMem > 0);
+
+ free(testmem);
+ return (size_t)(requiredMem);
+}
+
+/*! BMK_loadFiles() :
+ * Loads `buffer` with content of files listed within `fileNamesTable`.
+ * At most, fills `buffer` entirely. */
+static int BMK_loadFiles(
+ void* buffer,
+ size_t bufferSize,
+ size_t* fileSizes,
+ const char* const* fileNamesTable,
+ unsigned nbFiles,
+ int displayLevel)
+{
+ size_t pos = 0, totalSize = 0;
+ unsigned n;
+ for (n = 0; n < nbFiles; n++) {
+ U64 fileSize = UTIL_getFileSize(
+ fileNamesTable[n]); /* last file may be shortened */
+ if (UTIL_isDirectory(fileNamesTable[n])) {
+ DISPLAYLEVEL(
+ 2, "Ignoring %s directory... \n", fileNamesTable[n]);
+ fileSizes[n] = 0;
+ continue;
+ }
+ if (fileSize == UTIL_FILESIZE_UNKNOWN) {
+ DISPLAYLEVEL(
+ 2,
+ "Cannot evaluate size of %s, ignoring ... \n",
+ fileNamesTable[n]);
+ fileSizes[n] = 0;
+ continue;
+ }
+ {
+ FILE* const f = fopen(fileNamesTable[n], "rb");
+ if (f == NULL)
+ RETURN_ERROR_INT(
+ 10, "impossible to open file %s", fileNamesTable[n]);
+ OUTPUTLEVEL(2, "Loading %s... \r", fileNamesTable[n]);
+ if (fileSize > bufferSize - pos)
+ fileSize = bufferSize - pos,
+ nbFiles = n; /* buffer too small - stop after this file */
+ {
+ size_t const readSize =
+ fread(((char*)buffer) + pos, 1, (size_t)fileSize, f);
+ if (readSize != (size_t)fileSize)
+ RETURN_ERROR_INT(
+ 11, "could not read %s", fileNamesTable[n]);
+ pos += readSize;
+ }
+ fileSizes[n] = (size_t)fileSize;
+ totalSize += (size_t)fileSize;
+ fclose(f);
+ }
+ }
+
+ if (totalSize == 0)
+ RETURN_ERROR_INT(12, "no data to bench");
+ return 0;
+}
+
+int BMK_benchFilesAdvanced(
+ const char* const* fileNamesTable,
+ unsigned nbFiles,
+ const char* dictFileName,
+ int cLevel,
+ const ZSTD_compressionParameters* compressionParams,
+ int displayLevel,
+ const BMK_advancedParams_t* adv)
+{
+ void* srcBuffer = NULL;
+ size_t benchedSize;
+ void* dictBuffer = NULL;
+ size_t dictBufferSize = 0;
+ size_t* fileSizes = NULL;
+ BMK_benchOutcome_t res;
+ U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
+
+ if (!nbFiles) {
+ DISPLAYLEVEL(1, "No Files to Benchmark");
+ return 13;
+ }
+
+ if (cLevel > ZSTD_maxCLevel()) {
+ DISPLAYLEVEL(1, "Invalid Compression Level");
+ return 14;
+ }
+
+ if (totalSizeToLoad == UTIL_FILESIZE_UNKNOWN) {
+ DISPLAYLEVEL(1, "Error loading files");
+ return 15;
+ }
+
+ fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t));
+ if (!fileSizes) {
+ DISPLAYLEVEL(1, "not enough memory for fileSizes");
+ return 16;
+ }
+
+ /* Load dictionary */
+ if (dictFileName != NULL) {
+ U64 const dictFileSize = UTIL_getFileSize(dictFileName);
+ if (dictFileSize == UTIL_FILESIZE_UNKNOWN) {
+ DISPLAYLEVEL(
+ 1,
+ "error loading %s : %s \n",
+ dictFileName,
+ strerror(errno));
+ free(fileSizes);
+ DISPLAYLEVEL(1, "benchmark aborted");
+ return 17;
+ }
+ if (dictFileSize > 64 MB) {
+ free(fileSizes);
+ DISPLAYLEVEL(1, "dictionary file %s too large", dictFileName);
+ return 18;
+ }
+ dictBufferSize = (size_t)dictFileSize;
+ dictBuffer = malloc(dictBufferSize);
+ if (dictBuffer == NULL) {
+ free(fileSizes);
+ DISPLAYLEVEL(
+ 1,
+ "not enough memory for dictionary (%u bytes)",
+ (unsigned)dictBufferSize);
+ return 19;
+ }
+
+ {
+ int const errorCode = BMK_loadFiles(
+ dictBuffer,
+ dictBufferSize,
+ fileSizes,
+ &dictFileName /*?*/,
+ 1 /*?*/,
+ displayLevel);
+ if (errorCode) {
+ res = BMK_benchOutcome_error();
+ goto _cleanUp;
+ }
+ }
+ }
+
+ /* Memory allocation & restrictions */
+ benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
+ if ((U64)benchedSize > totalSizeToLoad)
+ benchedSize = (size_t)totalSizeToLoad;
+ if (benchedSize < totalSizeToLoad)
+ DISPLAY("Not enough memory; testing %u MB only...\n",
+ (unsigned)(benchedSize >> 20));
+
+ srcBuffer = benchedSize ? malloc(benchedSize) : NULL;
+ if (!srcBuffer) {
+ free(dictBuffer);
+ free(fileSizes);
+ DISPLAYLEVEL(1, "not enough memory for srcBuffer");
+ return 20;
+ }
+
+ /* Load input buffer */
+ {
+ int const errorCode = BMK_loadFiles(
+ srcBuffer,
+ benchedSize,
+ fileSizes,
+ fileNamesTable,
+ nbFiles,
+ displayLevel);
+ if (errorCode) {
+ res = BMK_benchOutcome_error();
+ goto _cleanUp;
+ }
+ }
+
+ /* Bench */
+ {
+ char mfName[20] = { 0 };
+ formatString_u(mfName, sizeof(mfName), " %u files", nbFiles);
+ {
+ const char* const displayName =
+ (nbFiles > 1) ? mfName : fileNamesTable[0];
+ res = BMK_benchCLevel(
+ srcBuffer,
+ benchedSize,
+ fileSizes,
+ nbFiles,
+ cLevel,
+ compressionParams,
+ dictBuffer,
+ dictBufferSize,
+ displayLevel,
+ displayName,
+ adv);
+ }
+ }
+
+_cleanUp:
+ free(srcBuffer);
+ free(dictBuffer);
+ free(fileSizes);
+ return !BMK_isSuccessful_benchOutcome(res);
+}
+
+int BMK_benchFiles(
+ const char* const* fileNamesTable,
+ unsigned nbFiles,
+ const char* dictFileName,
+ int cLevel,
+ const ZSTD_compressionParameters* compressionParams,
+ int displayLevel)
+{
+ BMK_advancedParams_t const adv = BMK_initAdvancedParams();
+ return BMK_benchFilesAdvanced(
+ fileNamesTable,
+ nbFiles,
+ dictFileName,
+ cLevel,
+ compressionParams,
+ displayLevel,
+ &adv);
+}
BMK_mode_t mode; /* 0: all, 1: compress only 2: decode only */
unsigned nbSeconds; /* default timing is in nbSeconds */
size_t blockSize; /* Maximum size of each block*/
+ size_t targetCBlockSize;/* Approximative size of compressed blocks */
int nbWorkers; /* multithreading */
unsigned realTime; /* real time priority */
int additionalParam; /* used by python speed benchmark */
/*! BMK_syntheticTest() -- called from zstdcli */
/* Generates a sample with datagen, using compressibility argument */
-/* cLevel - compression level to benchmark, errors if invalid
- * compressibility - determines compressibility of sample
- * compressionParams - basic compression Parameters
- * displayLevel - see benchFiles
- * adv - see advanced_Params_t
+/* @cLevel - compression level to benchmark, errors if invalid
+ * @compressibility - determines compressibility of sample, range [0.0 - 1.0]
+ * if @compressibility < 0.0, uses the lorem ipsum generator
+ * @compressionParams - basic compression Parameters
+ * @displayLevel - see benchFiles
+ * @adv - see advanced_Params_t
* @return: 0 on success, !0 on error
*/
int BMK_syntheticTest(int cLevel, double compressibility,
DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path);
return 0;
}
-#if defined(_WIN32) || defined(WIN32)
+#if defined(_WIN32)
/* windows doesn't allow remove read-only files,
* so try to make it writable first */
if (!(statbuf.st_mode & _S_IWRITE)) {
comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog));
if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) {
if (!prefs->ldmFlag)
- DISPLAYLEVEL(1, "long mode automatically triggered\n");
+ DISPLAYLEVEL(2, "long mode automatically triggered\n");
FIO_setLdmFlag(prefs, 1);
}
if (cParams.strategy >= ZSTD_btopt) {
- DISPLAYLEVEL(1, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n");
- DISPLAYLEVEL(1, "- Use --single-thread mode in the zstd cli\n");
- DISPLAYLEVEL(1, "- Set a larger targetLength (e.g. --zstd=targetLength=4096)\n");
- DISPLAYLEVEL(1, "- Set a larger chainLog (e.g. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX);
- DISPLAYLEVEL(1, "Also consider playing around with searchLog and hashLog\n");
+ DISPLAYLEVEL(3, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n");
+ DISPLAYLEVEL(3, "- Use --single-thread mode in the zstd cli\n");
+ DISPLAYLEVEL(3, "- Set a larger targetLength (e.g. --zstd=targetLength=4096)\n");
+ DISPLAYLEVEL(3, "- Set a larger chainLog (e.g. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX);
+ DISPLAYLEVEL(3, "Also consider playing around with searchLog and hashLog\n");
}
}
int closeDstFile = 0;
int result;
int transferStat = 0;
- FILE *dstFile;
int dstFd = -1;
assert(AIO_ReadPool_getFile(ress.readCtx) != NULL);
closeDstFile = 1;
DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
- dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFileInitialPermissions);
- if (dstFile==NULL) return 1; /* could not open dstFileName */
- dstFd = fileno(dstFile);
- AIO_WritePool_setFile(ress.writeCtx, dstFile);
+ { FILE *dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFileInitialPermissions);
+ if (dstFile==NULL) return 1; /* could not open dstFileName */
+ dstFd = fileno(dstFile);
+ AIO_WritePool_setFile(ress.writeCtx, dstFile);
+ }
/* Must only be added after FIO_openDstFile() succeeds.
* Otherwise we may delete the destination file if it already exists,
* and the user presses Ctrl-C when asked if they wish to overwrite.
TXZ_EXTENSION,
LZ4_EXTENSION,
TLZ4_EXTENSION,
+ ".7z",
+ ".aa3",
+ ".aac",
+ ".aar",
+ ".ace",
+ ".alac",
+ ".ape",
+ ".apk",
+ ".apng",
+ ".arc",
+ ".archive",
+ ".arj",
+ ".ark",
+ ".asf",
+ ".avi",
+ ".avif",
+ ".ba",
+ ".br",
+ ".bz2",
+ ".cab",
+ ".cdx",
+ ".chm",
+ ".cr2",
+ ".divx",
+ ".dmg",
+ ".dng",
+ ".docm",
+ ".docx",
+ ".dotm",
+ ".dotx",
+ ".dsft",
+ ".ear",
+ ".eftx",
+ ".emz",
+ ".eot",
+ ".epub",
+ ".f4v",
+ ".flac",
+ ".flv",
+ ".gho",
+ ".gif",
+ ".gifv",
+ ".gnp",
+ ".iso",
+ ".jar",
+ ".jpeg",
+ ".jpg",
+ ".jxl",
+ ".lz",
+ ".lzh",
+ ".m4a",
+ ".m4v",
+ ".mkv",
+ ".mov",
+ ".mp2",
+ ".mp3",
+ ".mp4",
+ ".mpa",
+ ".mpc",
+ ".mpe",
+ ".mpeg",
+ ".mpg",
+ ".mpl",
+ ".mpv",
+ ".msi",
+ ".odp",
+ ".ods",
+ ".odt",
+ ".ogg",
+ ".ogv",
+ ".otp",
+ ".ots",
+ ".ott",
+ ".pea",
+ ".png",
+ ".pptx",
+ ".qt",
+ ".rar",
+ ".s7z",
+ ".sfx",
+ ".sit",
+ ".sitx",
+ ".sqx",
+ ".svgz",
+ ".swf",
+ ".tbz2",
+ ".tib",
+ ".tlz",
+ ".vob",
+ ".war",
+ ".webm",
+ ".webp",
+ ".wma",
+ ".wmv",
+ ".woff",
+ ".woff2",
+ ".wvl",
+ ".xlsx",
+ ".xpi",
+ ".xps",
+ ".zip",
+ ".zipx",
+ ".zoo",
+ ".zpaq",
NULL
};
int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable;
stat_t statbuf;
dRess_t ress;
+ memset(&statbuf, 0, sizeof(statbuf));
memset(&ress, 0, sizeof(ress));
FIO_getDictFileStat(dictFileName, &statbuf);
U64 frameSize = 0;
IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
- /* display last 20 characters only */
+ /* display last 20 characters only when not --verbose */
{ size_t const srcFileLength = strlen(srcFileName);
- if (srcFileLength>20) srcFileName += srcFileLength-20;
+ if ((srcFileLength>20) && (g_display_prefs.displayLevel<3))
+ srcFileName += srcFileLength-20;
}
ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only);
/* AIO_ReadPool_numReadsInFlight:
* Returns the number of IO read jobs currently in flight. */
static size_t AIO_ReadPool_numReadsInFlight(ReadPoolCtx_t* ctx) {
- const size_t jobsHeld = (ctx->currentJobHeld==NULL ? 0 : 1);
- return ctx->base.totalIoJobs - (ctx->base.availableJobsCount + ctx->completedJobsCount + jobsHeld);
+ const int jobsHeld = (ctx->currentJobHeld==NULL ? 0 : 1);
+ return (size_t)(ctx->base.totalIoJobs - (ctx->base.availableJobsCount + ctx->completedJobsCount + jobsHeld));
}
/* AIO_ReadPool_getNextCompletedJob:
}
static void AIO_ReadPool_startReading(ReadPoolCtx_t* ctx) {
- int i;
- for (i = 0; i < ctx->base.availableJobsCount; i++) {
+ while(ctx->base.availableJobsCount) {
AIO_ReadPool_enqueueRead(ctx);
}
}
AIO_IOPool_init(&ctx->base, prefs, AIO_ReadPool_executeReadJob, bufferSize);
ctx->coalesceBuffer = (U8*) malloc(bufferSize * 2);
+ if(!ctx->coalesceBuffer) EXM_THROW(100, "Allocation error : not enough memory");
ctx->srcBuffer = ctx->coalesceBuffer;
ctx->srcBufferLoaded = 0;
ctx->completedJobsCount = 0;
--- /dev/null
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* Implementation notes:
+ *
+ * This is a very simple lorem ipsum generator
+ * which features a static list of words
+ * and prints them one after another randomly
+ * with a fake sentence / paragraph structure.
+ *
+ * The goal is to generate a printable text
+ * that can be used to fake a text compression scenario.
+ * The resulting compression / ratio curve of the lorem ipsum generator
+ * is more satisfying than that of the previous statistical generator,
+ * which was initially designed for entropy compression
+ * and lacks the regularity representative of real text.
+ *
+ * The compression ratio achievable on the generated lorem ipsum
+ * is still a bit too good, presumably because the dictionary is a bit too
+ * small. It would be possible to create some more complex scheme, notably by
+ * enlarging the dictionary with a word generator, and adding grammatical rules
+ * (composition) and syntax rules. But that's probably overkill for the intended
+ * goal.
+ */
+
+#include "lorem.h"
+#include <assert.h>
+#include <limits.h> /* INT_MAX */
+#include <string.h> /* memcpy */
+
+#define WORD_MAX_SIZE 20
+
+/* Define the word pool */
+static const char* kWords[] = {
+ "lorem", "ipsum", "dolor", "sit", "amet",
+ "consectetur", "adipiscing", "elit", "sed", "do",
+ "eiusmod", "tempor", "incididunt", "ut", "labore",
+ "et", "dolore", "magna", "aliqua", "dis",
+ "lectus", "vestibulum", "mattis", "ullamcorper", "velit",
+ "commodo", "a", "lacus", "arcu", "magnis",
+ "parturient", "montes", "nascetur", "ridiculus", "mus",
+ "mauris", "nulla", "malesuada", "pellentesque", "eget",
+ "gravida", "in", "dictum", "non", "erat",
+ "nam", "voluptat", "maecenas", "blandit", "aliquam",
+ "etiam", "enim", "lobortis", "scelerisque", "fermentum",
+ "dui", "faucibus", "ornare", "at", "elementum",
+ "eu", "facilisis", "odio", "morbi", "quis",
+ "eros", "donec", "ac", "orci", "purus",
+ "turpis", "cursus", "leo", "vel", "porta",
+ "consequat", "interdum", "varius", "vulputate", "aliquet",
+ "pharetra", "nunc", "auctor", "urna", "id",
+ "metus", "viverra", "nibh", "cras", "mi",
+ "unde", "omnis", "iste", "natus", "error",
+ "perspiciatis", "voluptatem", "accusantium", "doloremque", "laudantium",
+ "totam", "rem", "aperiam", "eaque", "ipsa",
+ "quae", "ab", "illo", "inventore", "veritatis",
+ "quasi", "architecto", "beatae", "vitae", "dicta",
+ "sunt", "explicabo", "nemo", "ipsam", "quia",
+ "voluptas", "aspernatur", "aut", "odit", "fugit",
+ "consequuntur", "magni", "dolores", "eos", "qui",
+ "ratione", "sequi", "nesciunt", "neque", "porro",
+ "quisquam", "est", "dolorem", "adipisci", "numquam",
+ "eius", "modi", "tempora", "incidunt", "magnam",
+ "quaerat", "ad", "minima", "veniam", "nostrum",
+ "ullam", "corporis", "suscipit", "laboriosam", "nisi",
+ "aliquid", "ex", "ea", "commodi", "consequatur",
+ "autem", "eum", "iure", "voluptate", "esse",
+ "quam", "nihil", "molestiae", "illum", "fugiat",
+ "quo", "pariatur", "vero", "accusamus", "iusto",
+ "dignissimos", "ducimus", "blanditiis", "praesentium", "voluptatum",
+ "deleniti", "atque", "corrupti", "quos", "quas",
+ "molestias", "excepturi", "sint", "occaecati", "cupiditate",
+ "provident", "similique", "culpa", "officia", "deserunt",
+ "mollitia", "animi", "laborum", "dolorum", "fuga",
+ "harum", "quidem", "rerum", "facilis", "expedita",
+ "distinctio", "libero", "tempore", "cum", "soluta",
+ "nobis", "eligendi", "optio", "cumque", "impedit",
+ "minus", "quod", "maxime", "placeat", "facere",
+ "possimus", "assumenda", "repellendus", "temporibus", "quibusdam",
+ "officiis", "debitis", "saepe", "eveniet", "voluptates",
+ "repudiandae", "recusandae", "itaque", "earum", "hic",
+ "tenetur", "sapiente", "delectus", "reiciendis", "cillum",
+ "maiores", "alias", "perferendis", "doloribus", "asperiores",
+ "repellat", "minim", "nostrud", "exercitation", "ullamco",
+ "laboris", "aliquip", "duis", "aute", "irure",
+};
+static const unsigned kNbWords = sizeof(kWords) / sizeof(kWords[0]);
+
+/* simple 1-dimension distribution, based on word's length, favors small words:
+ * kWeights[len] is the draw weight of a word of length len
+ * (lengths beyond the table are capped to the last entry by the consumers below)
+ */
+static const int kWeights[] = { 0, 8, 6, 4, 3, 2 };
+static const size_t kNbWeights = sizeof(kWeights) / sizeof(kWeights[0]);
+
+/* flattened distribution table: each word index is repeated according to its
+ * weight, so a uniform draw over the first g_distribCount entries follows kWeights */
+#define DISTRIB_SIZE_MAX 650
+static int g_distrib[DISTRIB_SIZE_MAX] = { 0 };
+static unsigned g_distribCount = 0; /* nb of valid entries in g_distrib (set by countFreqs) */
+
+/* countFreqs() :
+ * computes the total weight of all @words,
+ * where the weight of a word is weights[strlen(word)],
+ * with the length index capped at nbWeights-1,
+ * and stores the sum into the global g_distribCount.
+ * The assert checks the total fits into g_distrib[]. */
+static void countFreqs(
+ const char* words[],
+ size_t nbWords,
+ const int* weights,
+ size_t nbWeights)
+{
+ unsigned total = 0;
+ size_t w;
+ for (w = 0; w < nbWords; w++) {
+ size_t len = strlen(words[w]);
+ int lmax;
+ if (len >= nbWeights)
+ len = nbWeights - 1;
+ lmax = weights[len];
+ total += (unsigned)lmax;
+ }
+ g_distribCount = total;
+ assert(g_distribCount <= DISTRIB_SIZE_MAX);
+}
+
+/* init_word_distrib() :
+ * fills the global g_distrib[] table:
+ * word index w is written weights[len] times (len capped at nbWeights-1),
+ * so that a uniform draw over g_distrib selects words
+ * in proportion to their length-based weight.
+ * Calls countFreqs() first, which sets g_distribCount. */
+static void init_word_distrib(
+ const char* words[],
+ size_t nbWords,
+ const int* weights,
+ size_t nbWeights)
+{
+ size_t w, d = 0;
+ countFreqs(words, nbWords, weights, nbWeights);
+ for (w = 0; w < nbWords; w++) {
+ size_t len = strlen(words[w]);
+ int l, lmax;
+ if (len >= nbWeights)
+ len = nbWeights - 1;
+ lmax = weights[len];
+ for (l = 0; l < lmax; l++) {
+ g_distrib[d++] = (int)w;
+ }
+ }
+}
+
+/* Note: this unit only works when invoked sequentially.
+ * No concurrent access is allowed */
+static char* g_ptr = NULL; /* destination buffer currently being filled */
+static size_t g_nbChars = 0; /* nb of characters generated so far */
+static size_t g_maxChars = 10000000; /* capacity of g_ptr */
+static unsigned g_randRoot = 0; /* PRNG state, seeded by LOREM_genBlock() */
+
+/* 32-bit left rotate, used by LOREM_rand() */
+#define RDG_rotl32(x, r) ((x << r) | (x >> (32 - r)))
+/* LOREM_rand() :
+ * small deterministic PRNG (multiply / xor / rotate over g_randRoot).
+ * @return : a pseudo-random value in [0, range)
+ * (the 64-bit multiply-shift maps the 32-bit state onto the range) */
+static unsigned LOREM_rand(unsigned range)
+{
+ static const unsigned prime1 = 2654435761U;
+ static const unsigned prime2 = 2246822519U;
+ unsigned rand32 = g_randRoot;
+ rand32 *= prime1;
+ rand32 ^= prime2;
+ rand32 = RDG_rotl32(rand32, 13);
+ g_randRoot = rand32;
+ return (unsigned)(((unsigned long long)rand32 * range) >> 32);
+}
+
+/* writeLastCharacters() :
+ * closes out the buffer when the next word would not fit:
+ * writes a final '.', pads the rest with spaces, and ends with '\n'
+ * (each step only if space remains), then marks the buffer as full. */
+static void writeLastCharacters(void)
+{
+ size_t lastChars = g_maxChars - g_nbChars;
+ assert(g_maxChars >= g_nbChars);
+ if (lastChars == 0)
+ return;
+ g_ptr[g_nbChars++] = '.';
+ if (lastChars > 2) {
+ memset(g_ptr + g_nbChars, ' ', lastChars - 2);
+ }
+ if (lastChars > 1) {
+ g_ptr[g_maxChars - 1] = '\n';
+ }
+ g_nbChars = g_maxChars;
+}
+
+/* generateWord() :
+ * appends @word followed by @separator into the global buffer.
+ * @upCase : when non-zero, upper-cases the first letter
+ * (assumes the word starts with a lowercase ASCII letter).
+ * If word + separator does not fit into the remaining space,
+ * finishes the buffer via writeLastCharacters() instead. */
+static void generateWord(const char* word, const char* separator, int upCase)
+{
+ size_t const len = strlen(word) + strlen(separator);
+ if (g_nbChars + len > g_maxChars) {
+ writeLastCharacters();
+ return;
+ }
+ memcpy(g_ptr + g_nbChars, word, strlen(word));
+ if (upCase) {
+ static const char toUp = 'A' - 'a';
+ g_ptr[g_nbChars] = (char)(g_ptr[g_nbChars] + toUp);
+ }
+ g_nbChars += strlen(word);
+ memcpy(g_ptr + g_nbChars, separator, strlen(separator));
+ g_nbChars += strlen(separator);
+}
+
+/* about() :
+ * @return : a random value "around" @target,
+ * in range [1, 2*target-1]
+ * (sum of two uniform draws, hence a triangular distribution) */
+static int about(unsigned target)
+{
+ return (int)(LOREM_rand(target) + LOREM_rand(target) + 1);
+}
+
+/* generateSentence() :
+ * generates one sentence of @nbWords random words:
+ * the first word is capitalized, commas are inserted at two
+ * randomly chosen positions, and the sentence ends with ". "
+ * or, with probability 1/11, "? ". */
+static void generateSentence(int nbWords)
+{
+ int commaPos = about(9);
+ int comma2 = commaPos + about(7);
+ int qmark = (LOREM_rand(11) == 7);
+ const char* endSep = qmark ? "? " : ". ";
+ int i;
+ for (i = 0; i < nbWords; i++) {
+ int const wordID = g_distrib[LOREM_rand(g_distribCount)];
+ const char* const word = kWords[wordID];
+ const char* sep = " ";
+ if (i == commaPos)
+ sep = ", ";
+ if (i == comma2)
+ sep = ", ";
+ if (i == nbWords - 1)
+ sep = endSep;
+ generateWord(word, sep, i == 0);
+ }
+}
+
+/* generateParagraph() :
+ * generates @nbSentences sentences (each with ~11 words on average),
+ * then a blank line (two '\n'), space permitting. */
+static void generateParagraph(int nbSentences)
+{
+ int i;
+ for (i = 0; i < nbSentences; i++) {
+ int wordsPerSentence = about(11);
+ generateSentence(wordsPerSentence);
+ }
+ if (g_nbChars < g_maxChars) {
+ g_ptr[g_nbChars++] = '\n';
+ }
+ if (g_nbChars < g_maxChars) {
+ g_ptr[g_nbChars++] = '\n';
+ }
+}
+
+/* It's "common" for lorem ipsum generators to start with the same first
+ * pre-defined sentence.
+ * Emits kWords[0..18] ("Lorem ipsum dolor sit amet, ..."),
+ * capitalizing the first word and placing commas after words 4 and 7. */
+static void generateFirstSentence(void)
+{
+ int i;
+ for (i = 0; i < 18; i++) {
+ const char* word = kWords[i];
+ const char* separator = " ";
+ if (i == 4)
+ separator = ", ";
+ if (i == 7)
+ separator = ", ";
+ generateWord(word, separator, i == 0);
+ }
+ generateWord(kWords[18], ". ", 0);
+}
+
+/* LOREM_genBlock() :
+ * writes lorem ipsum text into @buffer (up to @size bytes).
+ * @seed : PRNG seed, for reproducible output.
+ * @first : when non-zero, starts with the canonical first sentence.
+ * @fill : when non-zero, fills the whole buffer;
+ * otherwise stops after at most one paragraph.
+ * @return : nb of bytes generated into @buffer.
+ * Note : relies on file-scope globals => not reentrant. */
+size_t
+LOREM_genBlock(void* buffer, size_t size, unsigned seed, int first, int fill)
+{
+ g_ptr = (char*)buffer;
+ assert(size < INT_MAX);
+ g_maxChars = size;
+ g_nbChars = 0;
+ g_randRoot = seed;
+ /* lazy one-time initialization of the word distribution table */
+ if (g_distribCount == 0) {
+ init_word_distrib(kWords, kNbWords, kWeights, kNbWeights);
+ }
+
+ if (first) {
+ generateFirstSentence();
+ }
+ while (g_nbChars < g_maxChars) {
+ int sentencePerParagraph = about(7);
+ generateParagraph(sentencePerParagraph);
+ if (!fill)
+ break; /* only generate one paragraph in non-fill mode */
+ }
+ g_ptr = NULL;
+ return g_nbChars;
+}
+
+/* LOREM_genBuffer() :
+ * fills @buffer entirely with seeded lorem ipsum text
+ * (wrapper around LOREM_genBlock() with first=1, fill=1). */
+void LOREM_genBuffer(void* buffer, size_t size, unsigned seed)
+{
+ LOREM_genBlock(buffer, size, seed, 1, 1);
+}
--- /dev/null
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* lorem ipsum generator */
+
+#include <stddef.h> /* size_t */
+
+/*
+ * LOREM_genBuffer():
+ * Generate @size bytes of compressible data using the lorem ipsum generator
+ * into provided @buffer.
+ */
+void LOREM_genBuffer(void* buffer, size_t size, unsigned seed);
+
+/*
+ * LOREM_genBlock():
+ * Similar to LOREM_genBuffer, with additional controls :
+ * - @first : generate the first sentence
+ * - @fill : fill the entire @buffer,
+ * if ==0: generate one paragraph at most.
+ * @return : nb of bytes generated into @buffer.
+ */
+size_t LOREM_genBlock(void* buffer, size_t size,
+ unsigned seed,
+ int first, int fill);
***************************************************************/
#ifndef PLATFORM_POSIX_VERSION
-# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \
- || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */
+# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */
/* exception rule : force posix version to 200112L,
* note: it's better to use unistd.h's _POSIX_VERSION whenever possible */
# define PLATFORM_POSIX_VERSION 200112L
*/
# elif !defined(_WIN32) \
&& ( defined(__unix__) || defined(__unix) \
- || defined(__midipix__) || defined(__VMS) || defined(__HAIKU__) )
+ || defined(_QNX_SOURCE) || defined(__midipix__) || defined(__VMS) || defined(__HAIKU__) )
# if defined(__linux__) || defined(__linux) || defined(__CYGWIN__)
# ifndef _POSIX_C_SOURCE
#elif defined(MSDOS) || defined(OS2)
# include <io.h> /* _isatty */
# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
-#elif defined(WIN32) || defined(_WIN32)
+#elif defined(_WIN32)
# include <io.h> /* _isatty */
# include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
# include <stdio.h> /* FILE */
/******************************
* OS-specific IO behaviors
******************************/
-#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32)
+#if defined(MSDOS) || defined(OS2) || defined(_WIN32)
# include <fcntl.h> /* _O_BINARY */
# include <io.h> /* _setmode, _fileno, _get_osfhandle */
# if !defined(__DJGPP__)
#include <errno.h>
#include <assert.h>
+#if defined(__FreeBSD__)
+#include <sys/param.h> /* __FreeBSD_version */
+#endif /* #ifdef __FreeBSD__ */
+
#if defined(_WIN32)
# include <sys/utime.h> /* utime */
# include <io.h> /* _chmod */
+# define ZSTD_USE_UTIMENSAT 0
#else
# include <unistd.h> /* chown, stat */
-# if PLATFORM_POSIX_VERSION < 200809L || !defined(st_mtime)
-# include <utime.h> /* utime */
+# include <sys/stat.h> /* utimensat, st_mtime */
+# if (PLATFORM_POSIX_VERSION >= 200809L && defined(st_mtime)) \
+ || (defined(__FreeBSD__) && __FreeBSD_version >= 1100056)
+# define ZSTD_USE_UTIMENSAT 1
# else
+# define ZSTD_USE_UTIMENSAT 0
+# endif
+# if ZSTD_USE_UTIMENSAT
# include <fcntl.h> /* AT_FDCWD */
-# include <sys/stat.h> /* utimensat */
+# else
+# include <utime.h> /* utime */
# endif
#endif
* that struct stat has a struct timespec st_mtim member. We need this
* check because there are some platforms that claim to be POSIX 2008
* compliant but which do not have st_mtim... */
-#if (PLATFORM_POSIX_VERSION >= 200809L) && defined(st_mtime)
+ /* FreeBSD has implemented POSIX 2008 for a long time but still only
+ * advertises support for POSIX 2001. They have a version macro that
+ * lets us safely gate them in.
+ * See https://docs.freebsd.org/en/books/porters-handbook/versions/.
+ */
+#if ZSTD_USE_UTIMENSAT
{
/* (atime, mtime) */
struct timespec timebuf[2] = { {0, UTIME_NOW} };
size_t nbFiles = 0;
char* buf;
size_t bufSize;
- size_t pos = 0;
stat_t statbuf;
if (!UTIL_stat(inputFileName, &statbuf) || !UTIL_isRegularFileStat(&statbuf))
{ const char** filenamesTable = (const char**) malloc(nbFiles * sizeof(*filenamesTable));
CONTROL(filenamesTable != NULL);
- { size_t fnb;
- for (fnb = 0, pos = 0; fnb < nbFiles; fnb++) {
+ { size_t fnb, pos = 0;
+ for (fnb = 0; fnb < nbFiles; fnb++) {
filenamesTable[fnb] = buf+pos;
pos += strlen(buf+pos)+1; /* +1 for the finishing `\0` */
- } }
+ }
assert(pos <= bufSize);
+ }
return UTIL_assembleFileNamesTable(filenamesTable, nbFiles, buf);
}
static size_t getTotalTableSize(FileNamesTable* table)
{
- size_t fnb = 0, totalSize = 0;
+ size_t fnb, totalSize = 0;
for(fnb = 0 ; fnb < table->tableSize && table->fileNames[fnb] ; ++fnb) {
totalSize += strlen(table->fileNames[fnb]) + 1; /* +1 to add '\0' at the end of each fileName */
}
memcpy(outDirBuffer, dir1, dir1Size);
outDirBuffer[dir1Size] = '\0';
- if (dir2[0] == '.')
- return outDirBuffer;
-
buffer = outDirBuffer + dir1Size;
if (dir1Size > 0 && *(buffer - 1) != PATH_SEP) {
*buffer = PATH_SEP;
#elif defined(__FreeBSD__)
-#include <sys/param.h>
#include <sys/sysctl.h>
/* Use physical core sysctl when available
FileNamesTable*
UTIL_createExpandedFNT(const char* const* filenames, size_t nbFilenames, int followLinks);
-#if defined(_WIN32) || defined(WIN32)
+#if defined(_WIN32)
DWORD CountSetBits(ULONG_PTR bitMask);
#endif
-.TH "ZSTD" "1" "March 2023" "zstd 1.5.5" "User Commands"
+.
+.TH "ZSTD" "1" "March 2024" "zstd 1.5.6" "User Commands"
+.
.SH "NAME"
\fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
+.
.SH "SYNOPSIS"
-.TS
-allbox;
-\fBzstd\fR [\fIOPTIONS\fR] [\- \fIINPUT\-FILE\fR] [\-o \fIOUTPUT\-FILE\fR]
-.TE
+\fBzstd\fR [\fIOPTIONS\fR] [\-|\fIINPUT\-FILE\fR] [\-o \fIOUTPUT\-FILE\fR]
+.
.P
\fBzstdmt\fR is equivalent to \fBzstd \-T0\fR
+.
.P
\fBunzstd\fR is equivalent to \fBzstd \-d\fR
+.
.P
\fBzstdcat\fR is equivalent to \fBzstd \-dcf\fR
+.
.SH "DESCRIPTION"
-\fBzstd\fR is a fast lossless compression algorithm and data compression tool, with command line syntax similar to \fBgzip\fR(1) and \fBxz\fR(1)\. It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages\. \fBzstd\fR offers highly configurable compression speed, from fast modes at > 200 MB/s per core, to strong modes with excellent compression ratios\. It also features a very fast decoder, with speeds > 500 MB/s per core\.
+\fBzstd\fR is a fast lossless compression algorithm and data compression tool, with command line syntax similar to \fBgzip\fR(1) and \fBxz\fR(1)\. It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages\. \fBzstd\fR offers highly configurable compression speed, from fast modes at > 200 MB/s per core, to strong modes with excellent compression ratios\. It also features a very fast decoder, with speeds > 500 MB/s per core, which remains roughly stable at all compression settings\.
+.
.P
-\fBzstd\fR command line syntax is generally similar to gzip, but features the following differences:
-.IP "\[ci]" 4
+\fBzstd\fR command line syntax is generally similar to gzip, but features the following few differences:
+.
+.IP "\(bu" 4
Source files are preserved by default\. It\'s possible to remove them automatically by using the \fB\-\-rm\fR command\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
When compressing a single file, \fBzstd\fR displays progress notifications and result summary by default\. Use \fB\-q\fR to turn them off\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fBzstd\fR displays a short help page when command line is an error\. Use \fB\-q\fR to turn it off\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fBzstd\fR does not accept input from console, though it does accept \fBstdin\fR when it\'s not the console\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fBzstd\fR does not store the input\'s filename or attributes, only its contents\.
+.
.IP "" 0
+.
.P
\fBzstd\fR processes each \fIfile\fR according to the selected operation mode\. If no \fIfiles\fR are given or \fIfile\fR is \fB\-\fR, \fBzstd\fR reads from standard input and writes the processed data to standard output\. \fBzstd\fR will refuse to write compressed data to standard output if it is a terminal: it will display an error message and skip the file\. Similarly, \fBzstd\fR will refuse to read compressed data from standard input if it is a terminal\.
+.
.P
Unless \fB\-\-stdout\fR or \fB\-o\fR is specified, \fIfiles\fR are written to a new file whose name is derived from the source \fIfile\fR name:
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
When compressing, the suffix \fB\.zst\fR is appended to the source filename to get the target filename\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
When decompressing, the \fB\.zst\fR suffix is removed from the source filename to get the target filename
+.
.IP "" 0
+.
.SS "Concatenation with \.zst Files"
It is possible to concatenate multiple \fB\.zst\fR files\. \fBzstd\fR will decompress such agglomerated file as if it was a single \fB\.zst\fR file\.
+.
.SH "OPTIONS"
+.
.SS "Integer Suffixes and Special Values"
In most places where an integer argument is expected, an optional suffix is supported to easily indicate large integers\. There must be no space between the integer and the suffix\.
+.
.TP
\fBKiB\fR
-Multiply the integer by 1,024 (2\e^10)\. \fBKi\fR, \fBK\fR, and \fBKB\fR are accepted as synonyms for \fBKiB\fR\.
+Multiply the integer by 1,024 (2^10)\. \fBKi\fR, \fBK\fR, and \fBKB\fR are accepted as synonyms for \fBKiB\fR\.
+.
.TP
\fBMiB\fR
-Multiply the integer by 1,048,576 (2\e^20)\. \fBMi\fR, \fBM\fR, and \fBMB\fR are accepted as synonyms for \fBMiB\fR\.
+Multiply the integer by 1,048,576 (2^20)\. \fBMi\fR, \fBM\fR, and \fBMB\fR are accepted as synonyms for \fBMiB\fR\.
+.
.SS "Operation Mode"
If multiple operation mode options are given, the last one takes effect\.
+.
.TP
\fB\-z\fR, \fB\-\-compress\fR
Compress\. This is the default operation mode when no operation mode option is specified and no other operation mode is implied from the command name (for example, \fBunzstd\fR implies \fB\-\-decompress\fR)\.
+.
.TP
\fB\-d\fR, \fB\-\-decompress\fR, \fB\-\-uncompress\fR
Decompress\.
+.
.TP
\fB\-t\fR, \fB\-\-test\fR
Test the integrity of compressed \fIfiles\fR\. This option is equivalent to \fB\-\-decompress \-\-stdout > /dev/null\fR, decompressed data is discarded and checksummed for errors\. No files are created or removed\.
+.
.TP
\fB\-b#\fR
Benchmark file(s) using compression level \fI#\fR\. See \fIBENCHMARK\fR below for a description of this operation\.
+.
.TP
\fB\-\-train FILES\fR
Use \fIFILES\fR as a training set to create a dictionary\. The training set should contain a lot of small files (> 100)\. See \fIDICTIONARY BUILDER\fR below for a description of this operation\.
+.
.TP
\fB\-l\fR, \fB\-\-list\fR
Display information related to a zstd compressed file, such as size, ratio, and checksum\. Some of these fields may not be available\. This command\'s output can be augmented with the \fB\-v\fR modifier\.
+.
.SS "Operation Modifiers"
-.IP "\[ci]" 4
-\fB\-#\fR: selects \fB#\fR compression level [1\-19] (default: 3)
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
+\fB\-#\fR: selects \fB#\fR compression level [1\-19] (default: 3)\. Higher compression levels \fIgenerally\fR produce higher compression ratio at the expense of speed and memory\. A rough rule of thumb is that compression speed is expected to be divided by 2 every 2 levels\. Technically, each level is mapped to a set of advanced parameters (that can also be modified individually, see below)\. Because the compressor\'s behavior highly depends on the content to compress, there\'s no guarantee of a smooth progression from one level to another\.
+.
+.IP "\(bu" 4
\fB\-\-ultra\fR: unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-fast[=#]\fR: switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overwrites compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-T#\fR, \fB\-\-threads=#\fR: Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to \fBZSTDMT_NBWORKERS_MAX\fR, which is either 64 in 32\-bit mode, or 256 for 64\-bit environments\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-single\-thread\fR: Use a single thread for both I/O and compression\. As compression is serialized with I/O, this can be slightly slower\. Single\-thread mode features significantly lower memory usage, which can be useful for systems with limited amount of memory, such as 32\-bit systems\.
+.
.IP
Note 1: this mode is the only available one when multithread support is disabled\.
+.
.IP
Note 2: this mode is different from \fB\-T1\fR, which spawns 1 compression thread in parallel with I/O\. Final compressed result is also slightly different from \fB\-T1\fR\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-auto\-threads={physical,logical} (default: physical)\fR: When using a default amount of threads via \fB\-T0\fR, choose the default based on the number of detected physical or logical cores\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-adapt[=min=#,max=#]\fR: \fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MiB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\.
+.
.IP
\fINote\fR: at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-long[=#]\fR: enables long distance matching with \fB#\fR \fBwindowLog\fR, if \fB#\fR is not present it defaults to \fB27\fR\. This increases the window size (\fBwindowLog\fR) and memory usage for both the compressor and decompressor\. This setting is designed to improve the compression ratio for files with long matches at a large distance\.
+.
.IP
Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-D DICT\fR: use \fBDICT\fR as Dictionary to compress or decompress FILE(s)
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-patch\-from FILE\fR: Specify the file to be used as a reference point for zstd\'s diff engine\. This is effectively dictionary compression with some convenient parameter selection, namely that \fIwindowSize\fR > \fIsrcSize\fR\.
+.
.IP
Note: cannot use both this and \fB\-D\fR together\.
+.
.IP
Note: \fB\-\-long\fR mode will be automatically activated if \fIchainLog\fR < \fIfileLog\fR (\fIfileLog\fR being the \fIwindowLog\fR required to cover the whole file)\. You can also manually force it\.
+.
.IP
Note: for all levels, you can use \fB\-\-patch\-from\fR in \fB\-\-single\-thread\fR mode to improve compression ratio at the cost of speed\.
+.
.IP
Note: for level 19, you can get increased compression ratio at the cost of speed by specifying \fB\-\-zstd=targetLength=\fR to be something large (i\.e\. 4096), and by setting a large \fB\-\-zstd=chainLog=\fR\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-rsyncable\fR: \fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and a potential impact to compression speed, perceptible at higher speeds, for example when combining \fB\-\-rsyncable\fR with many parallel worker threads\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your mileage may vary\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-C\fR, \fB\-\-[no\-]check\fR: add integrity check computed from uncompressed data (default: enabled)
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-[no\-]content\-size\fR: enable / disable whether or not the original size of the file is placed in the header of the compressed file\. The default option is \fB\-\-content\-size\fR (meaning that the original size will be placed in the header)\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-no\-dictID\fR: do not store dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-M#\fR, \fB\-\-memory=#\fR: Set a memory usage limit\. By default, \fBzstd\fR uses 128 MiB for decompression as the maximum amount of memory the decompressor is allowed to use, but you can override this manually if need be in either direction (i\.e\. you can increase or decrease it)\.
+.
.IP
This is also used during compression when using with \fB\-\-patch\-from=\fR\. In this case, this parameter overrides that maximum size allowed for a dictionary\. (128 MiB)\.
+.
.IP
Additionally, this can be used to limit memory for dictionary training\. This parameter overrides the default limit of 2 GiB\. zstd will load training samples up to the memory limit and ignore the rest\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-stream\-size=#\fR: Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-size\-hint=#\fR: When handling input from a stream, \fBzstd\fR must guess how large the source size will be when optimizing compression parameters\. If the stream size is relatively small, this guess may be a poor one, resulting in a higher compression ratio than expected\. This feature allows for controlling the guess when needed\. Exact guesses result in better compression ratios\. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation\.
-.IP "\[ci]" 4
-\fB\-o FILE\fR: save result into \fBFILE\fR\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
+\fB\-\-target\-compressed\-block\-size=#\fR: Attempt to produce compressed blocks of approximately this size\. This will split larger blocks in order to approach this target\. This feature is notably useful for improved latency, when the receiver can leverage receiving early incomplete data\. This parameter defines a loose target: compressed blocks will target this size "on average", but individual blocks can still be larger or smaller\. Enabling this feature can decrease compression speed by up to ~10% at level 1\. Higher levels will see smaller relative speed regression, becoming invisible at higher settings\.
+.
+.IP "\(bu" 4
\fB\-f\fR, \fB\-\-force\fR: disable input and output checks\. Allows overwriting existing files, input from console, output to stdout, operating on links, block devices, etc\. During decompression and when the output destination is stdout, pass\-through unrecognized formats as\-is\.
-.IP "\[ci]" 4
-\fB\-c\fR, \fB\-\-stdout\fR: write to standard output (even if it is the console); keep original files unchanged\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
+\fB\-c\fR, \fB\-\-stdout\fR: write to standard output (even if it is the console); keep original files (disable \fB\-\-rm\fR)\.
+.
+.IP "\(bu" 4
+\fB\-o FILE\fR: save result into \fBFILE\fR\. Note that this operation is in conflict with \fB\-c\fR\. If both operations are present on the command line, the last expressed one wins\.
+.
+.IP "\(bu" 4
\fB\-\-[no\-]sparse\fR: enable / disable sparse FS support, to make files with many zeroes smaller on disk\. Creating sparse files may save disk space and speed up decompression by reducing the amount of disk I/O\. default: enabled when output is into a file, and disabled when output is stdout\. This setting overrides default and can force sparse mode over stdout\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-[no\-]pass\-through\fR enable / disable passing through uncompressed files as\-is\. During decompression when pass\-through is enabled, unrecognized formats will be copied as\-is from the input to the output\. By default, pass\-through will occur when the output destination is stdout and the force (\fB\-f\fR) option is set\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-rm\fR: remove source file(s) after successful compression or decompression\. This command is silently ignored if output is \fBstdout\fR\. If used in combination with \fB\-o\fR, triggers a confirmation prompt (which can be silenced with \fB\-f\fR), as this is a destructive operation\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-k\fR, \fB\-\-keep\fR: keep source file(s) after successful compression or decompression\. This is the default behavior\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-r\fR: operate recursively on directories\. It selects all files in the named directory and all its subdirectories\. This can be useful both to reduce command line typing, and to circumvent shell expansion limitations, when there are a lot of files and naming breaks the maximum size of a command line\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-filelist FILE\fR read a list of files to process as content from \fBFILE\fR\. Format is compatible with \fBls\fR output, with one file per line\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-output\-dir\-flat DIR\fR: resulting files are stored into target \fBDIR\fR directory, instead of same directory as origin file\. Be aware that this command can introduce name collision issues, if multiple files, from different directories, end up having the same name\. Collision resolution ensures first file with a given name will be present in \fBDIR\fR, while in combination with \fB\-f\fR, the last file will be present instead\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-output\-dir\-mirror DIR\fR: similar to \fB\-\-output\-dir\-flat\fR, the output files are stored underneath target \fBDIR\fR directory, but this option will replicate input directory hierarchy into output \fBDIR\fR\.
+.
.IP
If input directory contains "\.\.", the files in this directory will be ignored\. If input directory is an absolute directory (i\.e\. "/var/tmp/abc"), it will be stored into the "output\-dir/var/tmp/abc"\. If there are multiple input files or directories, name collision resolution will follow the same rules as \fB\-\-output\-dir\-flat\fR\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-format=FORMAT\fR: compress and decompress in other formats\. If compiled with support, zstd can compress to or decompress from other compression algorithm formats\. Possibly available options are \fBzstd\fR, \fBgzip\fR, \fBxz\fR, \fBlzma\fR, and \fBlz4\fR\. If no such format is provided, \fBzstd\fR is the default\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR: display help/long help and exit
-.IP "\[ci]" 4
-\fB\-V\fR, \fB\-\-version\fR: display version number and exit\. Advanced: \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\. \fB\-q\fR will only display the version number, suitable for machine reading\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
+\fB\-V\fR, \fB\-\-version\fR: display version number and immediately exit\. Note that, since it exits, flags specified after \fB\-V\fR are effectively ignored\. Advanced: \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\. \fB\-qV\fR will only display the version number, suitable for machine reading\.
+.
+.IP "\(bu" 4
\fB\-v\fR, \fB\-\-verbose\fR: verbose mode, display more information
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-q\fR, \fB\-\-quiet\fR: suppress warnings, interactivity, and notifications\. specify twice to suppress errors too\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-no\-progress\fR: do not display the progress bar, but keep all other messages\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
\fB\-\-show\-default\-cparams\fR: shows the default compression parameters that will be used for a particular input file, based on the provided compression level and the input size\. If the provided file is not a regular file (e\.g\. a pipe), this flag will output the parameters used for inputs of unknown size\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
+\fB\-\-exclude\-compressed\fR: only compress files that are not already compressed\.
+.
+.IP "\(bu" 4
\fB\-\-\fR: All arguments after \fB\-\-\fR are treated as files
+.
.IP "" 0
+.
.SS "gzip Operation Modifiers"
When invoked via a \fBgzip\fR symlink, \fBzstd\fR will support further options that intend to mimic the \fBgzip\fR behavior:
+.
.TP
\fB\-n\fR, \fB\-\-no\-name\fR
do not store the original filename and timestamps when compressing a file\. This is the default behavior and hence a no\-op\.
+.
.TP
\fB\-\-best\fR
alias to the option \fB\-9\fR\.
+.
.SS "Environment Variables"
-Employing environment variables to set parameters has security implications\. Therefore, this avenue is intentionally limited\. Only \fBZSTD_CLEVEL\fR and \fBZSTD_NBTHREADS\fR are currently supported\. They set the compression level and number of threads to use during compression, respectively\.
+Employing environment variables to set parameters has security implications\. Therefore, this avenue is intentionally limited\. Only \fBZSTD_CLEVEL\fR and \fBZSTD_NBTHREADS\fR are currently supported\. They set the default compression level and number of threads to use during compression, respectively\.
+.
.P
\fBZSTD_CLEVEL\fR can be used to set the level between 1 and 19 (the "normal" range)\. If the value of \fBZSTD_CLEVEL\fR is not a valid integer, it will be ignored with a warning message\. \fBZSTD_CLEVEL\fR just replaces the default compression level (\fB3\fR)\.
+.
.P
-\fBZSTD_NBTHREADS\fR can be used to set the number of threads \fBzstd\fR will attempt to use during compression\. If the value of \fBZSTD_NBTHREADS\fR is not a valid unsigned integer, it will be ignored with a warning message\. \fBZSTD_NBTHREADS\fR has a default value of (\fB1\fR), and is capped at ZSTDMT_NBWORKERS_MAX==200\. \fBzstd\fR must be compiled with multithread support for this to have any effect\.
+\fBZSTD_NBTHREADS\fR can be used to set the number of threads \fBzstd\fR will attempt to use during compression\. If the value of \fBZSTD_NBTHREADS\fR is not a valid unsigned integer, it will be ignored with a warning message\. \fBZSTD_NBTHREADS\fR has a default value of (\fB1\fR), and is capped at ZSTDMT_NBWORKERS_MAX==200\. \fBzstd\fR must be compiled with multithread support for this variable to have any effect\.
+.
.P
They can both be overridden by corresponding command line arguments: \fB\-#\fR for compression level and \fB\-T#\fR for number of compression threads\.
-.SH "DICTIONARY BUILDER"
-\fBzstd\fR offers \fIdictionary\fR compression, which greatly improves efficiency on small files and messages\. It\'s possible to train \fBzstd\fR with a set of samples, the result of which is saved into a file called a \fBdictionary\fR\. Then, during compression and decompression, reference the same dictionary, using command \fB\-D dictionaryFileName\fR\. Compression of small files similar to the sample set will be greatly improved\.
-.TP
-\fB\-\-train FILEs\fR
-Use FILEs as training set to create a dictionary\. The training set should ideally contain a lot of samples (> 100), and weight typically 100x the target dictionary size (for example, ~10 MB for a 100 KB dictionary)\. \fB\-\-train\fR can be combined with \fB\-r\fR to indicate a directory rather than listing all the files, which can be useful to circumvent shell expansion limits\.
-.IP
-Since dictionary compression is mostly effective for small files, the expectation is that the training set will only contain small files\. In the case where some samples happen to be large, only the first 128 KiB of these samples will be used for training\.
-.IP
-\fB\-\-train\fR supports multithreading if \fBzstd\fR is compiled with threading support (default)\. Additional advanced parameters can be specified with \fB\-\-train\-fastcover\fR\. The legacy dictionary builder can be accessed with \fB\-\-train\-legacy\fR\. The slower cover dictionary builder can be accessed with \fB\-\-train\-cover\fR\. Default \fB\-\-train\fR is equivalent to \fB\-\-train\-fastcover=d=8,steps=4\fR\.
-.TP
-\fB\-o FILE\fR
-Dictionary saved into \fBFILE\fR (default name: dictionary)\.
-.TP
-\fB\-\-maxdict=#\fR
-Limit dictionary to specified size (default: 112640 bytes)\. As usual, quantities are expressed in bytes by default, and it\'s possible to employ suffixes (like \fBKB\fR or \fBMB\fR) to specify larger values\.
-.TP
-\fB\-#\fR
-Use \fB#\fR compression level during training (optional)\. Will generate statistics more tuned for selected compression level, resulting in a \fIsmall\fR compression ratio improvement for this level\.
-.TP
-\fB\-B#\fR
-Split input files into blocks of size # (default: no split)
-.TP
-\fB\-M#\fR, \fB\-\-memory=#\fR
-Limit the amount of sample data loaded for training (default: 2 GB)\. Note that the default (2 GB) is also the maximum\. This parameter can be useful in situations where the training set size is not well controlled and could be potentially very large\. Since speed of the training process is directly correlated to the size of the training sample set, a smaller sample set leads to faster training\.
-.IP
-In situations where the training set is larger than maximum memory, the CLI will randomly select samples among the available ones, up to the maximum allowed memory budget\. This is meant to improve dictionary relevance by mitigating the potential impact of clustering, such as selecting only files from the beginning of a list sorted by modification date, or sorted by alphabetical order\. The randomization process is deterministic, so training of the same list of files with the same parameters will lead to the creation of the same dictionary\.
-.TP
-\fB\-\-dictID=#\fR
-A dictionary ID is a locally unique ID\. The decoder will use this value to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to provide an explicit number ID instead\. It\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\. Note that short numbers have an advantage: an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\.
-.IP
-Note that RFC8878 reserves IDs less than 32768 and greater than or equal to 2\e^31, so they should not be used in public\.
-.TP
-\fB\-\-train\-cover[=k#,d=#,steps=#,split=#,shrink[=#]]\fR
-Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. If \fIsplit\fR is not specified or split <= 0, then the default value of 100 is used\. Requires that \fId\fR <= \fIk\fR\. If \fIshrink\fR flag is not used, then the default value for \fIshrinkDict\fR of 0 is used\. If \fIshrink\fR is not specified, then the default value for \fIshrinkDictMaxRegression\fR of 1 is used\.
-.IP
-Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. If \fIsplit\fR is 100, all input samples are used for both training and testing to find optimal \fId\fR and \fIk\fR to build dictionary\. Supports multithreading if \fBzstd\fR is compiled with threading support\. Having \fIshrink\fR enabled takes a truncated dictionary of minimum size and doubles in size until compression ratio of the truncated dictionary is at most \fIshrinkDictMaxRegression%\fR worse than the compression ratio of the largest dictionary\.
-.IP
-Examples:
-.IP
-\fBzstd \-\-train\-cover FILEs\fR
-.IP
-\fBzstd \-\-train\-cover=k=50,d=8 FILEs\fR
-.IP
-\fBzstd \-\-train\-cover=d=8,steps=500 FILEs\fR
-.IP
-\fBzstd \-\-train\-cover=k=50 FILEs\fR
-.IP
-\fBzstd \-\-train\-cover=k=50,split=60 FILEs\fR
-.IP
-\fBzstd \-\-train\-cover=shrink FILEs\fR
-.IP
-\fBzstd \-\-train\-cover=shrink=2 FILEs\fR
-.TP
-\fB\-\-train\-fastcover[=k#,d=#,f=#,steps=#,split=#,accel=#]\fR
-Same as cover but with extra parameters \fIf\fR and \fIaccel\fR and different default value of split If \fIsplit\fR is not specified, then it tries \fIsplit\fR = 75\. If \fIf\fR is not specified, then it tries \fIf\fR = 20\. Requires that 0 < \fIf\fR < 32\. If \fIaccel\fR is not specified, then it tries \fIaccel\fR = 1\. Requires that 0 < \fIaccel\fR <= 10\. Requires that \fId\fR = 6 or \fId\fR = 8\.
-.IP
-\fIf\fR is log of size of array that keeps track of frequency of subsegments of size \fId\fR\. The subsegment is hashed to an index in the range [0,2^\fIf\fR \- 1]\. It is possible that 2 different subsegments are hashed to the same index, and they are considered as the same subsegment when computing frequency\. Using a higher \fIf\fR reduces collision but takes longer\.
-.IP
-Examples:
-.IP
-\fBzstd \-\-train\-fastcover FILEs\fR
-.IP
-\fBzstd \-\-train\-fastcover=d=8,f=15,accel=2 FILEs\fR
-.TP
-\fB\-\-train\-legacy[=selectivity=#]\fR
-Use legacy dictionary builder algorithm with the given dictionary \fIselectivity\fR (default: 9)\. The smaller the \fIselectivity\fR value, the denser the dictionary, improving its efficiency but reducing its achievable maximum size\. \fB\-\-train\-legacy=s=#\fR is also accepted\.
-.IP
-Examples:
-.IP
-\fBzstd \-\-train\-legacy FILEs\fR
-.IP
-\fBzstd \-\-train\-legacy=selectivity=8 FILEs\fR
-.SH "BENCHMARK"
-.TP
-\fB\-b#\fR
-benchmark file(s) using compression level #
-.TP
-\fB\-e#\fR
-benchmark file(s) using multiple compression levels, from \fB\-b#\fR to \fB\-e#\fR (inclusive)
-.TP
-\fB\-i#\fR
-minimum evaluation time, in seconds (default: 3s), benchmark mode only
-.TP
-\fB\-B#\fR, \fB\-\-block\-size=#\fR
-cut file(s) into independent chunks of size # (default: no chunking)
-.TP
-\fB\-\-priority=rt\fR
-set process priority to real\-time
-.P
-\fBOutput Format:\fR CompressionLevel#Filename: InputSize \-> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed
-.P
-\fBMethodology:\fR For both compression and decompression speed, the entire input is compressed/decompressed in\-memory to measure speed\. A run lasts at least 1 sec, so when files are small, they are compressed/decompressed several times per run, in order to improve measurement accuracy\.
+.
.SH "ADVANCED COMPRESSION OPTIONS"
-### \-B#: Specify the size of each compression job\. This parameter is only available when multi\-threading is enabled\. Each compression job is run in parallel, so this value indirectly impacts the nb of active threads\. Default job size varies depending on compression level (generally \fB4 * windowSize\fR)\. \fB\-B#\fR makes it possible to manually select a custom size\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 512 KB, or \fBoverlapSize\fR, whichever is largest\. Different job sizes will lead to non\-identical compressed frames\.
+\fBzstd\fR provides 22 predefined regular compression levels plus the fast levels\. A compression level is translated internally into multiple advanced parameters that control the behavior of the compressor (one can observe the result of this translation with \fB\-\-show\-default\-cparams\fR)\. These advanced parameters can be overridden using advanced compression options\.
+.
.SS "\-\-zstd[=options]:"
-\fBzstd\fR provides 22 predefined regular compression levels plus the fast levels\. This compression level is translated internally into a number of specific parameters that actually control the behavior of the compressor\. (You can see the result of this translation with \fB\-\-show\-default\-cparams\fR\.) These specific parameters can be overridden with advanced compression options\. The \fIoptions\fR are provided as a comma\-separated list\. You may specify only the options you want to change and the rest will be taken from the selected or default compression level\. The list of available \fIoptions\fR:
+The \fIoptions\fR are provided as a comma\-separated list\. You may specify only the options you want to change and the rest will be taken from the selected or default compression level\. The list of available \fIoptions\fR:
+.
.TP
\fBstrategy\fR=\fIstrat\fR, \fBstrat\fR=\fIstrat\fR
Specify a strategy used by a match finder\.
+.
.IP
There are 9 strategies numbered from 1 to 9, from fastest to strongest: 1=\fBZSTD_fast\fR, 2=\fBZSTD_dfast\fR, 3=\fBZSTD_greedy\fR, 4=\fBZSTD_lazy\fR, 5=\fBZSTD_lazy2\fR, 6=\fBZSTD_btlazy2\fR, 7=\fBZSTD_btopt\fR, 8=\fBZSTD_btultra\fR, 9=\fBZSTD_btultra2\fR\.
+.
.TP
\fBwindowLog\fR=\fIwlog\fR, \fBwlog\fR=\fIwlog\fR
Specify the maximum number of bits for a match distance\.
+.
.IP
The higher number of increases the chance to find a match which usually improves compression ratio\. It also increases memory requirements for the compressor and decompressor\. The minimum \fIwlog\fR is 10 (1 KiB) and the maximum is 30 (1 GiB) on 32\-bit platforms and 31 (2 GiB) on 64\-bit platforms\.
+.
.IP
Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\.
+.
.TP
\fBhashLog\fR=\fIhlog\fR, \fBhlog\fR=\fIhlog\fR
Specify the maximum number of bits for a hash table\.
+.
.IP
Bigger hash tables cause fewer collisions which usually makes compression faster, but requires more memory during compression\.
+.
.IP
The minimum \fIhlog\fR is 6 (64 entries / 256 B) and the maximum is 30 (1B entries / 4 GiB)\.
+.
.TP
\fBchainLog\fR=\fIclog\fR, \fBclog\fR=\fIclog\fR
Specify the maximum number of bits for the secondary search structure, whose form depends on the selected \fBstrategy\fR\.
+.
.IP
Higher numbers of bits increases the chance to find a match which usually improves compression ratio\. It also slows down compression speed and increases memory requirements for compression\. This option is ignored for the \fBZSTD_fast\fR \fBstrategy\fR, which only has the primary hash table\.
+.
.IP
The minimum \fIclog\fR is 6 (64 entries / 256 B) and the maximum is 29 (512M entries / 2 GiB) on 32\-bit platforms and 30 (1B entries / 4 GiB) on 64\-bit platforms\.
+.
.TP
\fBsearchLog\fR=\fIslog\fR, \fBslog\fR=\fIslog\fR
Specify the maximum number of searches in a hash chain or a binary tree using logarithmic scale\.
+.
.IP
More searches increases the chance to find a match which usually increases compression ratio but decreases compression speed\.
+.
.IP
The minimum \fIslog\fR is 1 and the maximum is \'windowLog\' \- 1\.
+.
.TP
\fBminMatch\fR=\fImml\fR, \fBmml\fR=\fImml\fR
Specify the minimum searched length of a match in a hash table\.
+.
.IP
Larger search lengths usually decrease compression ratio but improve decompression speed\.
+.
.IP
The minimum \fImml\fR is 3 and the maximum is 7\.
+.
.TP
\fBtargetLength\fR=\fItlen\fR, \fBtlen\fR=\fItlen\fR
The impact of this field vary depending on selected strategy\.
+.
.IP
For \fBZSTD_btopt\fR, \fBZSTD_btultra\fR and \fBZSTD_btultra2\fR, it specifies the minimum match length that causes match finder to stop searching\. A larger \fBtargetLength\fR usually improves compression ratio but decreases compression speed\.
+.
.IP
For \fBZSTD_fast\fR, it triggers ultra\-fast mode when > 0\. The value represents the amount of data skipped between match sampling\. Impact is reversed: a larger \fBtargetLength\fR increases compression speed but decreases compression ratio\.
+.
.IP
For all other strategies, this field has no impact\.
+.
.IP
The minimum \fItlen\fR is 0 and the maximum is 128 KiB\.
+.
.TP
\fBoverlapLog\fR=\fIovlog\fR, \fBovlog\fR=\fIovlog\fR
Determine \fBoverlapSize\fR, amount of data reloaded from previous job\. This parameter is only available when multithreading is enabled\. Reloading more data improves compression ratio, but decreases speed\.
+.
.IP
The minimum \fIovlog\fR is 0, and the maximum is 9\. 1 means "no overlap", hence completely independent jobs\. 9 means "full overlap", meaning up to \fBwindowSize\fR is reloaded from previous job\. Reducing \fIovlog\fR by 1 reduces the reloaded amount by a factor 2\. For example, 8 means "windowSize/2", and 6 means "windowSize/8"\. Value 0 is special and means "default": \fIovlog\fR is automatically determined by \fBzstd\fR\. In which case, \fIovlog\fR will range from 6 to 9, depending on selected \fIstrat\fR\.
+.
.TP
\fBldmHashLog\fR=\fIlhlog\fR, \fBlhlog\fR=\fIlhlog\fR
Specify the maximum size for a hash table used for long distance matching\.
+.
.IP
This option is ignored unless long distance matching is enabled\.
+.
.IP
Bigger hash tables usually improve compression ratio at the expense of more memory during compression and a decrease in compression speed\.
+.
.IP
The minimum \fIlhlog\fR is 6 and the maximum is 30 (default: 20)\.
+.
.TP
\fBldmMinMatch\fR=\fIlmml\fR, \fBlmml\fR=\fIlmml\fR
Specify the minimum searched length of a match for long distance matching\.
+.
.IP
This option is ignored unless long distance matching is enabled\.
+.
.IP
Larger/very small values usually decrease compression ratio\.
+.
.IP
The minimum \fIlmml\fR is 4 and the maximum is 4096 (default: 64)\.
+.
.TP
\fBldmBucketSizeLog\fR=\fIlblog\fR, \fBlblog\fR=\fIlblog\fR
Specify the size of each bucket for the hash table used for long distance matching\.
+.
.IP
This option is ignored unless long distance matching is enabled\.
+.
.IP
Larger bucket sizes improve collision resolution but decrease compression speed\.
+.
.IP
The minimum \fIlblog\fR is 1 and the maximum is 8 (default: 3)\.
+.
.TP
\fBldmHashRateLog\fR=\fIlhrlog\fR, \fBlhrlog\fR=\fIlhrlog\fR
Specify the frequency of inserting entries into the long distance matching hash table\.
+.
.IP
This option is ignored unless long distance matching is enabled\.
+.
.IP
Larger values will improve compression speed\. Deviating far from the default value will likely result in a decrease in compression ratio\.
+.
.IP
The default value is \fBwlog \- lhlog\fR\.
+.
.SS "Example"
The following parameters sets advanced compression options to something similar to predefined level 19 for files bigger than 256 KB:
+.
.P
\fB\-\-zstd\fR=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6
+.
+.SS "\-B#:"
+Specify the size of each compression job\. This parameter is only available when multi\-threading is enabled\. Each compression job is run in parallel, so this value indirectly impacts the nb of active threads\. Default job size varies depending on compression level (generally \fB4 * windowSize\fR)\. \fB\-B#\fR makes it possible to manually select a custom size\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 512 KB, or \fBoverlapSize\fR, whichever is largest\. Different job sizes will lead to non\-identical compressed frames\.
+.
+.SH "DICTIONARY BUILDER"
+\fBzstd\fR offers \fIdictionary\fR compression, which greatly improves efficiency on small files and messages\. It\'s possible to train \fBzstd\fR with a set of samples, the result of which is saved into a file called a \fBdictionary\fR\. Then, during compression and decompression, reference the same dictionary, using command \fB\-D dictionaryFileName\fR\. Compression of small files similar to the sample set will be greatly improved\.
+.
+.TP
+\fB\-\-train FILEs\fR
+Use FILEs as training set to create a dictionary\. The training set should ideally contain a lot of samples (> 100), and weight typically 100x the target dictionary size (for example, ~10 MB for a 100 KB dictionary)\. \fB\-\-train\fR can be combined with \fB\-r\fR to indicate a directory rather than listing all the files, which can be useful to circumvent shell expansion limits\.
+.
+.IP
+Since dictionary compression is mostly effective for small files, the expectation is that the training set will only contain small files\. In the case where some samples happen to be large, only the first 128 KiB of these samples will be used for training\.
+.
+.IP
+\fB\-\-train\fR supports multithreading if \fBzstd\fR is compiled with threading support (default)\. Additional advanced parameters can be specified with \fB\-\-train\-fastcover\fR\. The legacy dictionary builder can be accessed with \fB\-\-train\-legacy\fR\. The slower cover dictionary builder can be accessed with \fB\-\-train\-cover\fR\. Default \fB\-\-train\fR is equivalent to \fB\-\-train\-fastcover=d=8,steps=4\fR\.
+.
+.TP
+\fB\-o FILE\fR
+Dictionary saved into \fBFILE\fR (default name: dictionary)\.
+.
+.TP
+\fB\-\-maxdict=#\fR
+Limit dictionary to specified size (default: 112640 bytes)\. As usual, quantities are expressed in bytes by default, and it\'s possible to employ suffixes (like \fBKB\fR or \fBMB\fR) to specify larger values\.
+.
+.TP
+\fB\-#\fR
+Use \fB#\fR compression level during training (optional)\. Will generate statistics more tuned for selected compression level, resulting in a \fIsmall\fR compression ratio improvement for this level\.
+.
+.TP
+\fB\-B#\fR
+Split input files into blocks of size # (default: no split)
+.
+.TP
+\fB\-M#\fR, \fB\-\-memory=#\fR
+Limit the amount of sample data loaded for training (default: 2 GB)\. Note that the default (2 GB) is also the maximum\. This parameter can be useful in situations where the training set size is not well controlled and could be potentially very large\. Since speed of the training process is directly correlated to the size of the training sample set, a smaller sample set leads to faster training\.
+.
+.IP
+In situations where the training set is larger than maximum memory, the CLI will randomly select samples among the available ones, up to the maximum allowed memory budget\. This is meant to improve dictionary relevance by mitigating the potential impact of clustering, such as selecting only files from the beginning of a list sorted by modification date, or sorted by alphabetical order\. The randomization process is deterministic, so training of the same list of files with the same parameters will lead to the creation of the same dictionary\.
+.
+.TP
+\fB\-\-dictID=#\fR
+A dictionary ID is a locally unique ID\. The decoder will use this value to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to provide an explicit number ID instead\. It\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\. Note that short numbers have an advantage: an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\.
+.
+.IP
+Note that RFC8878 reserves IDs less than 32768 and greater than or equal to 2^31, so they should not be used in public\.
+.
+.TP
+\fB\-\-train\-cover[=k#,d=#,steps=#,split=#,shrink[=#]]\fR
+Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. If \fIsplit\fR is not specified or split <= 0, then the default value of 100 is used\. Requires that \fId\fR <= \fIk\fR\. If \fIshrink\fR flag is not used, then the default value for \fIshrinkDict\fR of 0 is used\. If \fIshrink\fR is not specified, then the default value for \fIshrinkDictMaxRegression\fR of 1 is used\.
+.
+.IP
+Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. If \fIsplit\fR is 100, all input samples are used for both training and testing to find optimal \fId\fR and \fIk\fR to build dictionary\. Supports multithreading if \fBzstd\fR is compiled with threading support\. Having \fIshrink\fR enabled takes a truncated dictionary of minimum size and doubles in size until compression ratio of the truncated dictionary is at most \fIshrinkDictMaxRegression%\fR worse than the compression ratio of the largest dictionary\.
+.
+.IP
+Examples:
+.
+.IP
+\fBzstd \-\-train\-cover FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=k=50,d=8 FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=d=8,steps=500 FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=k=50 FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=k=50,split=60 FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=shrink FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=shrink=2 FILEs\fR
+.
+.TP
+\fB\-\-train\-fastcover[=k#,d=#,f=#,steps=#,split=#,accel=#]\fR
+Same as cover but with extra parameters \fIf\fR and \fIaccel\fR and different default value of split\. If \fIsplit\fR is not specified, then it tries \fIsplit\fR = 75\. If \fIf\fR is not specified, then it tries \fIf\fR = 20\. Requires that 0 < \fIf\fR < 32\. If \fIaccel\fR is not specified, then it tries \fIaccel\fR = 1\. Requires that 0 < \fIaccel\fR <= 10\. Requires that \fId\fR = 6 or \fId\fR = 8\.
+.
+.IP
+\fIf\fR is log of size of array that keeps track of frequency of subsegments of size \fId\fR\. The subsegment is hashed to an index in the range [0,2^\fIf\fR \- 1]\. It is possible that 2 different subsegments are hashed to the same index, and they are considered as the same subsegment when computing frequency\. Using a higher \fIf\fR reduces collision but takes longer\.
+.
+.IP
+Examples:
+.
+.IP
+\fBzstd \-\-train\-fastcover FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-fastcover=d=8,f=15,accel=2 FILEs\fR
+.
+.TP
+\fB\-\-train\-legacy[=selectivity=#]\fR
+Use legacy dictionary builder algorithm with the given dictionary \fIselectivity\fR (default: 9)\. The smaller the \fIselectivity\fR value, the denser the dictionary, improving its efficiency but reducing its achievable maximum size\. \fB\-\-train\-legacy=s=#\fR is also accepted\.
+.
+.IP
+Examples:
+.
+.IP
+\fBzstd \-\-train\-legacy FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-legacy=selectivity=8 FILEs\fR
+.
+.SH "BENCHMARK"
+The \fBzstd\fR CLI provides a benchmarking mode that can be used to easily find suitable compression parameters, or alternatively to benchmark a computer\'s performance\. Note that the results are highly dependent on the content being compressed\.
+.
+.TP
+\fB\-b#\fR
+benchmark file(s) using compression level #
+.
+.TP
+\fB\-e#\fR
+benchmark file(s) using multiple compression levels, from \fB\-b#\fR to \fB\-e#\fR (inclusive)
+.
+.TP
+\fB\-d\fR
+benchmark decompression speed only (requires providing an already zstd\-compressed content)
+.
+.TP
+\fB\-i#\fR
+minimum evaluation time, in seconds (default: 3s), benchmark mode only
+.
+.TP
+\fB\-B#\fR, \fB\-\-block\-size=#\fR
+cut file(s) into independent chunks of size # (default: no chunking)
+.
+.TP
+\fB\-\-priority=rt\fR
+set process priority to real\-time (Windows)
+.
+.P
+\fBOutput Format:\fR CompressionLevel#Filename: InputSize \-> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed
+.
+.P
+\fBMethodology:\fR For both compression and decompression speed, the entire input is compressed/decompressed in\-memory to measure speed\. A run lasts at least 1 sec, so when files are small, they are compressed/decompressed several times per run, in order to improve measurement accuracy\.
+.
.SH "SEE ALSO"
\fBzstdgrep\fR(1), \fBzstdless\fR(1), \fBgzip\fR(1), \fBxz\fR(1)
+.
.P
The \fIzstandard\fR format is specified in Y\. Collet, "Zstandard Compression and the \'application/zstd\' Media Type", https://www\.ietf\.org/rfc/rfc8878\.txt, Internet RFC 8878 (February 2021)\.
+.
.SH "BUGS"
Report bugs at: https://github\.com/facebook/zstd/issues
+.
.SH "AUTHOR"
Yann Collet
`zstd` offers highly configurable compression speed,
from fast modes at > 200 MB/s per core,
to strong modes with excellent compression ratios.
-It also features a very fast decoder, with speeds > 500 MB/s per core.
+It also features a very fast decoder, with speeds > 500 MB/s per core,
+which remains roughly stable at all compression settings.
`zstd` command line syntax is generally similar to gzip,
-but features the following differences:
+but features the following few differences:
- Source files are preserved by default.
It's possible to remove them automatically by using the `--rm` command.
### Operation Modifiers
* `-#`:
- selects `#` compression level \[1-19\] (default: 3)
+ selects `#` compression level \[1-19\] (default: 3).
+ Higher compression levels *generally* produce higher compression ratio at the expense of speed and memory.
+ A rough rule of thumb is that compression speed is expected to be divided by 2 every 2 levels.
+ Technically, each level is mapped to a set of advanced parameters (that can also be modified individually, see below).
+ Because the compressor's behavior highly depends on the content to compress, there's no guarantee of a smooth progression from one level to another.
* `--ultra`:
unlocks high compression levels 20+ (maximum 22), using a lot more memory.
Note that decompression will also require more memory when using these levels.
expected. This feature allows for controlling the guess when needed.
Exact guesses result in better compression ratios. Overestimates result in slightly
degraded compression ratios, while underestimates may result in significant degradation.
-* `-o FILE`:
- save result into `FILE`.
+* `--target-compressed-block-size=#`:
+ Attempt to produce compressed blocks of approximately this size.
+ This will split larger blocks in order to approach this target.
+ This feature is notably useful for improved latency, when the receiver can leverage receiving early incomplete data.
+ This parameter defines a loose target: compressed blocks will target this size "on average", but individual blocks can still be larger or smaller.
+ Enabling this feature can decrease compression speed by up to ~10% at level 1.
+ Higher levels will see smaller relative speed regression, becoming invisible at higher settings.
* `-f`, `--force`:
disable input and output checks. Allows overwriting existing files, input
from console, output to stdout, operating on links, block devices, etc.
During decompression and when the output destination is stdout, pass-through
unrecognized formats as-is.
* `-c`, `--stdout`:
- write to standard output (even if it is the console); keep original files unchanged.
+ write to standard output (even if it is the console); keep original files (disable `--rm`).
+* `-o FILE`:
+ save result into `FILE`.
+ Note that this operation is in conflict with `-c`.
+ If both operations are present on the command line, the last expressed one wins.
* `--[no-]sparse`:
enable / disable sparse FS support,
to make files with many zeroes smaller on disk.
* `-h`/`-H`, `--help`:
display help/long help and exit
* `-V`, `--version`:
- display version number and exit.
+ display version number and immediately exit.
+ note that, since it exits, flags specified after `-V` are effectively ignored.
Advanced: `-vV` also displays supported formats.
`-vvV` also displays POSIX support.
- `-q` will only display the version number, suitable for machine reading.
+ `-qV` will only display the version number, suitable for machine reading.
* `-v`, `--verbose`:
verbose mode, display more information
* `-q`, `--quiet`:
* `--show-default-cparams`:
shows the default compression parameters that will be used for a particular input file, based on the provided compression level and the input size.
If the provided file is not a regular file (e.g. a pipe), this flag will output the parameters used for inputs of unknown size.
+* `--exclude-compressed`:
+ only compress files that are not already compressed.
* `--`:
All arguments after `--` are treated as files
### Environment Variables
-
Employing environment variables to set parameters has security implications.
Therefore, this avenue is intentionally limited.
Only `ZSTD_CLEVEL` and `ZSTD_NBTHREADS` are currently supported.
-They set the compression level and number of threads to use during compression, respectively.
+They set the default compression level and number of threads to use during compression, respectively.
`ZSTD_CLEVEL` can be used to set the level between 1 and 19 (the "normal" range).
If the value of `ZSTD_CLEVEL` is not a valid integer, it will be ignored with a warning message.
`ZSTD_NBTHREADS` can be used to set the number of threads `zstd` will attempt to use during compression.
If the value of `ZSTD_NBTHREADS` is not a valid unsigned integer, it will be ignored with a warning message.
`ZSTD_NBTHREADS` has a default value of (`1`), and is capped at ZSTDMT_NBWORKERS_MAX==200.
-`zstd` must be compiled with multithread support for this to have any effect.
+`zstd` must be compiled with multithread support for this variable to have any effect.
They can both be overridden by corresponding command line arguments:
`-#` for compression level and `-T#` for number of compression threads.
+ADVANCED COMPRESSION OPTIONS
+----------------------------
+`zstd` provides 22 predefined regular compression levels plus the fast levels.
+A compression level is translated internally into multiple advanced parameters that control the behavior of the compressor
+(one can observe the result of this translation with `--show-default-cparams`).
+These advanced parameters can be overridden using advanced compression options.
+
+### --zstd[=options]:
+The _options_ are provided as a comma-separated list.
+You may specify only the options you want to change and the rest will be
+taken from the selected or default compression level.
+The list of available _options_:
+
+- `strategy`=_strat_, `strat`=_strat_:
+ Specify a strategy used by a match finder.
+
+ There are 9 strategies numbered from 1 to 9, from fastest to strongest:
+ 1=`ZSTD_fast`, 2=`ZSTD_dfast`, 3=`ZSTD_greedy`,
+ 4=`ZSTD_lazy`, 5=`ZSTD_lazy2`, 6=`ZSTD_btlazy2`,
+ 7=`ZSTD_btopt`, 8=`ZSTD_btultra`, 9=`ZSTD_btultra2`.
+
+- `windowLog`=_wlog_, `wlog`=_wlog_:
+ Specify the maximum number of bits for a match distance.
+
+ A higher number of bits increases the chance to find a match, which usually
+ improves compression ratio.
+ It also increases memory requirements for the compressor and decompressor.
+ The minimum _wlog_ is 10 (1 KiB) and the maximum is 30 (1 GiB) on 32-bit
+ platforms and 31 (2 GiB) on 64-bit platforms.
+
+ Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
+ `--memory=windowSize` needs to be passed to the decompressor.
+
+- `hashLog`=_hlog_, `hlog`=_hlog_:
+ Specify the maximum number of bits for a hash table.
+
+ Bigger hash tables cause fewer collisions which usually makes compression
+ faster, but requires more memory during compression.
+
+ The minimum _hlog_ is 6 (64 entries / 256 B) and the maximum is 30 (1B entries / 4 GiB).
+
+- `chainLog`=_clog_, `clog`=_clog_:
+ Specify the maximum number of bits for the secondary search structure,
+ whose form depends on the selected `strategy`.
+
+ Higher numbers of bits increase the chance to find a match, which usually
+ improves compression ratio.
+ It also slows down compression speed and increases memory requirements for
+ compression.
+ This option is ignored for the `ZSTD_fast` `strategy`, which only has the primary hash table.
+
+ The minimum _clog_ is 6 (64 entries / 256 B) and the maximum is 29 (512M entries / 2 GiB) on 32-bit platforms
+ and 30 (1B entries / 4 GiB) on 64-bit platforms.
+
+- `searchLog`=_slog_, `slog`=_slog_:
+ Specify the maximum number of searches in a hash chain or a binary tree
+ using logarithmic scale.
+
+ More searches increase the chance to find a match, which usually increases
+ compression ratio but decreases compression speed.
+
+ The minimum _slog_ is 1 and the maximum is 'windowLog' - 1.
+
+- `minMatch`=_mml_, `mml`=_mml_:
+ Specify the minimum searched length of a match in a hash table.
+
+ Larger search lengths usually decrease compression ratio but improve
+ decompression speed.
+
+ The minimum _mml_ is 3 and the maximum is 7.
+
+- `targetLength`=_tlen_, `tlen`=_tlen_:
+ The impact of this field varies depending on the selected strategy.
+
+ For `ZSTD_btopt`, `ZSTD_btultra` and `ZSTD_btultra2`, it specifies
+ the minimum match length that causes match finder to stop searching.
+ A larger `targetLength` usually improves compression ratio
+ but decreases compression speed.
+
+ For `ZSTD_fast`, it triggers ultra-fast mode when > 0.
+ The value represents the amount of data skipped between match sampling.
+ Impact is reversed: a larger `targetLength` increases compression speed
+ but decreases compression ratio.
+
+ For all other strategies, this field has no impact.
+
+ The minimum _tlen_ is 0 and the maximum is 128 KiB.
+
+- `overlapLog`=_ovlog_, `ovlog`=_ovlog_:
+ Determine `overlapSize`, amount of data reloaded from previous job.
+ This parameter is only available when multithreading is enabled.
+ Reloading more data improves compression ratio, but decreases speed.
+
+ The minimum _ovlog_ is 0, and the maximum is 9.
+ 1 means "no overlap", hence completely independent jobs.
+ 9 means "full overlap", meaning up to `windowSize` is reloaded from previous job.
+ Reducing _ovlog_ by 1 reduces the reloaded amount by a factor 2.
+ For example, 8 means "windowSize/2", and 6 means "windowSize/8".
+ Value 0 is special and means "default": _ovlog_ is automatically determined by `zstd`.
+ In which case, _ovlog_ will range from 6 to 9, depending on selected _strat_.
+
+- `ldmHashLog`=_lhlog_, `lhlog`=_lhlog_:
+ Specify the maximum size for a hash table used for long distance matching.
+
+ This option is ignored unless long distance matching is enabled.
+
+ Bigger hash tables usually improve compression ratio at the expense of more
+ memory during compression and a decrease in compression speed.
+
+ The minimum _lhlog_ is 6 and the maximum is 30 (default: 20).
+
+- `ldmMinMatch`=_lmml_, `lmml`=_lmml_:
+ Specify the minimum searched length of a match for long distance matching.
+
+ This option is ignored unless long distance matching is enabled.
+
+ Larger/very small values usually decrease compression ratio.
+
+ The minimum _lmml_ is 4 and the maximum is 4096 (default: 64).
+
+- `ldmBucketSizeLog`=_lblog_, `lblog`=_lblog_:
+ Specify the size of each bucket for the hash table used for long distance
+ matching.
+
+ This option is ignored unless long distance matching is enabled.
+
+ Larger bucket sizes improve collision resolution but decrease compression
+ speed.
+
+ The minimum _lblog_ is 1 and the maximum is 8 (default: 3).
+
+- `ldmHashRateLog`=_lhrlog_, `lhrlog`=_lhrlog_:
+ Specify the frequency of inserting entries into the long distance matching
+ hash table.
+
+ This option is ignored unless long distance matching is enabled.
+
+ Larger values will improve compression speed. Deviating far from the
+ default value will likely result in a decrease in compression ratio.
+
+ The default value is `wlog - lhlog`.
+
+### Example
+The following parameters set advanced compression options to something
+similar to predefined level 19 for files bigger than 256 KB:
+
+`--zstd`=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6
+
+### -B#:
+Specify the size of each compression job.
+This parameter is only available when multi-threading is enabled.
+Each compression job is run in parallel, so this value indirectly impacts the number of active threads.
+Default job size varies depending on compression level (generally `4 * windowSize`).
+`-B#` makes it possible to manually select a custom size.
+Note that job size must respect a minimum value which is enforced transparently.
+This minimum is either 512 KB, or `overlapSize`, whichever is largest.
+Different job sizes will lead to non-identical compressed frames.
+
+
DICTIONARY BUILDER
------------------
`zstd` offers _dictionary_ compression,
BENCHMARK
---------
+The `zstd` CLI provides a benchmarking mode that can be used to easily find suitable compression parameters, or alternatively to benchmark a computer's performance.
+Note that the results are highly dependent on the content being compressed.
* `-b#`:
benchmark file(s) using compression level #
* `-e#`:
benchmark file(s) using multiple compression levels, from `-b#` to `-e#` (inclusive)
+* `-d`:
+ benchmark decompression speed only (requires providing an already zstd-compressed content)
* `-i#`:
minimum evaluation time, in seconds (default: 3s), benchmark mode only
* `-B#`, `--block-size=#`:
cut file(s) into independent chunks of size # (default: no chunking)
* `--priority=rt`:
- set process priority to real-time
+ set process priority to real-time (Windows)
**Output Format:** CompressionLevel#Filename: InputSize -> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed
**Methodology:** For both compression and decompression speed, the entire input is compressed/decompressed in-memory to measure speed. A run lasts at least 1 sec, so when files are small, they are compressed/decompressed several times per run, in order to improve measurement accuracy.
-ADVANCED COMPRESSION OPTIONS
-----------------------------
-### -B#:
-Specify the size of each compression job.
-This parameter is only available when multi-threading is enabled.
-Each compression job is run in parallel, so this value indirectly impacts the nb of active threads.
-Default job size varies depending on compression level (generally `4 * windowSize`).
-`-B#` makes it possible to manually select a custom size.
-Note that job size must respect a minimum value which is enforced transparently.
-This minimum is either 512 KB, or `overlapSize`, whichever is largest.
-Different job sizes will lead to non-identical compressed frames.
-
-### --zstd[=options]:
-`zstd` provides 22 predefined regular compression levels plus the fast levels.
-This compression level is translated internally into a number of specific parameters that actually control the behavior of the compressor.
-(You can see the result of this translation with `--show-default-cparams`.)
-These specific parameters can be overridden with advanced compression options.
-The _options_ are provided as a comma-separated list.
-You may specify only the options you want to change and the rest will be
-taken from the selected or default compression level.
-The list of available _options_:
-
-- `strategy`=_strat_, `strat`=_strat_:
- Specify a strategy used by a match finder.
-
- There are 9 strategies numbered from 1 to 9, from fastest to strongest:
- 1=`ZSTD_fast`, 2=`ZSTD_dfast`, 3=`ZSTD_greedy`,
- 4=`ZSTD_lazy`, 5=`ZSTD_lazy2`, 6=`ZSTD_btlazy2`,
- 7=`ZSTD_btopt`, 8=`ZSTD_btultra`, 9=`ZSTD_btultra2`.
-
-- `windowLog`=_wlog_, `wlog`=_wlog_:
- Specify the maximum number of bits for a match distance.
-
- The higher number of increases the chance to find a match which usually
- improves compression ratio.
- It also increases memory requirements for the compressor and decompressor.
- The minimum _wlog_ is 10 (1 KiB) and the maximum is 30 (1 GiB) on 32-bit
- platforms and 31 (2 GiB) on 64-bit platforms.
-
- Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
- `--memory=windowSize` needs to be passed to the decompressor.
-
-- `hashLog`=_hlog_, `hlog`=_hlog_:
- Specify the maximum number of bits for a hash table.
-
- Bigger hash tables cause fewer collisions which usually makes compression
- faster, but requires more memory during compression.
-
- The minimum _hlog_ is 6 (64 entries / 256 B) and the maximum is 30 (1B entries / 4 GiB).
-
-- `chainLog`=_clog_, `clog`=_clog_:
- Specify the maximum number of bits for the secondary search structure,
- whose form depends on the selected `strategy`.
-
- Higher numbers of bits increases the chance to find a match which usually
- improves compression ratio.
- It also slows down compression speed and increases memory requirements for
- compression.
- This option is ignored for the `ZSTD_fast` `strategy`, which only has the primary hash table.
-
- The minimum _clog_ is 6 (64 entries / 256 B) and the maximum is 29 (512M entries / 2 GiB) on 32-bit platforms
- and 30 (1B entries / 4 GiB) on 64-bit platforms.
-
-- `searchLog`=_slog_, `slog`=_slog_:
- Specify the maximum number of searches in a hash chain or a binary tree
- using logarithmic scale.
-
- More searches increases the chance to find a match which usually increases
- compression ratio but decreases compression speed.
-
- The minimum _slog_ is 1 and the maximum is 'windowLog' - 1.
-
-- `minMatch`=_mml_, `mml`=_mml_:
- Specify the minimum searched length of a match in a hash table.
-
- Larger search lengths usually decrease compression ratio but improve
- decompression speed.
-
- The minimum _mml_ is 3 and the maximum is 7.
-
-- `targetLength`=_tlen_, `tlen`=_tlen_:
- The impact of this field vary depending on selected strategy.
-
- For `ZSTD_btopt`, `ZSTD_btultra` and `ZSTD_btultra2`, it specifies
- the minimum match length that causes match finder to stop searching.
- A larger `targetLength` usually improves compression ratio
- but decreases compression speed.
-
- For `ZSTD_fast`, it triggers ultra-fast mode when > 0.
- The value represents the amount of data skipped between match sampling.
- Impact is reversed: a larger `targetLength` increases compression speed
- but decreases compression ratio.
-
- For all other strategies, this field has no impact.
-
- The minimum _tlen_ is 0 and the maximum is 128 KiB.
-
-- `overlapLog`=_ovlog_, `ovlog`=_ovlog_:
- Determine `overlapSize`, amount of data reloaded from previous job.
- This parameter is only available when multithreading is enabled.
- Reloading more data improves compression ratio, but decreases speed.
-
- The minimum _ovlog_ is 0, and the maximum is 9.
- 1 means "no overlap", hence completely independent jobs.
- 9 means "full overlap", meaning up to `windowSize` is reloaded from previous job.
- Reducing _ovlog_ by 1 reduces the reloaded amount by a factor 2.
- For example, 8 means "windowSize/2", and 6 means "windowSize/8".
- Value 0 is special and means "default": _ovlog_ is automatically determined by `zstd`.
- In which case, _ovlog_ will range from 6 to 9, depending on selected _strat_.
-
-- `ldmHashLog`=_lhlog_, `lhlog`=_lhlog_:
- Specify the maximum size for a hash table used for long distance matching.
-
- This option is ignored unless long distance matching is enabled.
-
- Bigger hash tables usually improve compression ratio at the expense of more
- memory during compression and a decrease in compression speed.
-
- The minimum _lhlog_ is 6 and the maximum is 30 (default: 20).
-
-- `ldmMinMatch`=_lmml_, `lmml`=_lmml_:
- Specify the minimum searched length of a match for long distance matching.
-
- This option is ignored unless long distance matching is enabled.
-
- Larger/very small values usually decrease compression ratio.
-
- The minimum _lmml_ is 4 and the maximum is 4096 (default: 64).
-
-- `ldmBucketSizeLog`=_lblog_, `lblog`=_lblog_:
- Specify the size of each bucket for the hash table used for long distance
- matching.
-
- This option is ignored unless long distance matching is enabled.
-
- Larger bucket sizes improve collision resolution but decrease compression
- speed.
-
- The minimum _lblog_ is 1 and the maximum is 8 (default: 3).
-
-- `ldmHashRateLog`=_lhrlog_, `lhrlog`=_lhrlog_:
- Specify the frequency of inserting entries into the long distance matching
- hash table.
-
- This option is ignored unless long distance matching is enabled.
-
- Larger values will improve compression speed. Deviating far from the
- default value will likely result in a decrease in compression ratio.
-
- The default value is `wlog - lhlog`.
-
-### Example
-The following parameters sets advanced compression options to something
-similar to predefined level 19 for files bigger than 256 KB:
-
-`--zstd`=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6
SEE ALSO
--------
* Command Line
**************************************/
/* print help either in `stderr` or `stdout` depending on originating request
- * error (badusage) => stderr
- * help (usage_advanced) => stdout
+ * error (badUsage) => stderr
+ * help (usageAdvanced) => stdout
*/
static void usage(FILE* f, const char* programName)
{
DISPLAY_F(f, "\n");
}
-static void usage_advanced(const char* programName)
+static void usageAdvanced(const char* programName)
{
DISPLAYOUT(WELCOME_MESSAGE);
DISPLAYOUT("\n");
DISPLAYOUT("\n");
DISPLAYOUT(" --format=zstd Compress files to the `.zst` format. [Default]\n");
- DISPLAYOUT(" --mmap-dict Memory-map dictionary file rather than mallocing and loading all at once");
+ DISPLAYOUT(" --[no-]mmap-dict Memory-map dictionary file rather than mallocing and loading all at once\n");
#ifdef ZSTD_GZCOMPRESS
DISPLAYOUT(" --format=gzip Compress files to the `.gz` format.\n");
#endif
}
-static void badusage(const char* programName)
+static void badUsage(const char* programName, const char* parameter)
{
- DISPLAYLEVEL(1, "Incorrect parameters \n");
+ DISPLAYLEVEL(1, "Incorrect parameter: %s \n", parameter);
if (g_displayLevel >= 2) usage(stderr, programName);
}
/** parseAdaptParameters() :
- * reads adapt parameters from *stringPtr (e.g. "--zstd=min=1,max=19) and store them into adaptMinPtr and adaptMaxPtr.
+ * reads adapt parameters from *stringPtr (e.g. "--adapt=min=1,max=19) and store them into adaptMinPtr and adaptMaxPtr.
* Both adaptMinPtr and adaptMaxPtr must be already allocated and correctly initialized.
* There is no guarantee that any of these values will be updated.
* @return 1 means that parsing was successful,
ZSTD_paramSwitch_e useRowMatchFinder = ZSTD_ps_auto;
FIO_compressionType_t cType = FIO_zstdCompression;
unsigned nbWorkers = 0;
- double compressibility = 0.5;
+ double compressibility = -1.0; /* lorem ipsum generator */
unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */
size_t blockSize = 0;
/* command switches */
for (argNb=1; argNb<argCount; argNb++) {
const char* argument = argv[argNb];
+ const char* const originalArgument = argument;
if (!argument) continue; /* Protection if argument empty */
if (nextArgumentsAreFiles) {
if (!strcmp(argument, "--uncompress")) { operation=zom_decompress; continue; }
if (!strcmp(argument, "--force")) { FIO_overwriteMode(prefs); forceStdin=1; forceStdout=1; followLinks=1; allowBlockDevices=1; continue; }
if (!strcmp(argument, "--version")) { printVersion(); CLEAN_RETURN(0); }
- if (!strcmp(argument, "--help")) { usage_advanced(programName); CLEAN_RETURN(0); }
+ if (!strcmp(argument, "--help")) { usageAdvanced(programName); CLEAN_RETURN(0); }
if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; }
if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; }
- if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; removeSrcFile=0; continue; }
+ if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; continue; }
if (!strcmp(argument, "--ultra")) { ultra=1; continue; }
if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(prefs, 2); continue; }
if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(prefs, 0); continue; }
if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
if (!strcmp(argument, "--no-row-match-finder")) { useRowMatchFinder = ZSTD_ps_disable; continue; }
if (!strcmp(argument, "--row-match-finder")) { useRowMatchFinder = ZSTD_ps_enable; continue; }
- if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) { badusage(programName); CLEAN_RETURN(1); } continue; }
+ if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) { badUsage(programName, originalArgument); CLEAN_RETURN(1); } continue; }
if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; cType = FIO_zstdCompression; continue; }
if (!strcmp(argument, "--mmap-dict")) { mmapDict = ZSTD_ps_enable; continue; }
dict = cover;
/* Allow optional arguments following an = */
if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
- else if (*argument++ != '=') { badusage(programName); CLEAN_RETURN(1); }
- else if (!parseCoverParameters(argument, &coverParams)) { badusage(programName); CLEAN_RETURN(1); }
+ else if (*argument++ != '=') { badUsage(programName, originalArgument); CLEAN_RETURN(1); }
+ else if (!parseCoverParameters(argument, &coverParams)) { badUsage(programName, originalArgument); CLEAN_RETURN(1); }
continue;
}
if (longCommandWArg(&argument, "--train-fastcover")) {
dict = fastCover;
/* Allow optional arguments following an = */
if (*argument == 0) { memset(&fastCoverParams, 0, sizeof(fastCoverParams)); }
- else if (*argument++ != '=') { badusage(programName); CLEAN_RETURN(1); }
- else if (!parseFastCoverParameters(argument, &fastCoverParams)) { badusage(programName); CLEAN_RETURN(1); }
+ else if (*argument++ != '=') { badUsage(programName, originalArgument); CLEAN_RETURN(1); }
+ else if (!parseFastCoverParameters(argument, &fastCoverParams)) { badUsage(programName, originalArgument); CLEAN_RETURN(1); }
continue;
}
if (longCommandWArg(&argument, "--train-legacy")) {
dict = legacy;
/* Allow optional arguments following an = */
if (*argument == 0) { continue; }
- else if (*argument++ != '=') { badusage(programName); CLEAN_RETURN(1); }
- else if (!parseLegacyParameters(argument, &dictSelect)) { badusage(programName); CLEAN_RETURN(1); }
+ else if (*argument++ != '=') { badUsage(programName, originalArgument); CLEAN_RETURN(1); }
+ else if (!parseLegacyParameters(argument, &dictSelect)) { badUsage(programName, originalArgument); CLEAN_RETURN(1); }
continue;
}
#endif
if (longCommandWArg(&argument, "--block-size")) { NEXT_TSIZE(blockSize); continue; }
if (longCommandWArg(&argument, "--maxdict")) { NEXT_UINT32(maxDictSize); continue; }
if (longCommandWArg(&argument, "--dictID")) { NEXT_UINT32(dictID); continue; }
- if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badusage(programName); CLEAN_RETURN(1); } ; cType = FIO_zstdCompression; continue; }
+ if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badUsage(programName, originalArgument); CLEAN_RETURN(1); } ; cType = FIO_zstdCompression; continue; }
if (longCommandWArg(&argument, "--stream-size")) { NEXT_TSIZE(streamSrcSize); continue; }
if (longCommandWArg(&argument, "--target-compressed-block-size")) { NEXT_TSIZE(targetCBlockSize); continue; }
if (longCommandWArg(&argument, "--size-hint")) { NEXT_TSIZE(srcSizeHint); continue; }
ldmWindowLog = readU32FromChar(&argument);
} else if (*argument != 0) {
/* Invalid character following --long */
- badusage(programName);
+ badUsage(programName, originalArgument);
CLEAN_RETURN(1);
} else {
ldmWindowLog = g_defaultMaxWindowLog;
if (fastLevel) {
dictCLevel = cLevel = -(int)fastLevel;
} else {
- badusage(programName);
+ badUsage(programName, originalArgument);
CLEAN_RETURN(1);
}
} else if (*argument != 0) {
/* Invalid character following --fast */
- badusage(programName);
+ badUsage(programName, originalArgument);
CLEAN_RETURN(1);
} else {
cLevel = -1; /* default for --fast */
continue;
}
- /* fall-through, will trigger bad_usage() later on */
+ badUsage(programName, originalArgument);
+ CLEAN_RETURN(1);
}
argument++;
{
/* Display help */
case 'V': printVersion(); CLEAN_RETURN(0); /* Version Only */
- case 'H': usage_advanced(programName); CLEAN_RETURN(0);
+ case 'H': usageAdvanced(programName); CLEAN_RETURN(0);
case 'h': usage(stdout, programName); CLEAN_RETURN(0);
/* Compress */
operation=zom_decompress; argument++; break;
/* Force stdout, even if stdout==console */
- case 'c': forceStdout=1; outFileName=stdoutmark; removeSrcFile=0; argument++; break;
+ case 'c': forceStdout=1; outFileName=stdoutmark; argument++; break;
+
+ /* destination file name */
+ case 'o': argument++; NEXT_FIELD(outFileName); break;
/* do not store filename - gzip compatibility - nothing to do */
case 'n': argument++; break;
/* test compressed file */
case 't': operation=zom_test; argument++; break;
- /* destination file name */
- case 'o': argument++; NEXT_FIELD(outFileName); break;
-
/* limit memory */
case 'M':
argument++;
break;
/* unknown command */
- default : badusage(programName); CLEAN_RETURN(1);
+ default :
+ { char shortArgument[3] = {'-', 0, 0};
+ shortArgument[1] = argument[0];
+ badUsage(programName, shortArgument);
+ CLEAN_RETURN(1);
+ }
}
}
continue;
CLEAN_RETURN(1);
}
benchParams.blockSize = blockSize;
+ benchParams.targetCBlockSize = targetCBlockSize;
benchParams.nbWorkers = (int)nbWorkers;
benchParams.realTime = (unsigned)setRealTimePrio;
benchParams.nbSeconds = bench_nbSeconds;
-.TH "ZSTDGREP" "1" "March 2023" "zstd 1.5.5" "User Commands"
+.
+.TH "ZSTDGREP" "1" "March 2024" "zstd 1.5.6" "User Commands"
+.
.SH "NAME"
\fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files
+.
.SH "SYNOPSIS"
-\fBzstdgrep\fR [\fIgrep\-flags\fR] [\-\-] \fIpattern\fR [\fIfiles\fR \|\.\|\.\|\.]
+\fBzstdgrep\fR [\fIgrep\-flags\fR] [\-\-] \fIpattern\fR [\fIfiles\fR \.\.\.]
+.
.SH "DESCRIPTION"
\fBzstdgrep\fR runs \fBgrep\fR(1) on files, or \fBstdin\fR if no files argument is given, after decompressing them with \fBzstdcat\fR(1)\.
+.
.P
The \fIgrep\-flags\fR and \fIpattern\fR arguments are passed on to \fBgrep\fR(1)\. If an \fB\-e\fR flag is found in the \fIgrep\-flags\fR, \fBzstdgrep\fR will not look for a \fIpattern\fR argument\.
+.
.P
Note that modern \fBgrep\fR alternatives such as \fBripgrep\fR (\fBrg\fR(1)) support \fBzstd\fR\-compressed files out of the box, and can prove better alternatives than \fBzstdgrep\fR notably for unsupported complex pattern searches\. Note though that such alternatives may also feature some minor command line differences\.
+.
.SH "EXIT STATUS"
In case of missing arguments or missing pattern, 1 will be returned, otherwise 0\.
+.
.SH "SEE ALSO"
\fBzstd\fR(1)
+.
.SH "AUTHORS"
Thomas Klausner \fIwiz@NetBSD\.org\fR
-.TH "ZSTDLESS" "1" "March 2023" "zstd 1.5.5" "User Commands"
+.
+.TH "ZSTDLESS" "1" "March 2024" "zstd 1.5.6" "User Commands"
+.
.SH "NAME"
\fBzstdless\fR \- view zstandard\-compressed files
+.
.SH "SYNOPSIS"
-\fBzstdless\fR [\fIflags\fR] [\fIfile\fR \|\.\|\.\|\.]
+\fBzstdless\fR [\fIflags\fR] [\fIfile\fR \.\.\.]
+.
.SH "DESCRIPTION"
\fBzstdless\fR runs \fBless\fR(1) on files or stdin, if no \fIfile\fR argument is given, after decompressing them with \fBzstdcat\fR(1)\.
+.
.SH "SEE ALSO"
\fBzstd\fR(1)
*.exe
*.out
*.app
+
+# Specific exclusions
+!golden-decompression/*.zst
# zstreamtest32: Same as zstreamtest, but forced to compile in 32-bits mode
# ##########################################################################
-LIBZSTD = ../lib
-
-ZSTD_LEGACY_SUPPORT ?= 0
+ZSTD_LEGACY_SUPPORT ?= 5
+export ZSTD_LEGACY_SUPPORT
DEBUGLEVEL ?= 2
export DEBUGLEVEL # transmit value to sub-makefiles
-include $(LIBZSTD)/libzstd.mk
+LIBZSTD_MK_DIR := ../lib
+include $(LIBZSTD_MK_DIR)/libzstd.mk
-ZSTDDIR = $(LIBZSTD)
PRGDIR = ../programs
PYTHON ?= python3
TESTARTEFACT := versionsTest
DEBUGFLAGS += -g -Wno-c++-compat
-CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
- -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \
+CPPFLAGS += -I$(LIB_SRCDIR) -I$(LIB_SRCDIR)/common -I$(LIB_SRCDIR)/compress -I$(LIB_SRCDIR)/legacy \
+ -I$(LIB_SRCDIR)/dictBuilder -I$(LIB_SRCDIR)/deprecated -I$(PRGDIR) \
-DZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY=1
ZSTDCOMMON_FILES := $(sort $(ZSTD_COMMON_FILES))
ZSTDCOMP_FILES := $(sort $(ZSTD_COMPRESS_FILES))
ZSTDDECOMP_FILES := $(sort $(ZSTD_DECOMPRESS_FILES))
-ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES)
+ZSTDLEGACY_FILES := $(sort $(wildcard $(LIB_SRCDIR)/legacy/*.c))
+ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) $(ZSTDLEGACY_FILES)
ZDICT_FILES := $(sort $(ZSTD_DICTBUILDER_FILES))
ZSTD_F1 := $(sort $(wildcard $(ZSTD_FILES)))
-ZSTD_OBJ1 := $(subst $(ZSTDDIR)/common/,zstdm_,$(ZSTD_F1))
-ZSTD_OBJ2 := $(subst $(ZSTDDIR)/compress/,zstdc_,$(ZSTD_OBJ1))
-ZSTD_OBJ3 := $(subst $(ZSTDDIR)/decompress/,zstdd_,$(ZSTD_OBJ2))
-ZSTD_OBJ4 := $(ZSTD_OBJ3:.c=.o)
-ZSTD_OBJECTS := $(ZSTD_OBJ4:.S=.o)
-
-ZSTDMT_OBJ1 := $(subst $(ZSTDDIR)/common/,zstdmt_m_,$(ZSTD_F1))
-ZSTDMT_OBJ2 := $(subst $(ZSTDDIR)/compress/,zstdmt_c_,$(ZSTDMT_OBJ1))
-ZSTDMT_OBJ3 := $(subst $(ZSTDDIR)/decompress/,zstdmt_d_,$(ZSTDMT_OBJ2))
-ZSTDMT_OBJ4 := $(ZSTDMT_OBJ3:.c=.o)
-ZSTDMT_OBJECTS := $(ZSTDMT_OBJ4:.S=.o)
+ZSTD_OBJ1 := $(subst $(LIB_SRCDIR)/common/,zstdm_,$(ZSTD_F1))
+ZSTD_OBJ2 := $(subst $(LIB_SRCDIR)/compress/,zstdc_,$(ZSTD_OBJ1))
+ZSTD_OBJ3 := $(subst $(LIB_SRCDIR)/decompress/,zstdd_,$(ZSTD_OBJ2))
+ZSTD_OBJ4 := $(subst $(LIB_SRCDIR)/legacy/,zstdl_,$(ZSTD_OBJ3))
+ZSTD_OBJ5 := $(ZSTD_OBJ4:.c=.o)
+ZSTD_OBJECTS := $(ZSTD_OBJ5:.S=.o)
+
+ZSTDMT_OBJ1 := $(subst $(LIB_SRCDIR)/common/,zstdmt_m_,$(ZSTD_F1))
+ZSTDMT_OBJ2 := $(subst $(LIB_SRCDIR)/compress/,zstdmt_c_,$(ZSTDMT_OBJ1))
+ZSTDMT_OBJ3 := $(subst $(LIB_SRCDIR)/decompress/,zstdmt_d_,$(ZSTDMT_OBJ2))
+ZSTDMT_OBJ4 := $(subst $(LIB_SRCDIR)/legacy/,zstdmt_l_,$(ZSTDMT_OBJ3))
+ZSTDMT_OBJ5 := $(ZSTDMT_OBJ4:.c=.o)
+ZSTDMT_OBJECTS := $(ZSTDMT_OBJ5:.S=.o)
# Define *.exe as extension for Windows systems
ifneq (,$(filter Windows%,$(OS)))
.PHONY: libzstd
libzstd :
- $(MAKE) -C $(ZSTDDIR) libzstd MOREFLAGS+="$(DEBUGFLAGS)"
+ $(MAKE) -C $(LIB_SRCDIR) libzstd MOREFLAGS+="$(DEBUGFLAGS)"
%-dll : libzstd
-%-dll : LDFLAGS += -L$(ZSTDDIR) -lzstd
+%-dll : LDFLAGS += -L$(LIB_BINDIR) -lzstd
-$(ZSTDDIR)/libzstd.a :
- $(MAKE) -C $(ZSTDDIR) libzstd.a
+$(LIB_BINDIR)/libzstd.a :
+ $(MAKE) -C $(LIB_SRCDIR) libzstd.a
-zstdm_%.o : $(ZSTDDIR)/common/%.c
+zstdm_%.o : $(LIB_SRCDIR)/common/%.c
$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
-zstdc_%.o : $(ZSTDDIR)/compress/%.c
+zstdc_%.o : $(LIB_SRCDIR)/compress/%.c
$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
-zstdd_%.o : $(ZSTDDIR)/decompress/%.c
+zstdd_%.o : $(LIB_SRCDIR)/decompress/%.c
$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
-zstdd_%.o : $(ZSTDDIR)/decompress/%.S
+zstdd_%.o : $(LIB_SRCDIR)/decompress/%.S
$(CC) -c $(CPPFLAGS) $(ASFLAGS) $< -o $@
+zstdl_%.o : $(LIB_SRCDIR)/legacy/%.c
+ $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
+
zstdmt%.o : CPPFLAGS += $(MULTITHREAD_CPP)
-zstdmt_m_%.o : $(ZSTDDIR)/common/%.c
+zstdmt_m_%.o : $(LIB_SRCDIR)/common/%.c
$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
-zstdmt_c_%.o : $(ZSTDDIR)/compress/%.c
+zstdmt_c_%.o : $(LIB_SRCDIR)/compress/%.c
$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
-zstdmt_d_%.o : $(ZSTDDIR)/decompress/%.c
+zstdmt_d_%.o : $(LIB_SRCDIR)/decompress/%.c
$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
-zstdmt_d_%.o : $(ZSTDDIR)/decompress/%.S
+zstdmt_d_%.o : $(LIB_SRCDIR)/decompress/%.S
$(CC) -c $(CPPFLAGS) $(ASFLAGS) $< -o $@
+zstdmt_l_%.o : $(LIB_SRCDIR)/legacy/%.c
+ $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
+
FULLBENCHS := fullbench fullbench32
CLEAN += $(FULLBENCHS)
fullbench32: CPPFLAGS += -m32
CLEAN += fullbench-lib
fullbench-lib : CPPFLAGS += -DXXH_NAMESPACE=ZSTD_
-fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c $(ZSTDDIR)/libzstd.a fullbench.c
+fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c $(LIB_SRCDIR)/libzstd.a fullbench.c
$(LINK.c) $^ -o $@$(EXT)
# note : broken : requires symbols unavailable from dynamic library
fullbench-dll: $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c $(PRGDIR)/timefn.c fullbench.c
-# $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(ZSTDDIR)/dll/libzstd.dll
+# $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(LIB_SRCDIR)/dll/libzstd.dll
$(LINK.c) $^ $(LDLIBS) -o $@$(EXT)
CLEAN += fuzzer fuzzer32
$(LINK.c) $^ -o $@$(EXT)
# note : broken : requires symbols unavailable from dynamic library
-fuzzer-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c fuzzer.c
+fuzzer-dll : $(LIB_SRCDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c fuzzer.c
$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
CLEAN += zstreamtest zstreamtest32
$(LINK.c) $(MULTITHREAD) $^ -o $@$(EXT)
# note : broken : requires symbols unavailable from dynamic library
-zstreamtest-dll : $(ZSTDDIR)/common/xxhash.c # xxh symbols not exposed from dll
+zstreamtest-dll : $(LIB_SRCDIR)/common/xxhash.c # xxh symbols not exposed from dll
zstreamtest-dll : $(ZSTREAM_LOCAL_FILES)
$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
CLEAN += paramgrill
paramgrill : DEBUGFLAGS = # turn off debug for speed measurements
paramgrill : LDLIBS += -lm
-paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c $(PRGDIR)/benchzstd.c $(PRGDIR)/datagen.c paramgrill.c
+paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c $(PRGDIR)/benchzstd.c $(PRGDIR)/datagen.c $(PRGDIR)/lorem.c paramgrill.c
CLEAN += datagen
-datagen : $(PRGDIR)/datagen.c datagencli.c
+datagen : $(PRGDIR)/datagen.c $(PRGDIR)/lorem.c loremOut.c datagencli.c
$(LINK.c) $^ -o $@$(EXT)
CLEAN += roundTripCrash
invalidDictionaries : $(ZSTD_OBJECTS) invalidDictionaries.c
CLEAN += legacy
-legacy : CPPFLAGS += -I$(ZSTDDIR)/legacy -UZSTD_LEGACY_SUPPORT -DZSTD_LEGACY_SUPPORT=4
-legacy : $(ZSTD_FILES) $(sort $(wildcard $(ZSTDDIR)/legacy/*.c)) legacy.c
+legacy : CPPFLAGS += -UZSTD_LEGACY_SUPPORT -DZSTD_LEGACY_SUPPORT=4
+legacy : $(ZSTD_FILES) legacy.c
CLEAN += decodecorpus
decodecorpus : LDLIBS += -lm
decodecorpus : $(filter-out zstdc_zstd_compress.o, $(ZSTD_OBJECTS)) $(ZDICT_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c decodecorpus.c
CLEAN += poolTests
-poolTests : $(PRGDIR)/util.c $(PRGDIR)/timefn.c poolTests.c $(ZSTDDIR)/common/pool.c $(ZSTDDIR)/common/threading.c $(ZSTDDIR)/common/zstd_common.c $(ZSTDDIR)/common/error_private.c
+poolTests : $(PRGDIR)/util.c $(PRGDIR)/timefn.c poolTests.c $(LIB_SRCDIR)/common/pool.c $(LIB_SRCDIR)/common/threading.c $(LIB_SRCDIR)/common/zstd_common.c $(LIB_SRCDIR)/common/error_private.c
$(LINK.c) $(MULTITHREAD) $^ -o $@$(EXT)
.PHONY: versionsTest
# make checkTag : check that release tag corresponds to release version
CLEAN += checkTag
-checkTag.o : $(ZSTDDIR)/zstd.h
+checkTag.o : $(LIB_SRCDIR)/zstd.h
.PHONY: clean
clean:
- $(MAKE) -C $(ZSTDDIR) clean
+ $(MAKE) -C $(LIB_SRCDIR) clean
$(MAKE) -C $(PRGDIR) clean
- $(RM) -fR $(TESTARTEFACT)
- $(RM) -rf tmp* # some test directories are named tmp*
+ $(MAKE) -C fuzz clean
+ $(RM) -R $(TESTARTEFACT)
+ $(RM) -r tmp* # some test directories are named tmp*
$(RM) $(CLEAN) core *.o *.tmp result* *.gcda dictionary *.zst \
$(PRGDIR)/zstd$(EXT) $(PRGDIR)/zstd32$(EXT) \
fullbench-dll$(EXT) fuzzer-dll$(EXT) zstreamtest-dll$(EXT)
# valgrind tests validated only for some posix platforms
#----------------------------------------------------------------------------------
UNAME := $(shell uname)
-ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS AIX))
+ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS AIX CYGWIN_NT))
HOST_OS = POSIX
.PHONY: test-valgrind
fuzztest: test-fuzzer test-zstream test-decodecorpus
.PHONY: test
-test: test-zstd test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy test-decodecorpus test-cli-tests
+test: test-zstd test-cli-tests test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy test-decodecorpus
ifeq ($(QEMU_SYS),)
test: test-pool
endif
--- /dev/null
+#!/bin/sh
+
+println "+ zstd --blah" >&2
+zstd --blah
+println "+ zstd -xz" >&2
+zstd -xz
+println "+ zstd --adapt=min=1,maxx=2 file.txt" >&2
+zstd --adapt=min=1,maxx=2 file.txt
+println "+ zstd --train-cover=k=48,d=8,steps32 file.txt" >&2
+zstd --train-cover=k=48,d=8,steps32 file.txt
--- /dev/null
++ zstd --blah
+Incorrect parameter: --blah
+...
+Usage: zstd *
+
+Options:
+...
++ zstd -xz
+Incorrect parameter: -x
+...
+Usage: zstd *
+
+Options:
+...
++ zstd --adapt=min=1,maxx=2 file.txt
+Incorrect parameter: --adapt=min=1,maxx=2
+...
+Usage: zstd *
+
+Options:
+...
++ zstd --train-cover=k=48,d=8,steps32 file.txt
+Incorrect parameter: --train-cover=k=48,d=8,steps32
+...
+Usage: zstd *
+
+Options:
+...
--- /dev/null
+#!/bin/sh
+
+zstdname=$(basename $0)
+
+if [ -z "$EXEC_PREFIX" ]; then
+ "$ZSTD_SYMLINK_DIR/$zstdname" $@
+else
+ $EXEC_PREFIX "$ZSTD_SYMLINK_DIR/$zstdname" $@
+fi
--- /dev/null
+#!/bin/sh
+
+zstdname=$(basename $0)
+
+if [ -z "$EXEC_PREFIX" ]; then
+ "$ZSTD_SYMLINK_DIR/$zstdname" $@
+else
+ $EXEC_PREFIX "$ZSTD_SYMLINK_DIR/$zstdname" $@
+fi
--- /dev/null
+#!/bin/sh
+
+set -e
+
+GOLDEN_DIR="$ZSTD_REPO_DIR/tests/golden-decompression-errors/"
+
+for file in "$GOLDEN_DIR"/*; do
+ zstd -t $file && die "should have detected an error"
+done
+exit 0
+
--- /dev/null
+#!/bin/sh
+set -e
+
+# Round-trip test for `zstd --output-dir-mirror`: compress a small tree
+# (including hidden files and a subdirectory) into a mirror directory,
+# decompress into a second mirror, and verify the result matches the
+# original.  Exercised with relative paths, paths from inside the
+# directory, absolute paths, and paths containing "." components.
+
+# setup
+mkdir -p src/.hidden src/dir
+mkdir mid dst
+
+echo "file1" > src/file1
+echo "file2" > src/.file2
+echo "file3" > src/.hidden/.file3
+echo "file4" > src/dir/.file4
+
+# relative paths
+zstd -q -r --output-dir-mirror mid/ src/
+zstd -q -d -r --output-dir-mirror dst/ mid/src/
+
+diff --brief --recursive --new-file src/ dst/mid/src/
+
+# reset
+rm -rf mid dst
+mkdir mid dst
+
+# from inside the directory
+(cd src; zstd -q -r --output-dir-mirror ../mid/ ./)
+(cd mid; zstd -q -d -r --output-dir-mirror ../dst/ ./)
+
+diff --brief --recursive --new-file src/ dst/
+
+# reset
+rm -rf mid dst
+mkdir mid dst
+
+# absolute paths
+export BASE_PATH="$(pwd)"
+
+zstd -q -r --output-dir-mirror mid/ "${BASE_PATH}/src/"
+zstd -q -d -r --output-dir-mirror dst/ "${BASE_PATH}/mid/${BASE_PATH}/src/"
+
+# NOTE(review): mirroring an absolute source recreates the full source
+# path under the mirror root, hence the doubled ${BASE_PATH} below.
+diff --brief --recursive --new-file src/ "dst/${BASE_PATH}/mid/${BASE_PATH}/src/"
+
+# reset
+rm -rf mid dst
+mkdir mid dst
+
+# dots
+zstd -q -r --output-dir-mirror mid/ ./src/./
+zstd -q -d -r --output-dir-mirror dst/ ./mid/./src/./
+
+diff --brief --recursive --new-file src/ dst/mid/src/
--- /dev/null
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*-************************************
+ * Dependencies
+ **************************************/
+#include <stdio.h> /* fprintf, stderr */
+#include "datagen.h" /* RDG_generate */
+#include "loremOut.h" /* LOREM_genOut */
+#include "util.h" /* Compiler options */
+
+/*-************************************
+ * Constants
+ **************************************/
+#define KB *(1 << 10)
+#define MB *(1 << 20)
+#define GB *(1U << 30)
+
+#define SIZE_DEFAULT ((64 KB) + 1)
+#define SEED_DEFAULT 0
+#define COMPRESSIBILITY_DEFAULT 9999
+
+/*-************************************
+ * Macros
+ **************************************/
+#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...) \
+ if (displayLevel >= l) { \
+ DISPLAY(__VA_ARGS__); \
+ }
+static unsigned displayLevel = 2;
+
+/*-*******************************************************
+ * Command line
+ *********************************************************/
+/* Print datagen's command-line help to stderr (via DISPLAY).
+ * Always returns 0; callers use the return value directly as the
+ * process exit code, both for `-h` and for unrecognized options. */
+static int usage(const char* programName)
+{
+ DISPLAY("Compressible data generator\n");
+ DISPLAY("Usage :\n");
+ DISPLAY(" %s [args]\n", programName);
+ DISPLAY("\n");
+ DISPLAY("Arguments :\n");
+ DISPLAY(" -g# : generate # data (default:%i)\n", SIZE_DEFAULT);
+ DISPLAY(" -s# : Select seed (default:%i)\n", SEED_DEFAULT);
+ DISPLAY(" -P# : Select compressibility in %% (range [0-100])\n");
+ DISPLAY(" -h : display help and exit\n");
+ return 0;
+}
+
+int main(int argc, const char** argv)
+{
+ unsigned probaU32 = COMPRESSIBILITY_DEFAULT;
+ double litProba = 0.0;
+ U64 size = SIZE_DEFAULT;
+ U32 seed = SEED_DEFAULT;
+ const char* const programName = argv[0];
+
+ int argNb;
+ for (argNb = 1; argNb < argc; argNb++) {
+ const char* argument = argv[argNb];
+
+ if (!argument)
+ continue; /* Protection if argument empty */
+
+ /* Handle commands. Aggregated commands are allowed */
+ if (*argument == '-') {
+ argument++;
+ while (*argument != 0) {
+ switch (*argument) {
+ case 'h':
+ return usage(programName);
+ case 'g':
+ argument++;
+ size = 0;
+ while ((*argument >= '0') && (*argument <= '9'))
+ size *= 10, size += (U64)(*argument++ - '0');
+ if (*argument == 'K') {
+ size <<= 10;
+ argument++;
+ }
+ if (*argument == 'M') {
+ size <<= 20;
+ argument++;
+ }
+ if (*argument == 'G') {
+ size <<= 30;
+ argument++;
+ }
+ if (*argument == 'B') {
+ argument++;
+ }
+ break;
+ case 's':
+ argument++;
+ seed = 0;
+ while ((*argument >= '0') && (*argument <= '9'))
+ seed *= 10, seed += (U32)(*argument++ - '0');
+ break;
+ case 'P':
+ argument++;
+ probaU32 = 0;
+ while ((*argument >= '0') && (*argument <= '9'))
+ probaU32 *= 10,
+ probaU32 += (U32)(*argument++ - '0');
+ if (probaU32 > 100)
+ probaU32 = 100;
+ break;
+ case 'L': /* hidden argument : Literal distribution
+ probability */
+ argument++;
+ litProba = 0.;
+ while ((*argument >= '0') && (*argument <= '9'))
+ litProba *= 10, litProba += *argument++ - '0';
+ if (litProba > 100.)
+ litProba = 100.;
+ litProba /= 100.;
+ break;
+ case 'v':
+ displayLevel = 4;
+ argument++;
+ break;
+ default:
+ return usage(programName);
+ }
+ }
+ }
+ } /* for(argNb=1; argNb<argc; argNb++) */
+
+ DISPLAYLEVEL(4, "Compressible data Generator \n");
+ DISPLAYLEVEL(3, "Seed = %u \n", (unsigned)seed);
+
+ if (probaU32 != COMPRESSIBILITY_DEFAULT) {
+ DISPLAYLEVEL(3, "Compressibility : %i%%\n", probaU32);
+ RDG_genStdout(size, (double)probaU32 / 100, litProba, seed);
+ } else {
+ LOREM_genOut(size, seed);
+ }
+
+ DISPLAYLEVEL(3, "\n");
+
+ return 0;
+}
}
} while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0);
- { BYTE* const dictEnd = info.dictContent + info.dictContentSize;
+ { BYTE* const dictEnd = ZSTD_maybeNullPtrAdd(info.dictContent, info.dictContentSize);
size_t j;
for (j = 0; j < matchLen; j++) {
if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) {
/* Sequences Header */
if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
- if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
+ if (nbSeq < 128) *op++ = (BYTE)nbSeq;
else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
return ZSTD_decompress(dst, dstSize, buff2, g_cSize);
}
-static ZSTD_DCtx* g_zdc = NULL;
+static ZSTD_DCtx* g_zdc = NULL; /* will be initialized within benchMem */
+static size_t local_ZSTD_decompressDCtx(const void* src, size_t srcSize,
+ void* dst, size_t dstSize,
+ void* buff2)
+{
+ (void)src; (void)srcSize;
+ return ZSTD_decompressDCtx(g_zdc, dst, dstSize, buff2, g_cSize);
+}
#ifndef ZSTD_DLL_IMPORT
-typedef enum {
- not_streaming = 0,
- is_streaming = 1
-} streaming_operation;
-extern size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize, void* dst, size_t dstCapacity, const streaming_operation streaming);
+
+extern size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
+ const void* src, size_t srcSize,
+ void* dst, size_t dstCapacity);
static size_t local_ZSTD_decodeLiteralsBlock(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2)
{
(void)src; (void)srcSize; (void)dst; (void)dstSize;
- return ZSTD_decodeLiteralsBlock(g_zdc, buff2, g_cSize, dst, dstSize, not_streaming);
+ return ZSTD_decodeLiteralsBlock_wrapper(g_zdc, buff2, g_cSize, dst, dstSize);
}
static size_t local_ZSTD_decodeSeqHeaders(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2)
case 3:
benchFunction = local_ZSTD_compress_freshCCtx; benchName = "compress_freshCCtx";
break;
+ case 4:
+ benchFunction = local_ZSTD_decompressDCtx; benchName = "decompressDCtx";
+ break;
#ifndef ZSTD_DLL_IMPORT
case 11:
benchFunction = local_ZSTD_compressContinue; benchName = "compressContinue";
case 3:
payload = &cparams;
break;
+ case 4:
+ g_cSize = ZSTD_compress(dstBuff2, dstBuffSize, src, srcSize, cLevel);
+ break;
#ifndef ZSTD_DLL_IMPORT
case 11:
payload = &cparams;
ip += ZSTD_blockHeaderSize; /* skip block header */
ZSTD_decompressBegin(g_zdc);
CONTROL(iend > ip);
- ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, (size_t)(iend-ip), dstBuff, dstBuffSize, not_streaming); /* skip literal segment */
+ ip += ZSTD_decodeLiteralsBlock_wrapper(g_zdc, ip, (size_t)(iend-ip), dstBuff, dstBuffSize); /* skip literal segment */
g_cSize = (size_t)(iend-ip);
memcpy(dstBuff2, ip, g_cSize); /* copy rest of block (it starts by SeqHeader) */
srcSize = srcSize > 128 KB ? 128 KB : srcSize; /* speed relative to block */
endif
CORPORA_URL_PREFIX:=https://github.com/facebook/zstd/releases/download/fuzz-corpora/
-LIBZSTD = ../../lib
+LIBZSTD_MK_DIR = ../../lib
DEBUGLEVEL ?= 2
ZSTD_LEGACY_SUPPORT ?= 1
-include $(LIBZSTD)/libzstd.mk
+include $(LIBZSTD_MK_DIR)/libzstd.mk
-ZSTDDIR = ../../lib
PRGDIR = ../../programs
CONTRIBDIR = ../../contrib
DEFAULT_SEQ_PROD_SRC = $(DEFAULT_SEQ_PROD_DIR)/sequence_producer.c
THIRD_PARTY_SEQ_PROD_OBJ ?=
-FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
- -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \
+FUZZ_CPPFLAGS := -I$(LIB_SRCDIR) -I$(LIB_SRCDIR)/common -I$(LIB_SRCDIR)/compress \
+ -I$(LIB_SRCDIR)/dictBuilder -I$(LIB_SRCDIR)/deprecated -I$(LIB_SRCDIR)/legacy \
-I$(CONTRIBDIR)/seekable_format -I$(PRGDIR) -I$(DEFAULT_SEQ_PROD_DIR) \
-DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS)
FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
$(DEFAULT_SEQ_PROD_SRC)
FUZZ_SRC := $(sort $(wildcard $(FUZZ_SRC)))
-FUZZ_D_OBJ1 := $(subst $(ZSTDDIR)/common/,d_lib_common_,$(FUZZ_SRC))
-FUZZ_D_OBJ2 := $(subst $(ZSTDDIR)/compress/,d_lib_compress_,$(FUZZ_D_OBJ1))
-FUZZ_D_OBJ3 := $(subst $(ZSTDDIR)/decompress/,d_lib_decompress_,$(FUZZ_D_OBJ2))
-FUZZ_D_OBJ4 := $(subst $(ZSTDDIR)/dictBuilder/,d_lib_dictBuilder_,$(FUZZ_D_OBJ3))
-FUZZ_D_OBJ5 := $(subst $(ZSTDDIR)/legacy/,d_lib_legacy_,$(FUZZ_D_OBJ4))
+FUZZ_D_OBJ1 := $(subst $(LIB_SRCDIR)/common/,d_lib_common_,$(FUZZ_SRC))
+FUZZ_D_OBJ2 := $(subst $(LIB_SRCDIR)/compress/,d_lib_compress_,$(FUZZ_D_OBJ1))
+FUZZ_D_OBJ3 := $(subst $(LIB_SRCDIR)/decompress/,d_lib_decompress_,$(FUZZ_D_OBJ2))
+FUZZ_D_OBJ4 := $(subst $(LIB_SRCDIR)/dictBuilder/,d_lib_dictBuilder_,$(FUZZ_D_OBJ3))
+FUZZ_D_OBJ5 := $(subst $(LIB_SRCDIR)/legacy/,d_lib_legacy_,$(FUZZ_D_OBJ4))
FUZZ_D_OBJ6 := $(subst $(PRGDIR)/,d_prg_,$(FUZZ_D_OBJ5))
FUZZ_D_OBJ7 := $(subst $(DEFAULT_SEQ_PROD_DIR)/,d_default_seq_prod_,$(FUZZ_D_OBJ6))
FUZZ_D_OBJ8 := $(subst $\./,d_fuzz_,$(FUZZ_D_OBJ7))
FUZZ_D_OBJ10 := $(THIRD_PARTY_SEQ_PROD_OBJ) $(FUZZ_D_OBJ9)
FUZZ_DECOMPRESS_OBJ := $(FUZZ_D_OBJ10:.S=.o)
-FUZZ_RT_OBJ1 := $(subst $(ZSTDDIR)/common/,rt_lib_common_,$(FUZZ_SRC))
-FUZZ_RT_OBJ2 := $(subst $(ZSTDDIR)/compress/,rt_lib_compress_,$(FUZZ_RT_OBJ1))
-FUZZ_RT_OBJ3 := $(subst $(ZSTDDIR)/decompress/,rt_lib_decompress_,$(FUZZ_RT_OBJ2))
-FUZZ_RT_OBJ4 := $(subst $(ZSTDDIR)/dictBuilder/,rt_lib_dictBuilder_,$(FUZZ_RT_OBJ3))
-FUZZ_RT_OBJ5 := $(subst $(ZSTDDIR)/legacy/,rt_lib_legacy_,$(FUZZ_RT_OBJ4))
+FUZZ_RT_OBJ1 := $(subst $(LIB_SRCDIR)/common/,rt_lib_common_,$(FUZZ_SRC))
+FUZZ_RT_OBJ2 := $(subst $(LIB_SRCDIR)/compress/,rt_lib_compress_,$(FUZZ_RT_OBJ1))
+FUZZ_RT_OBJ3 := $(subst $(LIB_SRCDIR)/decompress/,rt_lib_decompress_,$(FUZZ_RT_OBJ2))
+FUZZ_RT_OBJ4 := $(subst $(LIB_SRCDIR)/dictBuilder/,rt_lib_dictBuilder_,$(FUZZ_RT_OBJ3))
+FUZZ_RT_OBJ5 := $(subst $(LIB_SRCDIR)/legacy/,rt_lib_legacy_,$(FUZZ_RT_OBJ4))
FUZZ_RT_OBJ6 := $(subst $(PRGDIR)/,rt_prg_,$(FUZZ_RT_OBJ5))
FUZZ_RT_OBJ7 := $(subst $(DEFAULT_SEQ_PROD_DIR)/,rt_default_seq_prod_,$(FUZZ_RT_OBJ6))
FUZZ_RT_OBJ8 := $(subst $\./,rt_fuzz_,$(FUZZ_RT_OBJ7))
sequence_compression_api \
seekable_roundtrip \
huf_round_trip \
- huf_decompress
+ huf_decompress \
+ decompress_cross_format \
+ generate_sequences
all: libregression.a $(FUZZ_TARGETS)
-rt_lib_common_%.o: $(ZSTDDIR)/common/%.c
+rt_lib_common_%.o: $(LIB_SRCDIR)/common/%.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@
-rt_lib_compress_%.o: $(ZSTDDIR)/compress/%.c
+rt_lib_compress_%.o: $(LIB_SRCDIR)/compress/%.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@
-rt_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.c
+rt_lib_decompress_%.o: $(LIB_SRCDIR)/decompress/%.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@
-rt_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.S
+rt_lib_decompress_%.o: $(LIB_SRCDIR)/decompress/%.S
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_ASFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@
-rt_lib_dictBuilder_%.o: $(ZSTDDIR)/dictBuilder/%.c
+rt_lib_dictBuilder_%.o: $(LIB_SRCDIR)/dictBuilder/%.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@
-rt_lib_legacy_%.o: $(ZSTDDIR)/legacy/%.c
+rt_lib_legacy_%.o: $(LIB_SRCDIR)/legacy/%.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@
rt_prg_%.o: $(PRGDIR)/%.c
rt_default_seq_prod_%.o: $(DEFAULT_SEQ_PROD_DIR)/%.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@
-d_lib_common_%.o: $(ZSTDDIR)/common/%.c
+d_lib_common_%.o: $(LIB_SRCDIR)/common/%.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@
-d_lib_compress_%.o: $(ZSTDDIR)/compress/%.c
+d_lib_compress_%.o: $(LIB_SRCDIR)/compress/%.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@
-d_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.c
+d_lib_decompress_%.o: $(LIB_SRCDIR)/decompress/%.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@
-d_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.S
+d_lib_decompress_%.o: $(LIB_SRCDIR)/decompress/%.S
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_ASFLAGS) $< -c -o $@
-d_lib_dictBuilder_%.o: $(ZSTDDIR)/dictBuilder/%.c
+d_lib_dictBuilder_%.o: $(LIB_SRCDIR)/dictBuilder/%.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@
-d_lib_legacy_%.o: $(ZSTDDIR)/legacy/%.c
+d_lib_legacy_%.o: $(LIB_SRCDIR)/legacy/%.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@
d_prg_%.o: $(PRGDIR)/%.c
huf_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o $(LIB_FUZZING_ENGINE) -o $@
+decompress_cross_format: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o
+ $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o $(LIB_FUZZING_ENGINE) -o $@
+
+generate_sequences: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_generate_sequences.o
+ $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_generate_sequences.o $(LIB_FUZZING_ENGINE) -o $@
+
libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o
seedcorpora: $(patsubst %,corpora/%_seed_corpus.zip,$(FUZZ_TARGETS))
regressiontest: corpora
- CC="$(CC)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" $(PYTHON) ./fuzz.py build all
+ CC="$(CC)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" $(PYTHON) ./fuzz.py build all --debug=$(DEBUGLEVEL)
$(PYTHON) ./fuzz.py regression all
clean:
## Fuzzing a custom sequence producer plugin
Sequence producer plugin authors can use the zstd fuzzers to stress-test their code.
See the documentation in `fuzz_third_party_seq_prod.h` for details.
+
+## Adding a new fuzzer
+There are several steps involved in adding a new fuzzer harness.
+
+### Build your harness
+1. Create your new fuzzer harness `tests/fuzz/your_harness.c`.
+
+2. Add your harness to the Makefile
+
+ 2.1 Follow [this example](https://github.com/facebook/zstd/blob/e124e39301381de8f323436a3e4c46539747ba24/tests/fuzz/Makefile#L216) if your fuzzer requires both compression and decompression symbols (prefix `rt_`). If your fuzzer only requires decompression symbols, follow [this example](https://github.com/facebook/zstd/blob/6a0052a409e2604bd40354b76b86272b712edd7d/tests/fuzz/Makefile#L194) (prefix `d_`).
+
+ 2.2 Add your target to [`FUZZ_TARGETS`](https://github.com/facebook/zstd/blob/6a0052a409e2604bd40354b76b86272b712edd7d/tests/fuzz/Makefile#L108).
+
+3. Add your harness to [`fuzz.py`](https://github.com/facebook/zstd/blob/6a0052a409e2604bd40354b76b86272b712edd7d/tests/fuzz/fuzz.py#L48).
+
+### Generate seed data
+Follow the instructions above to generate seed data:
+```
+make -C ../tests decodecorpus
+./fuzz.py gen your_harness
+```
+
+### Run the harness
+Follow the instructions above to run your harness and fix any crashes:
+```
+./fuzz.py build your_harness --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++
+./fuzz.py libfuzzer your_harness
+```
+
+### Minimize and zip the corpus
+After running the fuzzer for a while, you will have a large corpus at `tests/fuzz/corpora/your_harness*`.
+This corpus must be minimized and zipped before uploading to GitHub for regression testing:
+```
+./fuzz.py minimize your_harness
+./fuzz.py zip your_harness
+```
+
+### Upload the zip file to GitHub
+The previous step should produce a `.zip` file containing the corpus for your new harness.
+This corpus must be uploaded to GitHub here: https://github.com/facebook/zstd/releases/tag/fuzz-corpora
+
+
--- /dev/null
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+// This fuzz target validates decompression of magicless-format compressed data.
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "fuzz_helpers.h"
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+#include "fuzz_data_producer.h"
+
+static ZSTD_DCtx *dctx = NULL;
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+ // Give a random portion of src data to the producer, to use for parameter generation.
+ // The rest will be interpreted as magicless compressed data.
+ FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+ size_t magiclessSize = FUZZ_dataProducer_reserveDataPrefix(producer);
+ const uint8_t* const magiclessSrc = src;
+ size_t const dstSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
+ uint8_t* const standardDst = (uint8_t*)FUZZ_malloc(dstSize);
+ uint8_t* const magiclessDst = (uint8_t*)FUZZ_malloc(dstSize);
+
+ // Create standard-format src from magicless-format src
+ const uint32_t zstd_magic = ZSTD_MAGICNUMBER;
+ size_t standardSize = sizeof(zstd_magic) + magiclessSize;
+ uint8_t* const standardSrc = (uint8_t*)FUZZ_malloc(standardSize);
+ memcpy(standardSrc, &zstd_magic, sizeof(zstd_magic)); // assume fuzzing on little-endian machine
+ memcpy(standardSrc + sizeof(zstd_magic), magiclessSrc, magiclessSize);
+
+ // Truncate to a single frame
+ {
+ const size_t standardFrameCompressedSize = ZSTD_findFrameCompressedSize(standardSrc, standardSize);
+ if (ZSTD_isError(standardFrameCompressedSize)) {
+ goto cleanup_and_return;
+ }
+ standardSize = standardFrameCompressedSize;
+ magiclessSize = standardFrameCompressedSize - sizeof(zstd_magic);
+ }
+
+ // Create DCtx if needed
+ if (!dctx) {
+ dctx = ZSTD_createDCtx();
+ FUZZ_ASSERT(dctx);
+ }
+
+ // Test one-shot decompression
+ {
+ FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
+ FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1));
+ const size_t standardRet = ZSTD_decompressDCtx(
+ dctx, standardDst, dstSize, standardSrc, standardSize);
+
+ FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
+ FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless));
+ const size_t magiclessRet = ZSTD_decompressDCtx(
+ dctx, magiclessDst, dstSize, magiclessSrc, magiclessSize);
+
+ // Standard accepts => magicless should accept
+ if (!ZSTD_isError(standardRet)) FUZZ_ZASSERT(magiclessRet);
+
+ // Magicless accepts => standard should accept
+ // NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy.
+ if (!ZSTD_isError(magiclessRet)) FUZZ_ZASSERT(standardRet);
+
+ // If both accept, decompressed size and data should match
+ if (!ZSTD_isError(standardRet) && !ZSTD_isError(magiclessRet)) {
+ FUZZ_ASSERT(standardRet == magiclessRet);
+ if (standardRet > 0) {
+ FUZZ_ASSERT(
+ memcmp(standardDst, magiclessDst, standardRet) == 0
+ );
+ }
+ }
+ }
+
+ // Test streaming decompression
+ {
+ ZSTD_inBuffer standardIn = { standardSrc, standardSize, 0 };
+ ZSTD_inBuffer magiclessIn = { magiclessSrc, magiclessSize, 0 };
+ ZSTD_outBuffer standardOut = { standardDst, dstSize, 0 };
+ ZSTD_outBuffer magiclessOut = { magiclessDst, dstSize, 0 };
+
+ FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
+ FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1));
+ const size_t standardRet = ZSTD_decompressStream(dctx, &standardOut, &standardIn);
+
+ FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
+ FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless));
+ const size_t magiclessRet = ZSTD_decompressStream(dctx, &magiclessOut, &magiclessIn);
+
+ // Standard accepts => magicless should accept
+ if (standardRet == 0) FUZZ_ASSERT(magiclessRet == 0);
+
+ // Magicless accepts => standard should accept
+ // NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy.
+ if (magiclessRet == 0) FUZZ_ASSERT(standardRet == 0);
+
+ // If both accept, decompressed size and data should match
+ if (standardRet == 0 && magiclessRet == 0) {
+ FUZZ_ASSERT(standardOut.pos == magiclessOut.pos);
+ if (standardOut.pos > 0) {
+ FUZZ_ASSERT(
+ memcmp(standardOut.dst, magiclessOut.dst, standardOut.pos) == 0
+ );
+ }
+ }
+ }
+
+cleanup_and_return:
+#ifndef STATEFUL_FUZZING
+ ZSTD_freeDCtx(dctx); dctx = NULL;
+#endif
+ free(standardSrc);
+ free(standardDst);
+ free(magiclessDst);
+ FUZZ_dataProducer_free(producer);
+ return 0;
+}
#include "fuzz_data_producer.h"
#include "fuzz_third_party_seq_prod.h"
-static ZSTD_CCtx *cctx = NULL;
-static ZSTD_DCtx *dctx = NULL;
+static ZSTD_CCtx* cctx = NULL;
+static ZSTD_DCtx* dctx = NULL;
-static size_t roundTripTest(void *result, size_t resultCapacity,
- void *compressed, size_t compressedCapacity,
- const void *src, size_t srcSize,
- FUZZ_dataProducer_t *producer)
+static size_t roundTripTest(void* result, size_t resultCapacity,
+ void* compressed, size_t compressedCapacity,
+ const void* src, size_t srcSize,
+ FUZZ_dataProducer_t* producer)
{
ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto;
FUZZ_dict_t dict = FUZZ_train(src, srcSize, producer);
'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
'huf_round_trip': TargetInfo(InputType.RAW_DATA),
'huf_decompress': TargetInfo(InputType.RAW_DATA),
+ 'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
+ 'generate_sequences': TargetInfo(InputType.RAW_DATA),
}
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']
action='store_true',
help='Enable UBSAN')
parser.add_argument(
- '--enable-ubsan-pointer-overflow',
+ '--disable-ubsan-pointer-overflow',
dest='ubsan_pointer_overflow',
- action='store_true',
- help='Enable UBSAN pointer overflow check (known failure)')
+ action='store_false',
+ help='Disable UBSAN pointer overflow check (known failure)')
parser.add_argument(
'--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
parser.add_argument(
raise RuntimeError('MSAN may not be used with any other sanitizers')
if args.msan_track_origins and not args.msan:
raise RuntimeError('--enable-msan-track-origins requires MSAN')
- if args.ubsan_pointer_overflow and not args.ubsan:
- raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
if args.sanitize_recover and not args.sanitize:
raise RuntimeError('--enable-sanitize-recover but no sanitizers used')
cxxflags = shlex.split(args.cxxflags)
mflags = shlex.split(args.mflags)
# Flags to be added to both cflags and cxxflags
- common_flags = []
+ common_flags = [
+ '-Werror',
+ '-Wno-error=declaration-after-statement',
+ '-Wno-error=c++-compat',
+ '-Wno-error=deprecated' # C files are sometimes compiled with CXX
+ ]
cppflags += [
'-DDEBUGLEVEL={}'.format(args.debug),
subprocess.check_call(clean_cmd)
build_cmd = [
'make',
+ '-j',
cc_str,
cxx_str,
cppflags_str,
uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min,
uint32_t max) {
- FUZZ_ASSERT(min <= max);
-
uint32_t range = max - min;
uint32_t rolling = range;
uint32_t result = 0;
+ FUZZ_ASSERT(min <= max);
+
while (rolling > 0 && producer->size > 0) {
uint8_t next = *(producer->data + producer->size - 1);
producer->size -= 1;
size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize)
{
- newSize = newSize > producer->size ? producer->size : newSize;
+ const size_t effectiveNewSize = newSize > producer->size ? producer->size : newSize;
- size_t remaining = producer->size - newSize;
+ size_t remaining = producer->size - effectiveNewSize;
producer->data = producer->data + remaining;
- producer->size = newSize;
+ producer->size = effectiveNewSize;
return remaining;
}
size_t FUZZ_seqProdSetup(void);
/* The fuzzer will call this function after each test-case. It should free
- * resources aquired by FUZZ_seqProdSetup() to prevent leaks across test-cases.
+ * resources acquired by FUZZ_seqProdSetup() to prevent leaks across test-cases.
*
* The fuzzer will assert() that the return value is zero. To signal an error,
* please return a non-zero value. */
void* FUZZ_createSeqProdState(void);
/* The fuzzer will call this function after each test-case. It should free any
- * resources aquired by FUZZ_createSeqProdState().
+ * resources acquired by FUZZ_createSeqProdState().
*
* The fuzzer will assert() that the return value is zero. To signal an error,
* please return a non-zero value. */
--- /dev/null
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#define ZSTD_STATIC_LINKING_ONLY
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "fuzz_data_producer.h"
+#include "fuzz_helpers.h"
+#include "zstd_helpers.h"
+
+/**
+ * This fuzz target ensures that ZSTD_generateSequences() does not crash and
+ * if it succeeds that ZSTD_compressSequences() round trips.
+ */
+
+/* Round-trips @nbSeqs sequences in @seqs describing @src (@srcSize bytes):
+ * compresses them with ZSTD_compressSequences(), decompresses the result,
+ * and asserts the output is byte-identical to @src. Aborts via FUZZ_*ASSERT
+ * on any failure. @cctx must already carry the desired blockDelimiters mode.
+ */
+static void testRoundTrip(ZSTD_CCtx* cctx, ZSTD_Sequence const* seqs, size_t nbSeqs, const void* src, size_t srcSize) {
+ /* Compress the sequences with block delimiters */
+ const size_t compressBound = ZSTD_compressBound(srcSize);
+ void* dst = FUZZ_malloc(compressBound);
+ FUZZ_ASSERT(dst);
+
+ size_t compressedSize = ZSTD_compressSequences(cctx, dst, compressBound, seqs, nbSeqs, src, srcSize);
+ FUZZ_ZASSERT(compressedSize);
+
+ void* decompressed = FUZZ_malloc(srcSize);
+ /* allocation is only required for non-empty input; srcSize == 0 may yield NULL */
+ FUZZ_ASSERT(srcSize == 0 || decompressed);
+ size_t decompressedSize = ZSTD_decompress(decompressed, srcSize, dst, compressedSize);
+ FUZZ_ZASSERT(decompressedSize);
+ FUZZ_ASSERT(decompressedSize == srcSize);
+ if (srcSize != 0) {
+ FUZZ_ASSERT(!memcmp(src, decompressed, srcSize));
+ }
+
+ free(decompressed);
+ free(dst);
+}
+
+/* Fuzz entry point: drives ZSTD_generateSequences() with a randomly sized
+ * output table, then (on success) verifies the produced sequences round trip
+ * through ZSTD_compressSequences(), both with explicit block delimiters and
+ * after ZSTD_mergeBlockDelimiters(). Always returns 0 (libFuzzer convention).
+ */
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+
+ FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size);
+ size = FUZZ_dataProducer_reserveDataPrefix(producer);
+
+ ZSTD_CCtx* cctx = ZSTD_createCCtx();
+ FUZZ_ASSERT(cctx);
+
+ /* capacity sampled in [0, 2 * ZSTD_sequenceBound(size)] so both
+ * too-small and oversized sequence buffers get exercised */
+ const size_t seqsCapacity = FUZZ_dataProducer_uint32Range(producer, 0, 2 * ZSTD_sequenceBound(size));
+ ZSTD_Sequence* seqs = (ZSTD_Sequence*)FUZZ_malloc(sizeof(ZSTD_Sequence) * seqsCapacity);
+ FUZZ_ASSERT(seqsCapacity == 0 || seqs);
+
+ FUZZ_setRandomParameters(cctx, size, producer);
+ /* force targetCBlockSize off and single-threaded mode after the random
+ * parameter shuffle — presumably preconditions of ZSTD_generateSequences();
+ * confirm against the library documentation */
+ FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 0));
+ FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0));
+
+ const size_t nbSeqs = ZSTD_generateSequences(cctx, seqs, seqsCapacity, data, size);
+ if (ZSTD_isError(nbSeqs)) {
+ /* Allowed to error if the destination is too small */
+ if (ZSTD_getErrorCode(nbSeqs) == ZSTD_error_dstSize_tooSmall) {
+ FUZZ_ASSERT(seqsCapacity < ZSTD_sequenceBound(size));
+ }
+ } else {
+ /* Ensure we round trip with and without block delimiters*/
+
+ FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters));
+ testRoundTrip(cctx, seqs, nbSeqs, data, size);
+
+ /* merging delimiters can only keep or shrink the sequence count */
+ const size_t nbMergedSeqs = ZSTD_mergeBlockDelimiters(seqs, nbSeqs);
+ FUZZ_ASSERT(nbMergedSeqs <= nbSeqs);
+ FUZZ_ZASSERT(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only));
+ FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters));
+ testRoundTrip(cctx, seqs, nbMergedSeqs, data, size);
+ }
+
+ free(seqs);
+ ZSTD_freeCCtx(cctx);
+ FUZZ_dataProducer_free(producer);
+ return 0;
+}
fprintf(stderr, "WARNING: No files passed to %s\n", argv[0]);
for (i = 0; i < files->tableSize; ++i) {
char const *fileName = files->fileNames[i];
- DEBUGLOG(3, "Running %s", fileName);
size_t const fileSize = UTIL_getFileSize(fileName);
size_t readSize;
FILE *file;
+ DEBUGLOG(3, "Running %s", fileName);
+
/* Check that it is a regular file, and that the fileSize is valid.
* If it is not a regular file, then it may have been deleted since we
* constructed the list, so just skip it, but return an error exit code.
}
}
for (; j < matchLength; ++j) {
- op[j] = op[j - generatedSequences[i].offset];
+ op[j] = op[(ptrdiff_t)(j - generatedSequences[i].offset)];
}
op += j;
FUZZ_ASSERT(generatedSequences[i].matchLength == j + k);
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
+
+#define ZSTD_STATIC_LINKING_ONLY
+
#include "fuzz_helpers.h"
#include "zstd.h"
#include "fuzz_data_producer.h"
FUZZ_ASSERT(dctx);
}
- size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
- void *rBuf = FUZZ_malloc(bufSize);
-
- ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
- free(rBuf);
+ {
+ size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
+ void *rBuf = FUZZ_malloc(bufSize);
+ size_t const dSize = ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
+ if (!ZSTD_isError(dSize)) {
+ /* If decompression was successful, the content size from the frame header(s) should be valid. */
+ unsigned long long const expectedSize = ZSTD_findDecompressedSize(src, size);
+ FUZZ_ASSERT(expectedSize != ZSTD_CONTENTSIZE_ERROR);
+ FUZZ_ASSERT(expectedSize == ZSTD_CONTENTSIZE_UNKNOWN || expectedSize == dSize);
+ }
+ free(rBuf);
+ }
FUZZ_dataProducer_free(producer);
static ZSTD_CCtx *cctx = NULL;
static ZSTD_DCtx *dctx = NULL;
-static size_t getDecompressionMargin(void const* compressed, size_t cSize, size_t srcSize, int hasSmallBlocks)
+static size_t getDecompressionMargin(void const* compressed, size_t cSize, size_t srcSize, int hasSmallBlocks, int maxBlockSize)
{
size_t margin = ZSTD_decompressionMargin(compressed, cSize);
if (!hasSmallBlocks) {
ZSTD_frameHeader zfh;
size_t marginM;
FUZZ_ZASSERT(ZSTD_getFrameHeader(&zfh, compressed, cSize));
- marginM = ZSTD_DECOMPRESSION_MARGIN(srcSize, zfh.blockSizeMax);
+ if (maxBlockSize == 0) {
+ maxBlockSize = zfh.blockSizeMax;
+ } else {
+ maxBlockSize = MIN(maxBlockSize, (int)zfh.blockSizeMax);
+ }
+ marginM = ZSTD_DECOMPRESSION_MARGIN(srcSize, maxBlockSize);
if (marginM < margin)
margin = marginM;
}
size_t cSize;
size_t dSize;
int targetCBlockSize = 0;
+ int maxBlockSize = 0;
if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) {
size_t const remainingBytes = FUZZ_dataProducer_remainingBytes(producer);
FUZZ_setRandomParameters(cctx, srcSize, producer);
cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
FUZZ_ZASSERT(cSize);
FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_targetCBlockSize, &targetCBlockSize));
+ FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_maxBlockSize, &maxBlockSize));
// Compress a second time and check for determinism
{
size_t const cSize0 = cSize;
FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0);
}
}
+ if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) {
+ FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, maxBlockSize));
+ }
dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
FUZZ_ZASSERT(dSize);
FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size");
FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, result, dSize), "Corruption!");
{
- size_t margin = getDecompressionMargin(compressed, cSize, srcSize, targetCBlockSize);
+ size_t margin = getDecompressionMargin(compressed, cSize, srcSize, targetCBlockSize, maxBlockSize);
size_t const outputSize = srcSize + margin;
char* const output = (char*)FUZZ_malloc(outputSize);
char* const input = output + outputSize - cSize;
size_t dstSize = 0;
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
FUZZ_setRandomParameters(cctx, srcSize, producer);
+ int maxBlockSize;
+ FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_maxBlockSize, &maxBlockSize));
while (srcSize > 0) {
ZSTD_inBuffer in = makeInBuffer(&src, &srcSize, producer);
if (FUZZ_dataProducer_uint32Range(producer, 0, 7) == 0) {
size_t const remaining = in.size - in.pos;
FUZZ_setRandomParameters(cctx, remaining, producer);
+ /* Always use the same maxBlockSize */
+ FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, maxBlockSize));
}
mode = -1;
}
return dstSize;
}
+/* Streaming decompression helper for the round-trip fuzzer.
+ * Uses the file-scope dctx. Reads the compressor's maxBlockSize from the
+ * file-scope cctx and, on a coin flip from @producer, mirrors it onto the
+ * decompressor so both the limited and unlimited paths are exercised.
+ * Returns the number of bytes written to @dst. */
+static size_t decompress(void* dst, size_t dstCapacity, void const* src, size_t srcSize, FUZZ_dataProducer_t* producer)
+{
+ ZSTD_inBuffer in = {src, srcSize, 0};
+ ZSTD_outBuffer out = {dst, dstCapacity, 0};
+ int maxBlockSize;
+ FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_maxBlockSize, &maxBlockSize));
+ if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) {
+ FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, maxBlockSize));
+ }
+ while (in.pos < in.size) {
+ size_t const ret = ZSTD_decompressStream(dctx, &out, &in);
+ FUZZ_ZASSERT(ret);
+ /* ret == 0 signals a fully decoded and flushed frame; each call is
+ * expected to complete one (the caller sizes @dst generously) */
+ FUZZ_ASSERT(ret == 0);
+ }
+ return out.pos;
+}
+
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_SEQ_PROD_SETUP();
{
size_t const cSize = compress(cBuf, neededBufSize, src, size, producer);
- size_t const rSize =
- ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, cBuf, cSize);
+ size_t const rSize = decompress(rBuf, neededBufSize, cBuf, cSize, producer);
FUZZ_ZASSERT(rSize);
FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size");
FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!");
if (seqs[i].offset != 0) {
for (j = 0; j < seqs[i].matchLength; ++j)
- dst[j] = dst[j - seqs[i].offset];
+ dst[j] = dst[(ptrdiff_t)(j - seqs[i].offset)];
dst += seqs[i].matchLength;
src += seqs[i].matchLength;
size -= seqs[i].matchLength;
RDG_genBuffer(CNBuffer, CNBuffSize, 0.5, 0.5, 0);
- DISPLAYLEVEL(3, "thread pool test : threadPool re-use roundtrips: ");
+ DISPLAYLEVEL(3, "thread pool test : threadPool reuse roundtrips: ");
{
ZSTD_CCtx* cctx = ZSTD_createCCtx();
ZSTD_threadPool* pool = ZSTD_createThreadPool(kPoolNumThreads);
CHECK_EQ( ZSTD_flushStream(cctx, &out), 0 );
}
CHECK_EQ( ZSTD_endStream(cctx, &out), 0 );
- CHECK( ZSTD_decompressBound(outBuffer, out.pos) > 0x100000000LLU /* 4 GB */ );
+ CHECK( ZSTD_decompressBound(outBuffer, out.pos) > 0x100000000ULL /* 4 GB */ );
ZSTD_freeCCtx(cctx);
free(outBuffer);
}
ZSTD_freeCCtx(cctx);
}
+ DISPLAYLEVEL(3, "test%3i : maxBlockSize = 2K", testNb++);
+ {
+ ZSTD_CCtx* cctx = ZSTD_createCCtx();
+ ZSTD_DCtx* dctx = ZSTD_createDCtx();
+ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
+ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, 2048));
+ CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 2048));
+
+ cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
+ CHECK_Z(cSize);
+ CHECK_Z(ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize));
+
+ CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 1024));
+ CHECK(ZSTD_isError(ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize)));
+
+ ZSTD_freeDCtx(dctx);
+ ZSTD_freeCCtx(cctx);
+ }
+
DISPLAYLEVEL(3, "test%3i : ldm fill dict out-of-bounds check", testNb++);
{
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t const srcSize1 = kWindowSize / 2;
size_t const srcSize2 = kWindowSize * 10;
+ CHECK(cctx!=NULL);
+ CHECK(dctx!=NULL);
+ CHECK(dict!=NULL);
if (CNBuffSize < dictSize) goto _output_error;
RDG_genBuffer(dict, dictSize, 0.5, 0.5, seed);
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize1);
CHECK_Z(cSize);
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dict, dictSize));
+
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize2);
/* Streaming decompression to catch out of bounds offsets. */
{
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2));
/* Round trip once with a dictionary. */
CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, dictSize));
- {
- ZSTD_inBuffer in = {CNBuffer, srcSize1, 0};
+ { ZSTD_inBuffer in = {CNBuffer, srcSize1, 0};
ZSTD_outBuffer out = {compressedBuffer, compressedBufferSize, 0};
CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));
cSize = out.pos;
}
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dict, dictSize));
- {
- ZSTD_inBuffer in = {CNBuffer, srcSize2, 0};
+
+ { ZSTD_inBuffer in = {CNBuffer, srcSize2, 0};
ZSTD_outBuffer out = {compressedBuffer, compressedBufferSize, 0};
CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));
cSize = out.pos;
}
/* Streaming decompression to catch out of bounds offsets. */
- {
- ZSTD_inBuffer in = {compressedBuffer, cSize, 0};
+ { ZSTD_inBuffer in = {compressedBuffer, cSize, 0};
ZSTD_outBuffer out = {decodedBuffer, CNBuffSize, 0};
size_t const dSize = ZSTD_decompressStream(dctx, &out, &in);
CHECK_Z(dSize);
}
DISPLAYLEVEL(3, "OK \n");
- DISPLAYLEVEL(3, "test%3d: superblock uncompressible data, too many nocompress superblocks : ", testNb++);
+ DISPLAYLEVEL(3, "test%3d : superblock uncompressible data: too many nocompress superblocks : ", testNb++);
{
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
const BYTE* src = (BYTE*)CNBuffer; BYTE* dst = (BYTE*)compressedBuffer;
}
DISPLAYLEVEL(3, "OK \n");
- DISPLAYLEVEL(3, "test%3d : re-use CCtx with expanding block size : ", testNb++);
+ DISPLAYLEVEL(3, "test%3d : reuse CCtx with expanding block size : ", testNb++);
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_parameters const params = ZSTD_getParams(1, ZSTD_CONTENTSIZE_UNKNOWN, 0);
assert(params.fParams.contentSizeFlag == 1); /* block size will be adapted if pledgedSrcSize is enabled */
CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, 1 /*pledgedSrcSize*/) );
CHECK_Z( ZSTD_compressEnd(cctx, compressedBuffer, compressedBufferSize, CNBuffer, 1) ); /* creates a block size of 1 */
- CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN) ); /* re-use same parameters */
+ CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN) ); /* reuse same parameters */
{ size_t const inSize = 2* 128 KB;
size_t const outSize = ZSTD_compressBound(inSize);
CHECK_Z( ZSTD_compressEnd(cctx, compressedBuffer, outSize, CNBuffer, inSize) );
params.cParams.windowLog = ZSTD_WINDOWLOG_MAX;
for (cnb = 0; cnb < nbCompressions; ++cnb) {
DISPLAYLEVEL(6, "run %zu / %zu \n", cnb, nbCompressions);
- CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN) ); /* re-use same parameters */
+ CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN) ); /* reuse same parameters */
CHECK_Z( ZSTD_compressEnd(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize) );
}
ZSTD_freeCCtx(cctx);
} }
DISPLAYLEVEL(3, "OK \n");
+/* The compression-efficiency tests below need every block-compressor
+ * strategy available; skip them when any strategy is compiled out.
+ * NOTE(review): the original condition tested
+ * ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR twice; the duplicate is removed. */
+#if !defined(ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR) \
+ && !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
+ && !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
+ && !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
+ && !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
+ && !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
+ && !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
/* Note : these tests should be replaced by proper regression tests,
* but existing ones do not focus on small data + dictionary + all levels.
*/
DISPLAYLEVEL(4, "compression efficiency tests OK \n");
}
+#endif
ZSTD_freeCCtx(ctxOrig);
ZSTD_freeCCtx(ctxDuplicated);
/* Test with block delimiters roundtrip */
seqsSize = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
+ CHECK_Z(seqsSize);
FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize, ZSTD_sf_explicitBlockDelimiters);
assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
/* Test no block delimiters roundtrip */
seqsSize = ZSTD_mergeBlockDelimiters(seqs, seqsSize);
+ CHECK_Z(seqsSize);
FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize, ZSTD_sf_noBlockDelimiters);
assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
}
DISPLAYLEVEL(3, "OK \n");
+ DISPLAYLEVEL(3, "test%3i : ZSTD_generateSequences too small output buffer : ", testNb++);
+ {
+ const size_t seqsCapacity = 10;
+ const size_t srcSize = 150 KB;
+ const BYTE* src = (BYTE*)CNBuffer;
+
+ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+ ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(seqsCapacity * sizeof(ZSTD_Sequence));
+
+ if (seqs == NULL) goto _output_error;
+ if (cctx == NULL) goto _output_error;
+ /* Populate src with random data */
+ RDG_genBuffer(CNBuffer, srcSize, compressibility, 0.5, seed);
+
+ /* Test with block delimiters roundtrip */
+ {
+ size_t const seqsSize = ZSTD_generateSequences(cctx, seqs, seqsCapacity, src, srcSize);
+ if (!ZSTD_isError(seqsSize)) goto _output_error;
+ }
+
+ ZSTD_freeCCtx(cctx);
+ free(seqs);
+ }
+ DISPLAYLEVEL(3, "OK \n");
+
DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences followed by ZSTD_compressSequences : ", testNb++);
{
const size_t srcSize = 500 KB;
--- /dev/null
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* Implementation notes:
+ * Generates a stream of Lorem ipsum paragraphs to stdout,
+ * up to the requested size, which can be very large (> 4 GB).
+ * Note that, beyond 1 paragraph, this generator produces
+ * different content than LOREM_genBuffer (even when using the same seed).
+ */
+
+#include "loremOut.h"
+#include <assert.h>
+#include <stdio.h>
+#include "lorem.h" /* LOREM_genBlock */
+#include "platform.h" /* Compiler options, SET_BINARY_MODE */
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define LOREM_BLOCKSIZE (1 << 10)
+/* Streams @size bytes of lorem-ipsum text to stdout, generated up to
+ * LOREM_BLOCKSIZE bytes at a time by LOREM_genBlock(), incrementing @seed
+ * for each block. stdout is switched to binary mode first. Asserts on exit
+ * that exactly @size bytes were produced; fwrite() errors are not checked
+ * (noted inline). */
+void LOREM_genOut(unsigned long long size, unsigned seed)
+{
+ char buff[LOREM_BLOCKSIZE] = { 0 };
+ unsigned long long total = 0;
+ size_t genBlockSize = (size_t)MIN(size, LOREM_BLOCKSIZE);
+
+ /* init */
+ SET_BINARY_MODE(stdout);
+
+ /* Generate Ipsum text, one paragraph at a time */
+ while (total < size) {
+ size_t generated =
+ LOREM_genBlock(buff, genBlockSize, seed++, total == 0, 0);
+ assert(generated <= genBlockSize);
+ total += generated;
+ assert(total <= size);
+ fwrite(buff,
+ 1,
+ generated,
+ stdout); /* note: should check potential write error */
+ /* shrink the request for the final partial block */
+ if (size - total < genBlockSize)
+ genBlockSize = (size_t)(size - total);
+ }
+ assert(total == size);
+}
--- /dev/null
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* LOREM_genOut():
+ * Generate @size bytes of compressible data using lorem ipsum generator into
+ * stdout.
+ */
+void LOREM_genOut(unsigned long long size, unsigned seed);
#!/bin/sh
-set -e
+set -e # exit immediately on error
+# set -x # print commands before execution (debug)
unset ZSTD_CLEVEL
unset ZSTD_NBTHREADS
}
zstd() {
- if [ -z "$EXEC_PREFIX" ]; then
+ if [ -z "$EXE_PREFIX" ]; then
"$ZSTD_BIN" "$@"
else
- "$EXEC_PREFIX" "$ZSTD_BIN" "$@"
+ "$EXE_PREFIX" "$ZSTD_BIN" "$@"
fi
}
sudoZstd() {
- if [ -z "$EXEC_PREFIX" ]; then
+ if [ -z "$EXE_PREFIX" ]; then
sudo "$ZSTD_BIN" "$@"
else
- sudo "$EXEC_PREFIX" "$ZSTD_BIN" "$@"
+ sudo "$EXE_PREFIX" "$ZSTD_BIN" "$@"
fi
}
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
PRGDIR="$SCRIPT_DIR/../programs"
TESTDIR="$SCRIPT_DIR/../tests"
-UNAME=$(uname)
+UNAME=${UNAME:-$(uname)}
+GREP=${GREP:-grep}
+
+case "$UNAME" in
+ SunOS) DIFF=${DIFF:-gdiff} ;;
+ *) DIFF=${DIFF:-diff} ;;
+esac
detectedTerminal=false
if [ -t 0 ] && [ -t 1 ]
[ "$STAT1" = "$STAT2" ] || die "permissions on $1 don't match those on $2 ($STAT1 != $STAT2)"
}
-DIFF="diff"
-case "$UNAME" in
- SunOS) DIFF="gdiff" ;;
-esac
-
# check if ZSTD_BIN is defined. if not, use the default value
if [ -z "${ZSTD_BIN}" ]; then
[ -n "$DATAGEN_BIN" ] || die "datagen not found at $DATAGEN_BIN! \n Please define DATAGEN_BIN pointing to the datagen binary. You might also consider rebuilding zstd tests following the instructions in README.md. "
println "\nStarting playTests.sh isWindows=$isWindows EXE_PREFIX='$EXE_PREFIX' ZSTD_BIN='$ZSTD_BIN' DATAGEN_BIN='$DATAGEN_BIN'"
-if echo hello | zstd -v -T2 2>&1 > $INTOVOID | grep -q 'multi-threading is disabled'
+if echo hello | zstd -v -T2 2>&1 > $INTOVOID | $GREP -q 'multi-threading is disabled'
then
hasMT=""
else
println "test : compress to stdout"
zstd tmp -c > tmpCompressed
zstd tmp --stdout > tmpCompressed # long command format
-println "test : compress to named file"
+
+println "test : compress to named file (-o)"
rm -f tmpCompressed
zstd tmp -o tmpCompressed
test -f tmpCompressed # file must be created
+
println "test : force write, correct order"
zstd tmp -fo tmpCompressed
+
+println "test : -c + -o : last one wins"
+rm -f tmpOut
+zstd tmp -c > tmpCompressed -o tmpOut
+test -f tmpOut # file must be created
+rm -f tmpCompressed
+zstd tmp -o tmpOut -c > tmpCompressed
+test -f tmpCompressed # file must be created
+
println "test : forgotten argument"
cp tmp tmp2
zstd tmp2 -fo && die "-o must be followed by filename "
println -n '' | zstd - --stdout | zstd -d --stdout
println "test : ensure small file doesn't add 3-bytes null block"
datagen -g1 > tmp1
-zstd tmp1 -c | wc -c | grep "14"
-zstd < tmp1 | wc -c | grep "14"
+zstd tmp1 -c | wc -c | $GREP "14"
+zstd < tmp1 | wc -c | $GREP "14"
println "test : decompress file with wrong suffix (must fail)"
zstd -d tmpCompressed && die "wrong suffix error not detected!"
zstd -df tmp && die "should have refused : wrong extension"
zstd tmpro -c --no-progress | zstd -d -f -o "$INTOVOID" --no-progress
zstd tmpro -cv --no-progress | zstd -dv -f -o "$INTOVOID" --no-progress
println "test: --progress flag"
-zstd tmpro -c | zstd -d -f -o "$INTOVOID" --progress 2>&1 | grep -E "[A-Za-z0-9._ ]+: [0-9]+ bytes"
-zstd tmpro -c | zstd -d -f -q -o "$INTOVOID" --progress 2>&1 | grep -E "[A-Za-z0-9._ ]+: [0-9]+ bytes"
-zstd tmpro -c | zstd -d -f -v -o "$INTOVOID" 2>&1 | grep -E "[A-Za-z0-9._ ]+: [0-9]+ bytes"
+zstd tmpro -c | zstd -d -f -o "$INTOVOID" --progress 2>&1 | $GREP '[A-Za-z0-9._ ]*: [0-9]* bytes'
+zstd tmpro -c | zstd -d -f -q -o "$INTOVOID" --progress 2>&1 | $GREP '[A-Za-z0-9._ ]*: [0-9]* bytes'
+zstd tmpro -c | zstd -d -f -v -o "$INTOVOID" 2>&1 | $GREP '[A-Za-z0-9._ ]*: [0-9]* bytes'
rm -f tmpro tmpro.zst
println "test: overwrite input file (must fail)"
zstd tmp -fo tmp && die "zstd compression overwrote the input file"
if [ "$isWindows" = false ] && [ "$UNAME" != "AIX" ]; then
if [ -n "$(which readelf)" ]; then
println "test: check if binary has executable stack (#2963)"
- readelf -lW "$ZSTD_BIN" | grep 'GNU_STACK .* RW ' || die "zstd binary has executable stack!"
+ readelf -lW "$ZSTD_BIN" | $GREP 'GNU_STACK .* RW ' || die "zstd binary has executable stack!"
fi
fi
+println "\n===> multiple_thread test "
+
+datagen > tmp
+println "test : single-thread "
+zstd --fast --single-thread tmp -o tmpMT0
+println "test : one worker thread (default)"
+zstd --fast -T1 tmp -o tmpMT1
+println "test : two worker threads "
+zstd --fast -T2 tmp -o tmpMT2
+println "test : 16-thread "
+zstd --fast -T16 tmp -o tmpMT3
+println "test : 127-thread "
+zstd --fast -T127 tmp -o tmpMT4
+println "test : 128-thread "
+zstd --fast -T128 tmp -o tmpMT5
+println "test : max allowed numeric value is 4294967295 "
+zstd --fast -4294967295 tmp -o tmpMT6
+println "test : numeric value overflows 32-bit unsigned int "
+zstd --fast -4294967296 tmp -o tmptest9 && die "max allowed numeric value is 4294967295"
+
+datagen > tmp
+println "test : basic compression "
+zstd -f tmp # trivial compression case, creates tmp.zst
+println "test : basic decompression"
+zstd -d -f -T1 tmp.zst
+println "note : decompression does not support -T mode, but execution support"
+rm -rf tmpMT*
+
+println "\n===> --fast_argument test "
+datagen > tmp
+println "test : basic compression "
+zstd -f tmp # trivial compression case, creates tmp.zst
+println "test: --fast=1"
+zstd --fast=1 -f tmp
+println "test: --fast=99"
+zstd --fast=99 -f tmp
+println "test: Invalid value -- negative number"
+zstd --fast=-1 -f tmp && die "error: Invalid value -- negative number"
+println "test: Invalid value -- zero"
+zstd --fast=0 -f tmp && die "error: Invalid value -- 0 number"
+println "test: max allowed numeric argument of --fast is 4294967295"
+zstd --fast=4294967295 -f tmp
+println "test: numeric value overflows 32-bit unsigned int "
+zstd --fast=4294967296 -f tmp && die "max allowed argument of --fast is 4294967295"
+
println "\n===> --exclude-compressed flag"
rm -rf precompressedFilterTestDir
mkdir -p precompressedFilterTestDir
# Files should get compressed again without the --exclude-compressed flag.
test -f precompressedFilterTestDir/input.5.zst.zst
test -f precompressedFilterTestDir/input.6.zst.zst
+
+# Test some other compressed file extensions
+datagen $size > precompressedFilterTestDir/input.flac
+datagen $size > precompressedFilterTestDir/input.mov
+datagen $size > precompressedFilterTestDir/input.mp3
+zstd --exclude-compressed --long --rm -r precompressedFilterTestDir
+test ! -f precompressedFilterTestDir/input.flac.zst
+test ! -f precompressedFilterTestDir/input.mov.zst
+test ! -f precompressedFilterTestDir/input.mp3.zst
+zstd --long --rm -r precompressedFilterTestDir
+test -f precompressedFilterTestDir/input.flac.zst
+test -f precompressedFilterTestDir/input.mov.zst
+test -f precompressedFilterTestDir/input.mp3.zst
rm -rf precompressedFilterTestDir
println "Test completed"
test -f tmp
zstd --rm tmp -c > $INTOVOID
test -f tmp # tmp shall still be there
+zstd --rm tmp --stdout > $INTOVOID
+test -f tmp # tmp shall still be there
zstd -f --rm tmp -c > $INTOVOID
test -f tmp # tmp shall still be there
zstd -f tmp -c > $INTOVOID --rm
test -f tmp
test -f tmp2
zstd -q tmp tmp2 -o tmp3.zst --rm && die "should refuse to concatenate"
-
+println "test: --rm is active with -o when single input"
+rm -f tmp2.zst
+zstd --rm tmp2 -o tmp2.zst
+test -f tmp2.zst
+test ! -f tmp2
+println "test: -c followed by -o => -o wins, so --rm remains active" # (#3719)
+rm tmp2.zst
+cp tmp tmp2
+zstd --rm tmp2 -c > $INTOVOID -o tmp2.zst
+test ! -f tmp2
+println "test: -o followed by -c => -c wins, so --rm is disabled" # (#3719)
+rm tmp3.zst
+cp tmp tmp2
+zstd -v --rm tmp2 -o tmp2.zst -c > tmp3.zst
+test -f tmp2
+test -f tmp3.zst
println "test : should quietly not remove non-regular file"
println hello > tmp
zstd tmp -f -o "$DEVDEVICE" 2>tmplog > "$INTOVOID"
-grep "Refusing to remove non-regular file" tmplog && die
+$GREP "Refusing to remove non-regular file" tmplog && die
rm -f tmplog
-zstd tmp -f -o "$INTOVOID" 2>&1 | grep "Refusing to remove non-regular file" && die
+zstd tmp -f -o "$INTOVOID" 2>&1 | $GREP "Refusing to remove non-regular file" && die
println "test : --rm on stdin"
println a | zstd --rm > $INTOVOID # --rm should remain silent
rm -f tmp
touch tmp_empty
zstd -d -o tmp2 "$TESTDIR/golden-decompression/empty-block.zst"
$DIFF -s tmp2 tmp_empty
+
+zstd -t "$TESTDIR/golden-decompression/zeroSeq_2B.zst"
+
+zstd -t "$TESTDIR/golden-decompression-errors/zeroSeq_extraneous.zst" && die "invalid Sequences section should have been detected"
+
rm -f tmp*
println "\n===> compress multiple files"
zstd tmp -f -o tmp.zst
sudoZstd -d tmp.zst -c > $INTOVOID
sudoZstd -d tmp.zst -o $INTOVOID
- ls -las $INTOVOID | grep "rw-rw-rw-"
+ ls -las $INTOVOID | $GREP "rw-rw-rw-"
fi
if [ -n "$READFROMBLOCKDEVICE" ] ; then
println "\n===> checking that zstd can read from a block device"
datagen -g65536 > tmp.img
sudo losetup -fP tmp.img
- LOOP_DEV=$(losetup -a | grep 'tmp\.img' | cut -f1 -d:)
+ LOOP_DEV=$(losetup -a | $GREP 'tmp\.img' | cut -f1 -d:)
[ -z "$LOOP_DEV" ] && die "failed to get loopback device"
sudoZstd $LOOP_DEV -c > tmp.img.zst && die "should fail without -f"
sudoZstd -f $LOOP_DEV -c > tmp.img.zst
datagen > tmp_contentsize
zstd -f tmp_contentsize
-zstd -lv tmp_contentsize.zst | grep "Decompressed Size:"
+zstd -lv tmp_contentsize.zst | $GREP "Decompressed Size:"
zstd -f --no-content-size tmp_contentsize
-zstd -lv tmp_contentsize.zst | grep "Decompressed Size:" && die
+zstd -lv tmp_contentsize.zst | $GREP "Decompressed Size:" && die
zstd -f --content-size tmp_contentsize
-zstd -lv tmp_contentsize.zst | grep "Decompressed Size:"
+zstd -lv tmp_contentsize.zst | $GREP "Decompressed Size:"
zstd -f --content-size --no-content-size tmp_contentsize
-zstd -lv tmp_contentsize.zst | grep "Decompressed Size:" && die
+zstd -lv tmp_contentsize.zst | $GREP "Decompressed Size:" && die
rm -rf tmp*
println "test : show-default-cparams regular"
println "test : show compression parameters in verbose mode"
datagen > tmp
zstd -vv tmp 2>&1 | \
-grep -q -E -- "--zstd=wlog=[[:digit:]]+,clog=[[:digit:]]+,hlog=[[:digit:]]+,\
-slog=[[:digit:]]+,mml=[[:digit:]]+,tlen=[[:digit:]]+,strat=[[:digit:]]+"
+$GREP -q -- "--zstd=wlog=[0-9]*,clog=[0-9]*,hlog=[0-9]*,slog=[0-9]*,mml=[0-9]*,tlen=[0-9]*,strat=[0-9]*"
rm -rf tmp*
println "\n===> Advanced compression parameters "
datagen -g12M -P90 > tmpCorpusHighCompress
zstd --train -B2K tmpCorpusHighCompress -o tmpDictHighCompress --memory=10K && die "Dictionary training should fail : --memory too low (10K)"
zstd --train -B2K tmpCorpusHighCompress -o tmpDictHighCompress --memory=5MB 2> zstTrainWithMemLimitStdErr
-cat zstTrainWithMemLimitStdErr | grep "setting manual memory limit for dictionary training data at 5 MB"
-cat zstTrainWithMemLimitStdErr | grep "Training samples set too large (12 MB); training on 5 MB only..."
+cat zstTrainWithMemLimitStdErr | $GREP "setting manual memory limit for dictionary training data at 5 MB"
+cat zstTrainWithMemLimitStdErr | $GREP "Training samples set too large (12 MB); training on 5 MB only..."
rm zstTrainWithMemLimitStdErr
println "\n===> fastCover dictionary builder : advanced options "
! zstd -d tmp.abc 2> tmplg
if [ $GZIPMODE -ne 1 ]; then
- grep ".gz" tmplg > $INTOVOID && die "Unsupported suffix listed"
+ $GREP ".gz" tmplg > $INTOVOID && die "Unsupported suffix listed"
fi
if [ $LZMAMODE -ne 1 ]; then
- grep ".lzma" tmplg > $INTOVOID && die "Unsupported suffix listed"
- grep ".xz" tmplg > $INTOVOID && die "Unsupported suffix listed"
+ $GREP ".lzma" tmplg > $INTOVOID && die "Unsupported suffix listed"
+ $GREP ".xz" tmplg > $INTOVOID && die "Unsupported suffix listed"
fi
if [ $LZ4MODE -ne 1 ]; then
- grep ".lz4" tmplg > $INTOVOID && die "Unsupported suffix listed"
+ $GREP ".lz4" tmplg > $INTOVOID && die "Unsupported suffix listed"
fi
touch tmp1
datagen > tmp3
zstd tmp*
zstd -l ./*.zst
-zstd -lv ./*.zst | grep "Decompressed Size:" # check that decompressed size is present in header
+zstd -lv ./*.zst | $GREP "Decompressed Size:" # check that decompressed size is present in header
zstd --list ./*.zst
zstd --list -v ./*.zst
zstd tmp5
zstd -l tmp5.zst
zstd -l tmp5* && die "-l must fail on non-zstd file"
-zstd -lv tmp5.zst | grep "Decompressed Size: 0 B (0 B)" # check that 0 size is present in header
+zstd -lv tmp5.zst | $GREP "Decompressed Size: 0 B (0 B)" # check that 0 size is present in header
zstd -lv tmp5* && die "-l must fail on non-zstd file"
println "\n===> zstd --list/-l test with no content size field "
datagen -g513K | zstd > tmp6.zst
zstd -l tmp6.zst
-zstd -lv tmp6.zst | grep "Decompressed Size:" && die "Field :Decompressed Size: should not be available in this compressed file"
+zstd -lv tmp6.zst | $GREP "Decompressed Size:" && die "Field :Decompressed Size: should not be available in this compressed file"
println "\n===> zstd --list/-l test with no checksum "
zstd -f --no-check tmp1
roundTripTest -g1M -P50 "1 --single-thread --long=29" " --zstd=wlog=28 --memory=512MB"
-println "\n===> zstd long distance matching with optimal parser compressed size tests "
-optCSize16=$(datagen -g511K | zstd -16 -c | wc -c)
-longCSize16=$(datagen -g511K | zstd -16 --long -c | wc -c)
-optCSize19=$(datagen -g2M | zstd -19 -c | wc -c)
-longCSize19=$(datagen -g2M | zstd -19 --long -c | wc -c)
-optCSize19wlog23=$(datagen -g2M | zstd -19 -c --zstd=wlog=23 | wc -c)
-longCSize19wlog23=$(datagen -g2M | zstd -19 -c --long=23 | wc -c)
-if [ "$longCSize16" -gt "$optCSize16" ]; then
- echo using --long on compression level 16 should not cause compressed size regression
- exit 1
-elif [ "$longCSize19" -gt "$optCSize19" ]; then
- echo using --long on compression level 19 should not cause compressed size regression
- exit 1
-elif [ "$longCSize19wlog23" -gt "$optCSize19wlog23" ]; then
- echo using --long on compression level 19 with wLog=23 should not cause compressed size regression
- exit 1
+if [ "$ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP" -ne "1" ]; then
+ println "\n===> zstd long distance matching with optimal parser compressed size tests "
+ optCSize16=$(datagen -g511K | zstd -16 -c | wc -c)
+ longCSize16=$(datagen -g511K | zstd -16 --long -c | wc -c)
+ optCSize19=$(datagen -g2M | zstd -19 -c | wc -c)
+ longCSize19=$(datagen -g2M | zstd -19 --long -c | wc -c)
+ optCSize19wlog23=$(datagen -g2M | zstd -19 -c --zstd=wlog=23 | wc -c)
+ longCSize19wlog23=$(datagen -g2M | zstd -19 -c --long=23 | wc -c)
+ if [ "$longCSize16" -gt "$optCSize16" ]; then
+ echo using --long on compression level 16 should not cause compressed size regression
+ exit 1
+ elif [ "$longCSize19" -gt "$optCSize19" ]; then
+ echo using --long on compression level 19 should not cause compressed size regression
+ exit 1
+ elif [ "$longCSize19wlog23" -gt "$optCSize19wlog23" ]; then
+ echo using --long on compression level 19 with wLog=23 should not cause compressed size regression
+ exit 1
+ fi
fi
println "\n===> zstd asyncio tests "
rm -rf tmp*
println "\n===> patch-from long mode trigger larger file test"
-datagen -g5000000 > tmp_dict
-datagen -g5000000 > tmp_patch
-zstd -15 --patch-from=tmp_dict tmp_patch 2>&1 | grep "long mode automatically triggered"
+if [ "$ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP" -eq "1" ]; then
+ # if binary tree strategies are excluded, the threshold is different
+ datagen -g10000000 > tmp_dict
+ datagen -g10000000 > tmp_patch
+else
+ datagen -g5000000 > tmp_dict
+ datagen -g5000000 > tmp_patch
+fi
+zstd -15 --patch-from=tmp_dict tmp_patch 2>&1 | $GREP "long mode automatically triggered"
rm -rf tmp*
println "\n===> patch-from very large dictionary and file test"
silesia.tar, level 7, compress simple, 4579828
silesia.tar, level 9, compress simple, 4555448
silesia.tar, level 13, compress simple, 4502956
-silesia.tar, level 16, compress simple, 4360546
-silesia.tar, level 19, compress simple, 4265911
+silesia.tar, level 16, compress simple, 4360385
+silesia.tar, level 19, compress simple, 4260939
silesia.tar, uncompressed literals, compress simple, 4854086
-silesia.tar, uncompressed literals optimal, compress simple, 4265911
+silesia.tar, uncompressed literals optimal, compress simple, 4260939
silesia.tar, huffman literals, compress simple, 6179047
github.tar, level -5, compress simple, 52115
github.tar, level -3, compress simple, 45678
github.tar, level 9, compress simple, 36723
github.tar, level 13, compress simple, 35501
github.tar, level 16, compress simple, 40466
-github.tar, level 19, compress simple, 32276
+github.tar, level 19, compress simple, 32262
github.tar, uncompressed literals, compress simple, 38831
-github.tar, uncompressed literals optimal, compress simple, 32276
+github.tar, uncompressed literals optimal, compress simple, 32262
github.tar, huffman literals, compress simple, 42560
silesia, level -5, compress cctx, 6857372
silesia, level -3, compress cctx, 6503412
silesia, level 7, compress cctx, 4570271
silesia, level 9, compress cctx, 4545850
silesia, level 13, compress cctx, 4493990
-silesia, level 16, compress cctx, 4360041
-silesia, level 19, compress cctx, 4296055
+silesia, level 16, compress cctx, 4359652
+silesia, level 19, compress cctx, 4266582
silesia, long distance mode, compress cctx, 4842075
silesia, multithreaded, compress cctx, 4842075
silesia, multithreaded long distance mode, compress cctx, 4842075
silesia, small chain log, compress cctx, 4912197
silesia, explicit params, compress cctx, 4794318
silesia, uncompressed literals, compress cctx, 4842075
-silesia, uncompressed literals optimal, compress cctx, 4296055
+silesia, uncompressed literals optimal, compress cctx, 4266582
silesia, huffman literals, compress cctx, 6172202
silesia, multithreaded with advanced params, compress cctx, 4842075
github, level -5, compress cctx, 204407
github, level 13, compress cctx, 132878
github, level 13 with dict, compress cctx, 39948
github, level 16, compress cctx, 133209
-github, level 16 with dict, compress cctx, 37568
+github, level 16 with dict, compress cctx, 37892
github, level 19, compress cctx, 132879
-github, level 19 with dict, compress cctx, 37567
+github, level 19 with dict, compress cctx, 37906
github, long distance mode, compress cctx, 141069
github, multithreaded, compress cctx, 141069
github, multithreaded long distance mode, compress cctx, 141069
silesia, level 7, zstdcli, 4570319
silesia, level 9, zstdcli, 4545898
silesia, level 13, zstdcli, 4494038
-silesia, level 16, zstdcli, 4360089
-silesia, level 19, zstdcli, 4296103
+silesia, level 16, zstdcli, 4359700
+silesia, level 19, zstdcli, 4266630
silesia, long distance mode, zstdcli, 4833785
silesia, multithreaded, zstdcli, 4842123
silesia, multithreaded long distance mode, zstdcli, 4833785
silesia, small chain log, zstdcli, 4912245
silesia, explicit params, zstdcli, 4795840
silesia, uncompressed literals, zstdcli, 5120614
-silesia, uncompressed literals optimal, zstdcli, 4319566
+silesia, uncompressed literals optimal, zstdcli, 4316928
silesia, huffman literals, zstdcli, 5321417
silesia, multithreaded with advanced params, zstdcli, 5120614
silesia.tar, level -5, zstdcli, 6862049
silesia.tar, level 7, zstdcli, 4581791
silesia.tar, level 9, zstdcli, 4555452
silesia.tar, level 13, zstdcli, 4502960
-silesia.tar, level 16, zstdcli, 4360550
-silesia.tar, level 19, zstdcli, 4265915
+silesia.tar, level 16, zstdcli, 4360389
+silesia.tar, level 19, zstdcli, 4260943
silesia.tar, no source size, zstdcli, 4854160
silesia.tar, long distance mode, zstdcli, 4845745
silesia.tar, multithreaded, zstdcli, 4854164
silesia.tar, small chain log, zstdcli, 4917022
silesia.tar, explicit params, zstdcli, 4821112
silesia.tar, uncompressed literals, zstdcli, 5122571
-silesia.tar, uncompressed literals optimal, zstdcli, 4310145
+silesia.tar, uncompressed literals optimal, zstdcli, 4308455
silesia.tar, huffman literals, zstdcli, 5342074
silesia.tar, multithreaded with advanced params, zstdcli, 5122571
github, level -5, zstdcli, 206407
github, level 13, zstdcli, 134878
github, level 13 with dict, zstdcli, 41900
github, level 16, zstdcli, 135209
-github, level 16 with dict, zstdcli, 39577
+github, level 16 with dict, zstdcli, 39902
github, level 19, zstdcli, 134879
-github, level 19 with dict, zstdcli, 39576
+github, level 19 with dict, zstdcli, 39916
github, long distance mode, zstdcli, 138332
github, multithreaded, zstdcli, 138332
github, multithreaded long distance mode, zstdcli, 138332
github.tar, level 13, zstdcli, 35505
github.tar, level 13 with dict, zstdcli, 37134
github.tar, level 16, zstdcli, 40470
-github.tar, level 16 with dict, zstdcli, 33378
-github.tar, level 19, zstdcli, 32280
-github.tar, level 19 with dict, zstdcli, 32716
+github.tar, level 16 with dict, zstdcli, 33379
+github.tar, level 19, zstdcli, 32266
+github.tar, level 19 with dict, zstdcli, 32705
github.tar, no source size, zstdcli, 38832
github.tar, no source size with dict, zstdcli, 38004
github.tar, long distance mode, zstdcli, 40236
github.tar, small chain log, zstdcli, 41673
github.tar, explicit params, zstdcli, 41385
github.tar, uncompressed literals, zstdcli, 41529
-github.tar, uncompressed literals optimal, zstdcli, 35401
+github.tar, uncompressed literals optimal, zstdcli, 35360
github.tar, huffman literals, zstdcli, 38857
github.tar, multithreaded with advanced params, zstdcli, 41529
silesia, level -5, advanced one pass, 6857372
silesia, level 12 row 1, advanced one pass, 4505658
silesia, level 12 row 2, advanced one pass, 4503429
silesia, level 13, advanced one pass, 4493990
-silesia, level 16, advanced one pass, 4360041
-silesia, level 19, advanced one pass, 4296055
+silesia, level 16, advanced one pass, 4359652
+silesia, level 19, advanced one pass, 4266582
silesia, no source size, advanced one pass, 4842075
silesia, long distance mode, advanced one pass, 4833710
silesia, multithreaded, advanced one pass, 4842075
silesia, small chain log, advanced one pass, 4912197
silesia, explicit params, advanced one pass, 4795840
silesia, uncompressed literals, advanced one pass, 5120566
-silesia, uncompressed literals optimal, advanced one pass, 4319518
+silesia, uncompressed literals optimal, advanced one pass, 4316880
silesia, huffman literals, advanced one pass, 5321369
silesia, multithreaded with advanced params, advanced one pass, 5120566
silesia.tar, level -5, advanced one pass, 6861055
silesia.tar, level 12 row 1, advanced one pass, 4514517
silesia.tar, level 12 row 2, advanced one pass, 4514007
silesia.tar, level 13, advanced one pass, 4502956
-silesia.tar, level 16, advanced one pass, 4360546
-silesia.tar, level 19, advanced one pass, 4265911
+silesia.tar, level 16, advanced one pass, 4360385
+silesia.tar, level 19, advanced one pass, 4260939
silesia.tar, no source size, advanced one pass, 4854086
silesia.tar, long distance mode, advanced one pass, 4840452
silesia.tar, multithreaded, advanced one pass, 4854160
silesia.tar, small chain log, advanced one pass, 4917041
silesia.tar, explicit params, advanced one pass, 4807274
silesia.tar, uncompressed literals, advanced one pass, 5122473
-silesia.tar, uncompressed literals optimal, advanced one pass, 4310141
+silesia.tar, uncompressed literals optimal, advanced one pass, 4308451
silesia.tar, huffman literals, advanced one pass, 5341705
silesia.tar, multithreaded with advanced params, advanced one pass, 5122567
github, level -5, advanced one pass, 204407
github, level 13 with dict copy, advanced one pass, 39948
github, level 13 with dict load, advanced one pass, 42624
github, level 16, advanced one pass, 133209
-github, level 16 with dict, advanced one pass, 37577
-github, level 16 with dict dms, advanced one pass, 37577
-github, level 16 with dict dds, advanced one pass, 37577
-github, level 16 with dict copy, advanced one pass, 37568
-github, level 16 with dict load, advanced one pass, 42338
+github, level 16 with dict, advanced one pass, 37902
+github, level 16 with dict dms, advanced one pass, 37902
+github, level 16 with dict dds, advanced one pass, 37902
+github, level 16 with dict copy, advanced one pass, 37892
+github, level 16 with dict load, advanced one pass, 42402
github, level 19, advanced one pass, 132879
-github, level 19 with dict, advanced one pass, 37576
-github, level 19 with dict dms, advanced one pass, 37576
-github, level 19 with dict dds, advanced one pass, 37576
-github, level 19 with dict copy, advanced one pass, 37567
-github, level 19 with dict load, advanced one pass, 39613
+github, level 19 with dict, advanced one pass, 37916
+github, level 19 with dict dms, advanced one pass, 37916
+github, level 19 with dict dds, advanced one pass, 37916
+github, level 19 with dict copy, advanced one pass, 37906
+github, level 19 with dict load, advanced one pass, 39770
github, no source size, advanced one pass, 136332
github, no source size with dict, advanced one pass, 41148
github, long distance mode, advanced one pass, 136332
github.tar, level 13 with dict copy, advanced one pass, 37130
github.tar, level 13 with dict load, advanced one pass, 36010
github.tar, level 16, advanced one pass, 40466
-github.tar, level 16 with dict, advanced one pass, 33374
-github.tar, level 16 with dict dms, advanced one pass, 33206
-github.tar, level 16 with dict dds, advanced one pass, 33206
-github.tar, level 16 with dict copy, advanced one pass, 33374
+github.tar, level 16 with dict, advanced one pass, 33375
+github.tar, level 16 with dict dms, advanced one pass, 33207
+github.tar, level 16 with dict dds, advanced one pass, 33207
+github.tar, level 16 with dict copy, advanced one pass, 33375
github.tar, level 16 with dict load, advanced one pass, 39081
-github.tar, level 19, advanced one pass, 32276
-github.tar, level 19 with dict, advanced one pass, 32712
-github.tar, level 19 with dict dms, advanced one pass, 32555
-github.tar, level 19 with dict dds, advanced one pass, 32555
-github.tar, level 19 with dict copy, advanced one pass, 32712
-github.tar, level 19 with dict load, advanced one pass, 32479
+github.tar, level 19, advanced one pass, 32262
+github.tar, level 19 with dict, advanced one pass, 32701
+github.tar, level 19 with dict dms, advanced one pass, 32565
+github.tar, level 19 with dict dds, advanced one pass, 32565
+github.tar, level 19 with dict copy, advanced one pass, 32701
+github.tar, level 19 with dict load, advanced one pass, 32428
github.tar, no source size, advanced one pass, 38831
github.tar, no source size with dict, advanced one pass, 37995
github.tar, long distance mode, advanced one pass, 40252
github.tar, small chain log, advanced one pass, 41669
github.tar, explicit params, advanced one pass, 41385
github.tar, uncompressed literals, advanced one pass, 41525
-github.tar, uncompressed literals optimal, advanced one pass, 35397
+github.tar, uncompressed literals optimal, advanced one pass, 35356
github.tar, huffman literals, advanced one pass, 38853
github.tar, multithreaded with advanced params, advanced one pass, 41525
silesia, level -5, advanced one pass small out, 6857372
silesia, level 12 row 1, advanced one pass small out, 4505658
silesia, level 12 row 2, advanced one pass small out, 4503429
silesia, level 13, advanced one pass small out, 4493990
-silesia, level 16, advanced one pass small out, 4360041
-silesia, level 19, advanced one pass small out, 4296055
+silesia, level 16, advanced one pass small out, 4359652
+silesia, level 19, advanced one pass small out, 4266582
silesia, no source size, advanced one pass small out, 4842075
silesia, long distance mode, advanced one pass small out, 4833710
silesia, multithreaded, advanced one pass small out, 4842075
silesia, small chain log, advanced one pass small out, 4912197
silesia, explicit params, advanced one pass small out, 4795840
silesia, uncompressed literals, advanced one pass small out, 5120566
-silesia, uncompressed literals optimal, advanced one pass small out, 4319518
+silesia, uncompressed literals optimal, advanced one pass small out, 4316880
silesia, huffman literals, advanced one pass small out, 5321369
silesia, multithreaded with advanced params, advanced one pass small out, 5120566
silesia.tar, level -5, advanced one pass small out, 6861055
silesia.tar, level 12 row 1, advanced one pass small out, 4514517
silesia.tar, level 12 row 2, advanced one pass small out, 4514007
silesia.tar, level 13, advanced one pass small out, 4502956
-silesia.tar, level 16, advanced one pass small out, 4360546
-silesia.tar, level 19, advanced one pass small out, 4265911
+silesia.tar, level 16, advanced one pass small out, 4360385
+silesia.tar, level 19, advanced one pass small out, 4260939
silesia.tar, no source size, advanced one pass small out, 4854086
silesia.tar, long distance mode, advanced one pass small out, 4840452
silesia.tar, multithreaded, advanced one pass small out, 4854160
silesia.tar, small chain log, advanced one pass small out, 4917041
silesia.tar, explicit params, advanced one pass small out, 4807274
silesia.tar, uncompressed literals, advanced one pass small out, 5122473
-silesia.tar, uncompressed literals optimal, advanced one pass small out, 4310141
+silesia.tar, uncompressed literals optimal, advanced one pass small out, 4308451
silesia.tar, huffman literals, advanced one pass small out, 5341705
silesia.tar, multithreaded with advanced params, advanced one pass small out, 5122567
github, level -5, advanced one pass small out, 204407
github, level 13 with dict copy, advanced one pass small out, 39948
github, level 13 with dict load, advanced one pass small out, 42624
github, level 16, advanced one pass small out, 133209
-github, level 16 with dict, advanced one pass small out, 37577
-github, level 16 with dict dms, advanced one pass small out, 37577
-github, level 16 with dict dds, advanced one pass small out, 37577
-github, level 16 with dict copy, advanced one pass small out, 37568
-github, level 16 with dict load, advanced one pass small out, 42338
+github, level 16 with dict, advanced one pass small out, 37902
+github, level 16 with dict dms, advanced one pass small out, 37902
+github, level 16 with dict dds, advanced one pass small out, 37902
+github, level 16 with dict copy, advanced one pass small out, 37892
+github, level 16 with dict load, advanced one pass small out, 42402
github, level 19, advanced one pass small out, 132879
-github, level 19 with dict, advanced one pass small out, 37576
-github, level 19 with dict dms, advanced one pass small out, 37576
-github, level 19 with dict dds, advanced one pass small out, 37576
-github, level 19 with dict copy, advanced one pass small out, 37567
-github, level 19 with dict load, advanced one pass small out, 39613
+github, level 19 with dict, advanced one pass small out, 37916
+github, level 19 with dict dms, advanced one pass small out, 37916
+github, level 19 with dict dds, advanced one pass small out, 37916
+github, level 19 with dict copy, advanced one pass small out, 37906
+github, level 19 with dict load, advanced one pass small out, 39770
github, no source size, advanced one pass small out, 136332
github, no source size with dict, advanced one pass small out, 41148
github, long distance mode, advanced one pass small out, 136332
github.tar, level 13 with dict copy, advanced one pass small out, 37130
github.tar, level 13 with dict load, advanced one pass small out, 36010
github.tar, level 16, advanced one pass small out, 40466
-github.tar, level 16 with dict, advanced one pass small out, 33374
-github.tar, level 16 with dict dms, advanced one pass small out, 33206
-github.tar, level 16 with dict dds, advanced one pass small out, 33206
-github.tar, level 16 with dict copy, advanced one pass small out, 33374
+github.tar, level 16 with dict, advanced one pass small out, 33375
+github.tar, level 16 with dict dms, advanced one pass small out, 33207
+github.tar, level 16 with dict dds, advanced one pass small out, 33207
+github.tar, level 16 with dict copy, advanced one pass small out, 33375
github.tar, level 16 with dict load, advanced one pass small out, 39081
-github.tar, level 19, advanced one pass small out, 32276
-github.tar, level 19 with dict, advanced one pass small out, 32712
-github.tar, level 19 with dict dms, advanced one pass small out, 32555
-github.tar, level 19 with dict dds, advanced one pass small out, 32555
-github.tar, level 19 with dict copy, advanced one pass small out, 32712
-github.tar, level 19 with dict load, advanced one pass small out, 32479
+github.tar, level 19, advanced one pass small out, 32262
+github.tar, level 19 with dict, advanced one pass small out, 32701
+github.tar, level 19 with dict dms, advanced one pass small out, 32565
+github.tar, level 19 with dict dds, advanced one pass small out, 32565
+github.tar, level 19 with dict copy, advanced one pass small out, 32701
+github.tar, level 19 with dict load, advanced one pass small out, 32428
github.tar, no source size, advanced one pass small out, 38831
github.tar, no source size with dict, advanced one pass small out, 37995
github.tar, long distance mode, advanced one pass small out, 40252
github.tar, small chain log, advanced one pass small out, 41669
github.tar, explicit params, advanced one pass small out, 41385
github.tar, uncompressed literals, advanced one pass small out, 41525
-github.tar, uncompressed literals optimal, advanced one pass small out, 35397
+github.tar, uncompressed literals optimal, advanced one pass small out, 35356
github.tar, huffman literals, advanced one pass small out, 38853
github.tar, multithreaded with advanced params, advanced one pass small out, 41525
silesia, level -5, advanced streaming, 6854744
silesia, level 12 row 1, advanced streaming, 4505658
silesia, level 12 row 2, advanced streaming, 4503429
silesia, level 13, advanced streaming, 4493990
-silesia, level 16, advanced streaming, 4360041
-silesia, level 19, advanced streaming, 4296055
+silesia, level 16, advanced streaming, 4359652
+silesia, level 19, advanced streaming, 4266582
silesia, no source size, advanced streaming, 4842039
silesia, long distance mode, advanced streaming, 4833710
silesia, multithreaded, advanced streaming, 4842075
silesia, small chain log, advanced streaming, 4912197
silesia, explicit params, advanced streaming, 4795857
silesia, uncompressed literals, advanced streaming, 5120566
-silesia, uncompressed literals optimal, advanced streaming, 4319518
+silesia, uncompressed literals optimal, advanced streaming, 4316880
silesia, huffman literals, advanced streaming, 5321370
silesia, multithreaded with advanced params, advanced streaming, 5120566
silesia.tar, level -5, advanced streaming, 6856523
silesia.tar, level 12 row 1, advanced streaming, 4514514
silesia.tar, level 12 row 2, advanced streaming, 4514003
silesia.tar, level 13, advanced streaming, 4502956
-silesia.tar, level 16, advanced streaming, 4360546
-silesia.tar, level 19, advanced streaming, 4265911
+silesia.tar, level 16, advanced streaming, 4360385
+silesia.tar, level 19, advanced streaming, 4260939
silesia.tar, no source size, advanced streaming, 4859267
silesia.tar, long distance mode, advanced streaming, 4840452
silesia.tar, multithreaded, advanced streaming, 4854160
silesia.tar, small chain log, advanced streaming, 4917021
silesia.tar, explicit params, advanced streaming, 4807288
silesia.tar, uncompressed literals, advanced streaming, 5127423
-silesia.tar, uncompressed literals optimal, advanced streaming, 4310141
+silesia.tar, uncompressed literals optimal, advanced streaming, 4308451
silesia.tar, huffman literals, advanced streaming, 5341712
silesia.tar, multithreaded with advanced params, advanced streaming, 5122567
github, level -5, advanced streaming, 204407
github, level 13 with dict copy, advanced streaming, 39948
github, level 13 with dict load, advanced streaming, 42624
github, level 16, advanced streaming, 133209
-github, level 16 with dict, advanced streaming, 37577
-github, level 16 with dict dms, advanced streaming, 37577
-github, level 16 with dict dds, advanced streaming, 37577
-github, level 16 with dict copy, advanced streaming, 37568
-github, level 16 with dict load, advanced streaming, 42338
+github, level 16 with dict, advanced streaming, 37902
+github, level 16 with dict dms, advanced streaming, 37902
+github, level 16 with dict dds, advanced streaming, 37902
+github, level 16 with dict copy, advanced streaming, 37892
+github, level 16 with dict load, advanced streaming, 42402
github, level 19, advanced streaming, 132879
-github, level 19 with dict, advanced streaming, 37576
-github, level 19 with dict dms, advanced streaming, 37576
-github, level 19 with dict dds, advanced streaming, 37576
-github, level 19 with dict copy, advanced streaming, 37567
-github, level 19 with dict load, advanced streaming, 39613
+github, level 19 with dict, advanced streaming, 37916
+github, level 19 with dict dms, advanced streaming, 37916
+github, level 19 with dict dds, advanced streaming, 37916
+github, level 19 with dict copy, advanced streaming, 37906
+github, level 19 with dict load, advanced streaming, 39770
github, no source size, advanced streaming, 136332
github, no source size with dict, advanced streaming, 41148
github, long distance mode, advanced streaming, 136332
github.tar, level 13 with dict copy, advanced streaming, 37130
github.tar, level 13 with dict load, advanced streaming, 36010
github.tar, level 16, advanced streaming, 40466
-github.tar, level 16 with dict, advanced streaming, 33374
-github.tar, level 16 with dict dms, advanced streaming, 33206
-github.tar, level 16 with dict dds, advanced streaming, 33206
-github.tar, level 16 with dict copy, advanced streaming, 33374
+github.tar, level 16 with dict, advanced streaming, 33375
+github.tar, level 16 with dict dms, advanced streaming, 33207
+github.tar, level 16 with dict dds, advanced streaming, 33207
+github.tar, level 16 with dict copy, advanced streaming, 33375
github.tar, level 16 with dict load, advanced streaming, 39081
-github.tar, level 19, advanced streaming, 32276
-github.tar, level 19 with dict, advanced streaming, 32712
-github.tar, level 19 with dict dms, advanced streaming, 32555
-github.tar, level 19 with dict dds, advanced streaming, 32555
-github.tar, level 19 with dict copy, advanced streaming, 32712
-github.tar, level 19 with dict load, advanced streaming, 32479
+github.tar, level 19, advanced streaming, 32262
+github.tar, level 19 with dict, advanced streaming, 32701
+github.tar, level 19 with dict dms, advanced streaming, 32565
+github.tar, level 19 with dict dds, advanced streaming, 32565
+github.tar, level 19 with dict copy, advanced streaming, 32701
+github.tar, level 19 with dict load, advanced streaming, 32428
github.tar, no source size, advanced streaming, 38828
github.tar, no source size with dict, advanced streaming, 38000
github.tar, long distance mode, advanced streaming, 40252
github.tar, small chain log, advanced streaming, 41669
github.tar, explicit params, advanced streaming, 41385
github.tar, uncompressed literals, advanced streaming, 41525
-github.tar, uncompressed literals optimal, advanced streaming, 35397
+github.tar, uncompressed literals optimal, advanced streaming, 35356
github.tar, huffman literals, advanced streaming, 38853
github.tar, multithreaded with advanced params, advanced streaming, 41525
silesia, level -5, old streaming, 6854744
silesia, level 7, old streaming, 4570271
silesia, level 9, old streaming, 4545850
silesia, level 13, old streaming, 4493990
-silesia, level 16, old streaming, 4360041
-silesia, level 19, old streaming, 4296055
+silesia, level 16, old streaming, 4359652
+silesia, level 19, old streaming, 4266582
silesia, no source size, old streaming, 4842039
silesia, uncompressed literals, old streaming, 4842075
-silesia, uncompressed literals optimal, old streaming, 4296055
+silesia, uncompressed literals optimal, old streaming, 4266582
silesia, huffman literals, old streaming, 6172207
silesia.tar, level -5, old streaming, 6856523
silesia.tar, level -3, old streaming, 6505954
silesia.tar, level 7, old streaming, 4579823
silesia.tar, level 9, old streaming, 4555445
silesia.tar, level 13, old streaming, 4502956
-silesia.tar, level 16, old streaming, 4360546
-silesia.tar, level 19, old streaming, 4265911
+silesia.tar, level 16, old streaming, 4360385
+silesia.tar, level 19, old streaming, 4260939
silesia.tar, no source size, old streaming, 4859267
silesia.tar, uncompressed literals, old streaming, 4859271
-silesia.tar, uncompressed literals optimal, old streaming, 4265911
+silesia.tar, uncompressed literals optimal, old streaming, 4260939
silesia.tar, huffman literals, old streaming, 6179056
github, level -5, old streaming, 204407
github, level -5 with dict, old streaming, 45832
github, level 13, old streaming, 132878
github, level 13 with dict, old streaming, 39900
github, level 16, old streaming, 133209
-github, level 16 with dict, old streaming, 37577
+github, level 16 with dict, old streaming, 37902
github, level 19, old streaming, 132879
-github, level 19 with dict, old streaming, 37576
+github, level 19 with dict, old streaming, 37916
github, no source size, old streaming, 140599
github, no source size with dict, old streaming, 40654
github, uncompressed literals, old streaming, 136332
github.tar, level 13, old streaming, 35501
github.tar, level 13 with dict, old streaming, 37130
github.tar, level 16, old streaming, 40466
-github.tar, level 16 with dict, old streaming, 33374
-github.tar, level 19, old streaming, 32276
-github.tar, level 19 with dict, old streaming, 32712
+github.tar, level 16 with dict, old streaming, 33375
+github.tar, level 19, old streaming, 32262
+github.tar, level 19 with dict, old streaming, 32701
github.tar, no source size, old streaming, 38828
github.tar, no source size with dict, old streaming, 38000
github.tar, uncompressed literals, old streaming, 38831
-github.tar, uncompressed literals optimal, old streaming, 32276
+github.tar, uncompressed literals optimal, old streaming, 32262
github.tar, huffman literals, old streaming, 42560
silesia, level -5, old streaming advanced, 6854744
silesia, level -3, old streaming advanced, 6503319
silesia, level 7, old streaming advanced, 4570271
silesia, level 9, old streaming advanced, 4545850
silesia, level 13, old streaming advanced, 4493990
-silesia, level 16, old streaming advanced, 4360041
-silesia, level 19, old streaming advanced, 4296055
+silesia, level 16, old streaming advanced, 4359652
+silesia, level 19, old streaming advanced, 4266582
silesia, no source size, old streaming advanced, 4842039
silesia, long distance mode, old streaming advanced, 4842075
silesia, multithreaded, old streaming advanced, 4842075
silesia, small chain log, old streaming advanced, 4912197
silesia, explicit params, old streaming advanced, 4795857
silesia, uncompressed literals, old streaming advanced, 4842075
-silesia, uncompressed literals optimal, old streaming advanced, 4296055
+silesia, uncompressed literals optimal, old streaming advanced, 4266582
silesia, huffman literals, old streaming advanced, 6172207
silesia, multithreaded with advanced params, old streaming advanced, 4842075
silesia.tar, level -5, old streaming advanced, 6856523
silesia.tar, level 7, old streaming advanced, 4579823
silesia.tar, level 9, old streaming advanced, 4555445
silesia.tar, level 13, old streaming advanced, 4502956
-silesia.tar, level 16, old streaming advanced, 4360546
-silesia.tar, level 19, old streaming advanced, 4265911
+silesia.tar, level 16, old streaming advanced, 4360385
+silesia.tar, level 19, old streaming advanced, 4260939
silesia.tar, no source size, old streaming advanced, 4859267
silesia.tar, long distance mode, old streaming advanced, 4859271
silesia.tar, multithreaded, old streaming advanced, 4859271
silesia.tar, small chain log, old streaming advanced, 4917021
silesia.tar, explicit params, old streaming advanced, 4807288
silesia.tar, uncompressed literals, old streaming advanced, 4859271
-silesia.tar, uncompressed literals optimal, old streaming advanced, 4265911
+silesia.tar, uncompressed literals optimal, old streaming advanced, 4260939
silesia.tar, huffman literals, old streaming advanced, 6179056
silesia.tar, multithreaded with advanced params, old streaming advanced, 4859271
github, level -5, old streaming advanced, 213265
github, level 13, old streaming advanced, 138676
github, level 13 with dict, old streaming advanced, 39725
github, level 16, old streaming advanced, 138575
-github, level 16 with dict, old streaming advanced, 40789
+github, level 16 with dict, old streaming advanced, 40804
github, level 19, old streaming advanced, 132879
-github, level 19 with dict, old streaming advanced, 37576
+github, level 19 with dict, old streaming advanced, 37916
github, no source size, old streaming advanced, 140599
github, no source size with dict, old streaming advanced, 40608
github, long distance mode, old streaming advanced, 141104
github.tar, level 13 with dict, old streaming advanced, 35807
github.tar, level 16, old streaming advanced, 40466
github.tar, level 16 with dict, old streaming advanced, 38578
-github.tar, level 19, old streaming advanced, 32276
-github.tar, level 19 with dict, old streaming advanced, 32704
+github.tar, level 19, old streaming advanced, 32262
+github.tar, level 19 with dict, old streaming advanced, 32678
github.tar, no source size, old streaming advanced, 38828
github.tar, no source size with dict, old streaming advanced, 38015
github.tar, long distance mode, old streaming advanced, 38831
github.tar, small chain log, old streaming advanced, 41669
github.tar, explicit params, old streaming advanced, 41385
github.tar, uncompressed literals, old streaming advanced, 38831
-github.tar, uncompressed literals optimal, old streaming advanced, 32276
+github.tar, uncompressed literals optimal, old streaming advanced, 32262
github.tar, huffman literals, old streaming advanced, 42560
github.tar, multithreaded with advanced params, old streaming advanced, 38831
github, level -5 with dict, old streaming cdict, 45832
github, level 7 with dict, old streaming cdict, 38765
github, level 9 with dict, old streaming cdict, 39439
github, level 13 with dict, old streaming cdict, 39900
-github, level 16 with dict, old streaming cdict, 37577
-github, level 19 with dict, old streaming cdict, 37576
+github, level 16 with dict, old streaming cdict, 37902
+github, level 19 with dict, old streaming cdict, 37916
github, no source size with dict, old streaming cdict, 40654
github.tar, level -5 with dict, old streaming cdict, 51286
github.tar, level -3 with dict, old streaming cdict, 45147
github.tar, level 9 with dict, old streaming cdict, 36322
github.tar, level 13 with dict, old streaming cdict, 36010
github.tar, level 16 with dict, old streaming cdict, 39081
-github.tar, level 19 with dict, old streaming cdict, 32479
+github.tar, level 19 with dict, old streaming cdict, 32428
github.tar, no source size with dict, old streaming cdict, 38000
github, level -5 with dict, old streaming advanced cdict, 46708
github, level -3 with dict, old streaming advanced cdict, 45476
github, level 7 with dict, old streaming advanced cdict, 38875
github, level 9 with dict, old streaming advanced cdict, 38941
github, level 13 with dict, old streaming advanced cdict, 39725
-github, level 16 with dict, old streaming advanced cdict, 40789
-github, level 19 with dict, old streaming advanced cdict, 37576
+github, level 16 with dict, old streaming advanced cdict, 40804
+github, level 19 with dict, old streaming advanced cdict, 37916
github, no source size with dict, old streaming advanced cdict, 40608
github.tar, level -5 with dict, old streaming advanced cdict, 50791
github.tar, level -3 with dict, old streaming advanced cdict, 44926
github.tar, level 9 with dict, old streaming advanced cdict, 36241
github.tar, level 13 with dict, old streaming advanced cdict, 35807
github.tar, level 16 with dict, old streaming advanced cdict, 38578
-github.tar, level 19 with dict, old streaming advanced cdict, 32704
+github.tar, level 19 with dict, old streaming advanced cdict, 32678
github.tar, no source size with dict, old streaming advanced cdict, 38015
if (inBuff.pos != inBuff.size) goto _output_error; /* should have read the entire frame */
DISPLAYLEVEL(3, "OK \n");
- /* Re-use without init */
- DISPLAYLEVEL(3, "test%3i : decompress again without init (re-use previous settings): ", testNb++);
+ /* Reuse without init */
+ DISPLAYLEVEL(3, "test%3i : decompress again without init (reuse previous settings): ", testNb++);
outBuff.pos = 0;
{ size_t const remaining = ZSTD_decompressStream(zd, &outBuff, &inBuff2);
if (remaining != 0) goto _output_error; } /* should reach end of frame == 0; otherwise, some data left, or an error */
DISPLAYLEVEL(3, "OK (error detected : %s) \n", ZSTD_getErrorName(r));
} }
- /* Compression state re-use scenario */
- DISPLAYLEVEL(3, "test%3i : context re-use : ", testNb++);
+ /* Compression state reuse scenario */
+ DISPLAYLEVEL(3, "test%3i : context reuse : ", testNb++);
ZSTD_freeCStream(zc);
zc = ZSTD_createCStream();
if (zc==NULL) goto _output_error; /* memory allocation issue */
}
DISPLAYLEVEL(3, "OK \n");
+ DISPLAYLEVEL(3, "test%3i : maxBlockSize = 2KB : ", testNb++);
+ {
+ ZSTD_DCtx* dctx = ZSTD_createDCtx();
+ size_t singlePassSize, streamingSize, streaming2KSize;
+
+ {
+ ZSTD_CCtx* cctx = ZSTD_createCCtx();
+ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
+ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18));
+ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0));
+ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, 2048));
+ cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBufferSize);
+ CHECK_Z(cSize);
+ ZSTD_freeCCtx(cctx);
+ }
+
+ CHECK_Z(ZSTD_decompressDCtx(dctx, decodedBuffer, CNBufferSize, compressedBuffer, cSize));
+ singlePassSize = ZSTD_sizeof_DCtx(dctx);
+ CHECK_Z(singlePassSize);
+
+ inBuff.src = compressedBuffer;
+ inBuff.size = cSize;
+
+ outBuff.dst = decodedBuffer;
+ outBuff.size = decodedBufferSize;
+
+ CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 2048));
+ inBuff.pos = 0;
+ outBuff.pos = 0;
+ {
+ size_t const r = ZSTD_decompressStream(dctx, &outBuff, &inBuff);
+ CHECK_Z(r);
+ CHECK(r != 0, "Entire frame must be decompressed");
+ }
+ streaming2KSize = ZSTD_sizeof_DCtx(dctx);
+ CHECK_Z(streaming2KSize);
+
+ CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
+ inBuff.pos = 0;
+ outBuff.pos = 0;
+ {
+ size_t const r = ZSTD_decompressStream(dctx, &outBuff, &inBuff);
+ CHECK_Z(r);
+ CHECK(r != 0, "Entire frame must be decompressed");
+ }
+ streamingSize = ZSTD_sizeof_DCtx(dctx);
+ CHECK_Z(streamingSize);
+
+ CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 1024));
+ inBuff.pos = 0;
+ outBuff.pos = 0;
+ CHECK(!ZSTD_isError(ZSTD_decompressStream(dctx, &outBuff, &inBuff)), "decompression must fail");
+
+ CHECK(streamingSize < singlePassSize + (1 << 18) + 3 * ZSTD_BLOCKSIZE_MAX, "Streaming doesn't use the right amount of memory");
+ CHECK(streamingSize != streaming2KSize + 3 * (ZSTD_BLOCKSIZE_MAX - 2048), "ZSTD_d_blockSizeMax didn't save the right amount of memory");
+ DISPLAYLEVEL(3, "| %zu | %zu | %zu | ", singlePassSize, streaming2KSize, streamingSize);
+
+ ZSTD_freeDCtx(dctx);
+ }
+ DISPLAYLEVEL(3, "OK \n");
+
/* Decompression with ZSTD_d_stableOutBuffer */
cSize = ZSTD_compress(compressedBuffer, compressedBufferSize, CNBuffer, CNBufferSize, 1);
CHECK_Z(cSize);
DISPLAYLEVEL(3, "test%3i : Block-Level External Sequence Producer API: ", testNb++);
{
size_t const dstBufSize = ZSTD_compressBound(CNBufferSize);
- BYTE* const dstBuf = (BYTE*)malloc(ZSTD_compressBound(dstBufSize));
+ BYTE* const dstBuf = (BYTE*)malloc(dstBufSize);
size_t const checkBufSize = CNBufferSize;
BYTE* const checkBuf = (BYTE*)malloc(checkBufSize);
int enableFallback;
}
DISPLAYLEVEL(3, "OK \n");
+ DISPLAYLEVEL(3, "test%3i : Testing external sequence producer with static CCtx: ", testNb++);
+ {
+ size_t const dstBufSize = ZSTD_compressBound(CNBufferSize);
+ BYTE* const dstBuf = (BYTE*)malloc(dstBufSize);
+ size_t const checkBufSize = CNBufferSize;
+ BYTE* const checkBuf = (BYTE*)malloc(checkBufSize);
+ ZSTD_CCtx_params* params = ZSTD_createCCtxParams();
+ ZSTD_CCtx* staticCCtx;
+ void* cctxBuf;
+ EMF_testCase seqProdState;
+
+ CHECK_Z(ZSTD_CCtxParams_setParameter(params, ZSTD_c_validateSequences, 1));
+ CHECK_Z(ZSTD_CCtxParams_setParameter(params, ZSTD_c_enableSeqProducerFallback, 0));
+ ZSTD_CCtxParams_registerSequenceProducer(params, &seqProdState, zstreamSequenceProducer);
+
+ {
+ size_t const cctxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params);
+ cctxBuf = malloc(cctxSize);
+ staticCCtx = ZSTD_initStaticCCtx(cctxBuf, cctxSize);
+ ZSTD_CCtx_setParametersUsingCCtxParams(staticCCtx, params);
+ }
+
+ // Check that compression with external sequence producer succeeds when expected
+ seqProdState = EMF_LOTS_OF_SEQS;
+ {
+ size_t dResult;
+ size_t const cResult = ZSTD_compress2(staticCCtx, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
+ CHECK(ZSTD_isError(cResult), "EMF: Compression error: %s", ZSTD_getErrorName(cResult));
+ dResult = ZSTD_decompress(checkBuf, checkBufSize, dstBuf, cResult);
+ CHECK(ZSTD_isError(dResult), "EMF: Decompression error: %s", ZSTD_getErrorName(dResult));
+ CHECK(dResult != CNBufferSize, "EMF: Corruption!");
+ CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!");
+ }
+
+ // Check that compression with external sequence producer fails when expected
+ seqProdState = EMF_BIG_ERROR;
+ {
+ size_t const cResult = ZSTD_compress2(staticCCtx, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
+ CHECK(!ZSTD_isError(cResult), "EMF: Should have raised an error!");
+ CHECK(
+ ZSTD_getErrorCode(cResult) != ZSTD_error_sequenceProducer_failed,
+ "EMF: Wrong error code: %s", ZSTD_getErrorName(cResult)
+ );
+ }
+
+ free(dstBuf);
+ free(checkBuf);
+ free(cctxBuf);
+ ZSTD_freeCCtxParams(params);
+ }
+ DISPLAYLEVEL(3, "OK \n");
+
+ DISPLAYLEVEL(3, "test%3i : Decoder should reject invalid frame header on legacy frames: ", testNb++);
+ {
+ const unsigned char compressed[] = { 0x26,0xb5,0x2f,0xfd,0x50,0x91,0xfd,0xd8,0xb5 };
+ const size_t compressedSize = 9;
+ size_t const dSize = ZSTD_decompress(NULL, 0, compressed, compressedSize);
+ CHECK(!ZSTD_isError(dSize), "must reject when legacy frame header is invalid");
+ }
+ DISPLAYLEVEL(3, "OK \n");
+
+ DISPLAYLEVEL(3, "test%3i : Test single-shot fallback for magicless mode: ", testNb++);
+ {
+ // Aquire resources
+ size_t const srcSize = COMPRESSIBLE_NOISE_LENGTH;
+ void* src = malloc(srcSize);
+ size_t const dstSize = ZSTD_compressBound(srcSize);
+ void* dst = malloc(dstSize);
+ size_t const valSize = srcSize;
+ void* val = malloc(valSize);
+ ZSTD_inBuffer inBuf = { dst, dstSize, 0 };
+ ZSTD_outBuffer outBuf = { val, valSize, 0 };
+ ZSTD_CCtx* cctx = ZSTD_createCCtx();
+ ZSTD_DCtx* dctx = ZSTD_createDCtx();
+ CHECK(!src || !dst || !val || !dctx || !cctx, "memory allocation failure");
+
+ // Write test data for decompression to dst
+ RDG_genBuffer(src, srcSize, compressibility, 0.0, 0xdeadbeef);
+ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless));
+ CHECK_Z(ZSTD_compress2(cctx, dst, dstSize, src, srcSize));
+
+ // Run decompression
+ CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless));
+ CHECK_Z(ZSTD_decompressStream(dctx, &outBuf, &inBuf));
+
+ // Validate
+ CHECK(outBuf.pos != srcSize, "decompressed size must match");
+ CHECK(memcmp(src, val, srcSize) != 0, "decompressed data must match");
+
+ // Cleanup
+ free(src); free(dst); free(val);
+ ZSTD_freeCCtx(cctx);
+ ZSTD_freeDCtx(dctx);
+ }
+ DISPLAYLEVEL(3, "OK \n");
+
_end:
FUZ_freeDictionary(dictionary);
ZSTD_freeCStream(zc);
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_forceMaxWindow, FUZ_rand(&lseed) & 1, opaqueAPI) );
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_deterministicRefPrefix, FUZ_rand(&lseed) & 1, opaqueAPI) );
+ /* Set max block size parameters */
+ if (FUZ_rand(&lseed) & 1) {
+ int maxBlockSize = (int)(FUZ_rand(&lseed) % ZSTD_BLOCKSIZE_MAX);
+ maxBlockSize = MAX(1024, maxBlockSize);
+ CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_maxBlockSize, maxBlockSize, opaqueAPI) );
+ }
+
/* Apply parameters */
if (opaqueAPI) {
DISPLAYLEVEL(5, "t%u: applying CCtxParams \n", testNb);
if (FUZ_rand(&lseed) & 1) {
CHECK_Z(ZSTD_DCtx_setParameter(zd, ZSTD_d_disableHuffmanAssembly, FUZ_rand(&lseed) & 1));
}
+ if (FUZ_rand(&lseed) & 1) {
+ int maxBlockSize;
+ CHECK_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_maxBlockSize, &maxBlockSize));
+ CHECK_Z(ZSTD_DCtx_setParameter(zd, ZSTD_d_maxBlockSize, maxBlockSize));
+ } else {
+ CHECK_Z(ZSTD_DCtx_setParameter(zd, ZSTD_d_maxBlockSize, 0));
+ }
{ size_t decompressionResult = 1;
ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 };
ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 };
void *myalloc _Z_OF((void *, unsigned, unsigned));
void myfree _Z_OF((void *, void *));
-void *myalloc(q, n, m)
- void *q;
- unsigned n, m;
+void *myalloc(void *q, unsigned n, unsigned m)
{
void *buf = calloc(n, m);
q = Z_NULL;
/* ===========================================================================
* Test compress() and uncompress()
*/
-void test_compress(compr, comprLen, uncompr, uncomprLen)
- Byte *compr, *uncompr;
- uLong comprLen, uncomprLen;
-{
+void test_compress(Byte *compr, uLong comprLen, Byte *uncompr,
+ uLong uncomprLen) {
int err;
uLong len = (uLong)strlen(hello)+1;
/* ===========================================================================
* Test read/write of .gz files
*/
-void test_gzio(fname, uncompr, uncomprLen)
- const char *fname; /* compressed file name */
- Byte *uncompr;
- uLong uncomprLen;
-{
+void test_gzio(const char *fname, Byte *uncompr, uLong uncomprLen) {
#ifdef NO_GZCOMPRESS
fprintf(stderr, "NO_GZCOMPRESS -- gz* functions cannot compress\n");
#else
/* ===========================================================================
* Test deflate() with small buffers
*/
-void test_deflate(compr, comprLen)
- Byte *compr;
- uLong comprLen;
-{
+void test_deflate(Byte *compr, uLong comprLen) {
z_stream c_stream; /* compression stream */
int err;
uLong len = (uLong)strlen(hello)+1;
/* ===========================================================================
* Test inflate() with small buffers
*/
-void test_inflate(compr, comprLen, uncompr, uncomprLen)
- Byte *compr, *uncompr;
- uLong comprLen, uncomprLen;
-{
+void test_inflate(Byte *compr, uLong comprLen, Byte *uncompr,
+ uLong uncomprLen) {
int err;
z_stream d_stream; /* decompression stream */
/* ===========================================================================
* Test deflate() with large buffers and dynamic change of compression level
*/
-void test_large_deflate(compr, comprLen, uncompr, uncomprLen)
- Byte *compr, *uncompr;
- uLong comprLen, uncomprLen;
-{
+void test_large_deflate(Byte *compr, uLong comprLen, Byte *uncompr,
+ uLong uncomprLen) {
z_stream c_stream; /* compression stream */
int err;
/* ===========================================================================
* Test inflate() with large buffers
- */
-void test_large_inflate(compr, comprLen, uncompr, uncomprLen)
- Byte *compr, *uncompr;
- uLong comprLen, uncomprLen;
-{
+ */
+void test_large_inflate(Byte *compr, uLong comprLen, Byte *uncompr,
+ uLong uncomprLen) {
int err;
z_stream d_stream; /* decompression stream */
/* ===========================================================================
* Test deflate() with full flush
*/
-void test_flush(compr, comprLen)
- Byte *compr;
- uLong *comprLen;
-{
+void test_flush(Byte *compr, uLong *comprLen) {
z_stream c_stream; /* compression stream */
int err;
uInt len = (uInt)strlen(hello)+1;
/* ===========================================================================
* Test inflateSync()
*/
-void test_sync(compr, comprLen, uncompr, uncomprLen)
- Byte *compr, *uncompr;
- uLong comprLen, uncomprLen;
-{
+void test_sync(Byte *compr, uLong comprLen, Byte *uncompr, uLong uncomprLen) {
int err;
z_stream d_stream; /* decompression stream */
/* ===========================================================================
* Test deflate() with preset dictionary
*/
-void test_dict_deflate(compr, comprLen)
- Byte *compr;
- uLong comprLen;
-{
+void test_dict_deflate(Byte *compr, uLong comprLen) {
z_stream c_stream; /* compression stream */
int err;
/* ===========================================================================
* Test inflate() with a preset dictionary
*/
-void test_dict_inflate(compr, comprLen, uncompr, uncomprLen)
- Byte *compr, *uncompr;
- uLong comprLen, uncomprLen;
-{
+void test_dict_inflate(Byte *compr, uLong comprLen, Byte *uncompr,
+ uLong uncomprLen) {
int err;
z_stream d_stream; /* decompression stream */
* Usage: example [output.gz [input.gz]]
*/
-int main(argc, argv)
- int argc;
- char *argv[];
-{
+int main(int argc, char *argv[]) {
Byte *compr, *uncompr;
uLong comprLen = 10000*sizeof(int); /* don't overflow on MSDOS */
uLong uncomprLen = comprLen;
/* ===========================================================================
* Test compress() and uncompress()
*/
-void test_compress(compr, comprLen, uncompr, uncomprLen)
- Byte *compr, *uncompr;
- uLong comprLen, uncomprLen;
+void test_compress(Byte *compr, uLong comprLen, Byte *uncompr, uLong uncomprLen)
{
int err;
uLong len = (uLong)strlen(hello)+1;
/* ===========================================================================
* Test read/write of .gz files
*/
-void test_gzio(fname, uncompr, uncomprLen)
- const char *fname; /* compressed file name */
- Byte *uncompr;
- uLong uncomprLen;
+void test_gzio(const char *fname /* compressed file name */, Byte *uncompr,
+ uLong uncomprLen)
{
#ifdef NO_GZCOMPRESS
fprintf(stderr, "NO_GZCOMPRESS -- gz* functions cannot compress\n");
/* ===========================================================================
* Test deflate() with small buffers
*/
-void test_deflate(compr, comprLen)
- Byte *compr;
- uLong comprLen;
+void test_deflate(Byte *compr, uLong comprLen)
{
z_stream c_stream; /* compression stream */
int err;
/* ===========================================================================
* Test inflate() with small buffers
*/
-void test_inflate(compr, comprLen, uncompr, uncomprLen)
- Byte *compr, *uncompr;
- uLong comprLen, uncomprLen;
+void test_inflate(Byte *compr, uLong comprLen, Byte *uncompr, uLong uncomprLen)
{
int err;
z_stream d_stream; /* decompression stream */
/* ===========================================================================
* Test deflate() with large buffers and dynamic change of compression level
*/
-void test_large_deflate(compr, comprLen, uncompr, uncomprLen)
- Byte *compr, *uncompr;
- uLong comprLen, uncomprLen;
+void test_large_deflate(Byte *compr, uLong comprLen, Byte *uncompr,
+ uLong uncomprLen)
{
z_stream c_stream; /* compression stream */
int err;
/* ===========================================================================
* Test inflate() with large buffers
*/
-void test_large_inflate(compr, comprLen, uncompr, uncomprLen)
- Byte *compr, *uncompr;
- uLong comprLen, uncomprLen;
+void test_large_inflate(Byte *compr, uLong comprLen, Byte *uncompr,
+ uLong uncomprLen)
{
int err;
z_stream d_stream; /* decompression stream */
/* ===========================================================================
* Test deflate() with full flush
*/
-void test_flush(compr, comprLen)
- Byte *compr;
- uLong *comprLen;
+void test_flush(Byte *compr, uLong comprLen)
{
z_stream c_stream; /* compression stream */
int err;
/* ===========================================================================
* Test inflateSync()
*/
-void test_sync(compr, comprLen, uncompr, uncomprLen)
- Byte *compr, *uncompr;
- uLong comprLen, uncomprLen;
+void test_sync(Byte *compr, uLong comprLen, Byte *uncompr, uLong uncomprLen)
{
int err;
z_stream d_stream; /* decompression stream */
/* ===========================================================================
* Test deflate() with preset dictionary
*/
-void test_dict_deflate(compr, comprLen)
- Byte *compr;
- uLong comprLen;
+void test_dict_deflate(Byte *compr, uLong comprLen)
{
z_stream c_stream; /* compression stream */
int err;
/* ===========================================================================
* Test inflate() with a preset dictionary
*/
-void test_dict_inflate(compr, comprLen, uncompr, uncomprLen)
- Byte *compr, *uncompr;
- uLong comprLen, uncomprLen;
+void test_dict_inflate(Byte *compr, uLong comprLen, Byte *uncompr,
+ uLong uncomprLen)
{
int err;
z_stream d_stream; /* decompression stream */
* Usage: example [output.gz [input.gz]]
*/
-int main(argc, argv)
- int argc;
- char *argv[];
+int main(int argc, char *argv[])
{
Byte *compr, *uncompr;
uLong comprLen = 10000*sizeof(int); /* don't overflow on MSDOS */
# include <sys/stat.h>
#endif
-#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
+#if defined(MSDOS) || defined(OS2) || defined(_WIN32) || defined(__CYGWIN__)
# include <fcntl.h>
# include <io.h>
# ifdef UNDER_CE
#endif
#if !defined(Z_HAVE_UNISTD_H) && !defined(_LARGEFILE64_SOURCE)
-#ifndef WIN32 /* unlink already in stdio.h for WIN32 */
+#ifndef _WIN32 /* unlink already in stdio.h for WIN32 */
extern int unlink _Z_OF((const char *));
#endif
#endif
The strwinerror function does not change the current setting
of GetLastError. */
-static char *strwinerror (error)
- DWORD error;
+static char *strwinerror(DWORD error)
{
static char buf[1024];
return buf;
}
-static void pwinerror (s)
- const char *s;
+static void pwinerror (const char *s)
{
if (s && *s)
fprintf(stderr, "%s: %s\n", s, strwinerror(GetLastError ()));
return gz_open(NULL, fd, mode);
}
-gzFile gz_open(path, fd, mode)
- const char *path;
- int fd;
- const char *mode;
-{
+gzFile gz_open(const char *path, int fd, const char *mode) {
gzFile gz;
int ret;
int gzwrite _Z_OF((gzFile, const void *, unsigned));
-int gzwrite(gz, buf, len)
- gzFile gz;
- const void *buf;
- unsigned len;
-{
+int gzwrite(gzFile gz, const void *buf, unsigned len) {
z_stream *strm;
unsigned char out[BUFLEN];
int gzread _Z_OF((gzFile, void *, unsigned));
-int gzread(gz, buf, len)
- gzFile gz;
- void *buf;
- unsigned len;
-{
+int gzread(gzFile gz, void *buf, unsigned len) {
int ret;
unsigned got;
unsigned char in[1];
int gzclose _Z_OF((gzFile));
-int gzclose(gz)
- gzFile gz;
-{
+int gzclose(gzFile gz) {
z_stream *strm;
unsigned char out[BUFLEN];
const char *gzerror _Z_OF((gzFile, int *));
-const char *gzerror(gz, err)
- gzFile gz;
- int *err;
+const char *gzerror(gzFile gz, int *err)
{
*err = gz->err;
return gz->msg;
/* ===========================================================================
* Display error message and exit
*/
-void error(msg)
- const char *msg;
+void error(const char *msg)
{
fprintf(stderr, "%s: %s\n", prog, msg);
exit(1);
* Compress input to output then close both files.
*/
-void gz_compress(in, out)
- FILE *in;
- gzFile out;
+void gz_compress(FILE *in, gzFile out)
{
local char buf[BUFLEN];
int len;
/* Try compressing the input file at once using mmap. Return Z_OK if
* if success, Z_ERRNO otherwise.
*/
-int gz_compress_mmap(in, out)
- FILE *in;
- gzFile out;
-{
+int gz_compress_mmap(FILE *in, gzFile out) {
int len;
int err;
int ifd = fileno(in);
/* ===========================================================================
* Uncompress input to output then close both files.
*/
-void gz_uncompress(in, out)
- gzFile in;
- FILE *out;
-{
+void gz_uncompress(gzFile in, FILE *out) {
local char buf[BUFLEN];
int len;
int err;
* Compress the given file: create a corresponding .gz file and remove the
* original.
*/
-void file_compress(file, mode)
- char *file;
- char *mode;
-{
+void file_compress(char *file, char *mode) {
local char outfile[MAX_NAME_LEN];
FILE *in;
gzFile out;
/* ===========================================================================
* Uncompress the given file and remove the original.
*/
-void file_uncompress(file)
- char *file;
-{
+void file_uncompress(char *file) {
local char buf[MAX_NAME_LEN];
char *infile, *outfile;
FILE *out;
* -1 to -9 : compression level
*/
-int main(argc, argv)
- int argc;
- char *argv[];
-{
+int main(int argc, char *argv[]) {
int copyout = 0;
int uncompr = 0;
gzFile file;
/* gzclose() is in a separate file so that it is linked in only if it is used.
That way the other gzclose functions can be used instead to avoid linking in
unneeded compression or decompression routines. */
-int ZEXPORT gzclose(file)
- gzFile file;
-{
+int ZEXPORT gzclose(gzFile file) {
#ifndef NO_GZCOMPRESS
gz_statep state;
The gz_strwinerror function does not change the current setting of
GetLastError. */
-char ZLIB_INTERNAL *gz_strwinerror (error)
- DWORD error;
-{
+char ZLIB_INTERNAL *gz_strwinerror(DWORD error) {
static char buf[1024];
wchar_t *msgbuf;
#endif /* UNDER_CE */
/* Reset gzip file state */
-local void gz_reset(state)
- gz_statep state;
-{
+local void gz_reset(gz_statep state) {
state.state->x.have = 0; /* no output data available */
if (state.state->mode == GZ_READ) { /* for reading ... */
state.state->eof = 0; /* not at end of file */
}
/* Open a gzip file either by name or file descriptor. */
-local gzFile gz_open(path, fd, mode)
- const void *path;
- int fd;
- const char *mode;
-{
+local gzFile gz_open(const void *path, int fd, const char *mode) {
gz_statep state;
z_size_t len;
int oflag;
}
/* -- see zlib.h -- */
-gzFile ZEXPORT gzopen(path, mode)
- const char *path;
- const char *mode;
-{
+gzFile ZEXPORT gzopen(const char *path, const char *mode) {
return gz_open(path, -1, mode);
}
/* -- see zlib.h -- */
-gzFile ZEXPORT gzopen64(path, mode)
- const char *path;
- const char *mode;
-{
+gzFile ZEXPORT gzopen64(const char *path, const char *mode) {
return gz_open(path, -1, mode);
}
/* -- see zlib.h -- */
-gzFile ZEXPORT gzdopen(fd, mode)
- int fd;
- const char *mode;
-{
+gzFile ZEXPORT gzdopen(int fd, const char *mode) {
char *path; /* identifier for error messages */
gzFile gz;
/* -- see zlib.h -- */
#ifdef WIDECHAR
-gzFile ZEXPORT gzopen_w(path, mode)
- const wchar_t *path;
- const char *mode;
-{
+gzFile ZEXPORT gzopen_w(const wchar_t *path, const char *mode) {
return gz_open(path, -2, mode);
}
#endif
/* -- see zlib.h -- */
-int ZEXPORT gzbuffer(file, size)
- gzFile file;
- unsigned size;
-{
+int ZEXPORT gzbuffer(gzFile file, unsigned size) {
gz_statep state;
/* get internal structure and check integrity */
}
/* -- see zlib.h -- */
-int ZEXPORT gzrewind(file)
- gzFile file;
-{
+int ZEXPORT gzrewind(gzFile file) {
gz_statep state;
/* get internal structure */
}
/* -- see zlib.h -- */
-z_off64_t ZEXPORT gzseek64(file, offset, whence)
- gzFile file;
- z_off64_t offset;
- int whence;
-{
+z_off64_t ZEXPORT gzseek64(gzFile file, z_off64_t offset, int whence) {
unsigned n;
z_off64_t ret;
gz_statep state;
}
/* -- see zlib.h -- */
-z_off_t ZEXPORT gzseek(file, offset, whence)
- gzFile file;
- z_off_t offset;
- int whence;
-{
+z_off_t ZEXPORT gzseek(gzFile file, z_off_t offset, int whence) {
z_off64_t ret;
ret = gzseek64(file, (z_off64_t)offset, whence);
}
/* -- see zlib.h -- */
-z_off64_t ZEXPORT gztell64(file)
- gzFile file;
-{
+z_off64_t ZEXPORT gztell64(gzFile file) {
gz_statep state;
/* get internal structure and check integrity */
}
/* -- see zlib.h -- */
-z_off_t ZEXPORT gztell(file)
- gzFile file;
-{
+z_off_t ZEXPORT gztell(gzFile file) {
z_off64_t ret;
ret = gztell64(file);
}
/* -- see zlib.h -- */
-z_off64_t ZEXPORT gzoffset64(file)
- gzFile file;
-{
+z_off64_t ZEXPORT gzoffset64(gzFile file) {
z_off64_t offset;
gz_statep state;
}
/* -- see zlib.h -- */
-z_off_t ZEXPORT gzoffset(file)
- gzFile file;
-{
+z_off_t ZEXPORT gzoffset(gzFile file) {
z_off64_t ret;
ret = gzoffset64(file);
}
/* -- see zlib.h -- */
-int ZEXPORT gzeof(file)
- gzFile file;
-{
+int ZEXPORT gzeof(gzFile file) {
gz_statep state;
/* get internal structure and check integrity */
}
/* -- see zlib.h -- */
-const char * ZEXPORT gzerror(file, errnum)
- gzFile file;
- int *errnum;
-{
+const char * ZEXPORT gzerror(gzFile file, int *errnum) {
gz_statep state;
/* get internal structure and check integrity */
}
/* -- see zlib.h -- */
-void ZEXPORT gzclearerr(file)
- gzFile file;
-{
+void ZEXPORT gzclearerr(gzFile file) {
gz_statep state;
/* get internal structure and check integrity */
memory). Simply save the error message as a static string. If there is an
allocation failure constructing the error message, then convert the error to
out of memory. */
-void ZLIB_INTERNAL gz_error(state, err, msg)
- gz_statep state;
- int err;
- const char *msg;
-{
+void ZLIB_INTERNAL gz_error(gz_statep state, int err, const char *msg) {
/* free previously allocated message and clear */
if (state.state->msg != NULL) {
if (state.state->err != Z_MEM_ERROR)
available) -- we need to do this to cover cases where 2's complement not
used, since C standard permits 1's complement and sign-bit representations,
otherwise we could just use ((unsigned)-1) >> 1 */
-unsigned ZLIB_INTERNAL gz_intmax()
-{
+unsigned ZLIB_INTERNAL gz_intmax() {
unsigned p, q;
p = 1;
state.state->fd, and update state.state->eof, state.state->err, and state.state->msg as appropriate.
This function needs to loop on read(), since read() is not guaranteed to
read the number of bytes requested, depending on the type of descriptor. */
-local int gz_load(state, buf, len, have)
- gz_statep state;
- unsigned char *buf;
- unsigned len;
- unsigned *have;
-{
+local int gz_load(gz_statep state, unsigned char *buf, unsigned len,
+ unsigned *have) {
ssize_t ret;
unsigned get, max = ((unsigned)-1 >> 2) + 1;
If strm->avail_in != 0, then the current data is moved to the beginning of
the input buffer, and then the remainder of the buffer is loaded with the
available data from the input file. */
-local int gz_avail(state)
- gz_statep state;
+local int gz_avail(gz_statep state)
{
unsigned got;
z_streamp strm = &(state.state->strm);
case, all further file reads will be directly to either the output buffer or
a user buffer. If decompressing, the inflate state will be initialized.
gz_look() will return 0 on success or -1 on failure. */
-local int gz_look(state)
- gz_statep state;
-{
+local int gz_look(gz_statep state) {
z_streamp strm = &(state.state->strm);
/* allocate read buffers and inflate memory */
data. If the gzip stream completes, state.state->how is reset to LOOK to look for
the next gzip stream or raw data, once state.state->x.have is depleted. Returns 0
on success, -1 on failure. */
-local int gz_decomp(state)
- gz_statep state;
-{
+local int gz_decomp(gz_statep state) {
int ret = Z_OK;
unsigned had;
z_streamp strm = &(state.state->strm);
looked for to determine whether to copy or decompress. Returns -1 on error,
otherwise 0. gz_fetch() will leave state.state->how as COPY or GZIP unless the
end of the input file has been reached and all data has been processed. */
-local int gz_fetch(state)
- gz_statep state;
-{
+local int gz_fetch(gz_statep state) {
z_streamp strm = &(state.state->strm);
do {
}
/* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */
-local int gz_skip(state, len)
- gz_statep state;
- z_off64_t len;
-{
+local int gz_skip(gz_statep state, z_off64_t len) {
unsigned n;
/* skip over len bytes or reach end-of-file, whichever comes first */
input. Return the number of bytes read. If zero is returned, either the
end of file was reached, or there was an error. state.state->err must be
consulted in that case to determine which. */
-local z_size_t gz_read(state, buf, len)
- gz_statep state;
- voidp buf;
- z_size_t len;
-{
+local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len) {
z_size_t got;
unsigned n;
}
/* -- see zlib.h -- */
-int ZEXPORT gzread(file, buf, len)
- gzFile file;
- voidp buf;
- unsigned len;
-{
+int ZEXPORT gzread(gzFile file, voidp buf, unsigned len) {
gz_statep state;
/* get internal structure */
}
/* -- see zlib.h -- */
-z_size_t ZEXPORT gzfread(buf, size, nitems, file)
- voidp buf;
- z_size_t size;
- z_size_t nitems;
- gzFile file;
-{
+z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems,
+ gzFile file) {
z_size_t len;
gz_statep state;
ZEXTERN int ZEXPORT gzgetc_ _Z_OF((gzFile file));
#endif
-int ZEXPORT gzgetc(file)
- gzFile file;
-{
+int ZEXPORT gzgetc(gzFile file) {
int ret;
unsigned char buf[1];
gz_statep state;
return ret < 1 ? -1 : buf[0];
}
-int ZEXPORT gzgetc_(file)
-gzFile file;
-{
+int ZEXPORT gzgetc_(gzFile file) {
return gzgetc(file);
}
/* -- see zlib.h -- */
-int ZEXPORT gzungetc(c, file)
- int c;
- gzFile file;
-{
+int ZEXPORT gzungetc(int c, gzFile file) {
gz_statep state;
/* get internal structure */
}
/* -- see zlib.h -- */
-char * ZEXPORT gzgets(file, buf, len)
- gzFile file;
- char *buf;
- int len;
-{
+char * ZEXPORT gzgets(gzFile file, char *buf, int len) {
unsigned left, n;
char *str;
unsigned char *eol;
}
/* -- see zlib.h -- */
-int ZEXPORT gzdirect(file)
- gzFile file;
-{
+int ZEXPORT gzdirect(gzFile file) {
gz_statep state;
/* get internal structure */
}
/* -- see zlib.h -- */
-int ZEXPORT gzclose_r(file)
- gzFile file;
-{
+int ZEXPORT gzclose_r(gzFile file) {
int ret, err;
gz_statep state;
/* Initialize state for writing a gzip file. Mark initialization by setting
state.state->size to non-zero. Return -1 on a memory allocation failure, or 0 on
success. */
-local int gz_init(state)
- gz_statep state;
-{
+local int gz_init(gz_statep state) {
int ret;
z_streamp strm = &(state.state->strm);
deflate() flush value. If flush is Z_FINISH, then the deflate() state is
reset to start a new gzip stream. If gz->direct is true, then simply write
to the output file without compressing, and ignore flush. */
-local int gz_comp(state, flush)
- gz_statep state;
- int flush;
-{
+local int gz_comp(gz_statep state, int flush) {
int ret, writ;
unsigned have, put, max = ((unsigned)-1 >> 2) + 1;
z_streamp strm = &(state.state->strm);
/* Compress len zeros to output. Return -1 on a write error or memory
allocation failure by gz_comp(), or 0 on success. */
-local int gz_zero(state, len)
- gz_statep state;
- z_off64_t len;
-{
+local int gz_zero(gz_statep state, z_off64_t len) {
int first;
unsigned n;
z_streamp strm = &(state.state->strm);
/* Write len bytes from buf to file. Return the number of bytes written. If
the returned value is less than len, then there was an error. */
-local z_size_t gz_write(state, buf, len)
- gz_statep state;
- voidpc buf;
- z_size_t len;
-{
+local z_size_t gz_write(gz_statep state, voidpc buf, z_size_t len) {
z_size_t put = len;
/* if len is zero, avoid unnecessary operations */
}
/* -- see zlib.h -- */
-int ZEXPORT gzwrite(file, buf, len)
- gzFile file;
- voidpc buf;
- unsigned len;
-{
+int ZEXPORT gzwrite(gzFile file, voidpc buf, unsigned len) {
gz_statep state;
/* get internal structure */
}
/* -- see zlib.h -- */
-z_size_t ZEXPORT gzfwrite(buf, size, nitems, file)
- voidpc buf;
- z_size_t size;
- z_size_t nitems;
- gzFile file;
-{
+z_size_t ZEXPORT gzfwrite(voidpc buf, z_size_t size, z_size_t nitems,
+ gzFile file) {
z_size_t len;
gz_statep state;
}
/* -- see zlib.h -- */
-int ZEXPORT gzputc(file, c)
- gzFile file;
- int c;
-{
+int ZEXPORT gzputc(gzFile file, int c) {
unsigned have;
unsigned char buf[1];
gz_statep state;
}
/* -- see zlib.h -- */
-int ZEXPORT gzputs(file, str)
- gzFile file;
- const char *str;
-{
+int ZEXPORT gzputs(gzFile file, const char *str) {
int ret;
z_size_t len;
gz_statep state;
#include <stdarg.h>
/* -- see zlib.h -- */
-int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va)
-{
+int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va) {
int len;
unsigned left;
char *next;
return len;
}
-int ZEXPORTVA gzprintf(gzFile file, const char *format, ...)
-{
+int ZEXPORTVA gzprintf(gzFile file, const char *format, ...) {
va_list va;
int ret;
#else /* !STDC && !Z_HAVE_STDARG_H */
/* -- see zlib.h -- */
-int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
- a11, a12, a13, a14, a15, a16, a17, a18, a19, a20)
- gzFile file;
- const char *format;
- int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
- a11, a12, a13, a14, a15, a16, a17, a18, a19, a20;
-{
+int ZEXPORTVA gzprintf(gzFile file, const char *format, int a1, int a2, int a3,
+ int a4, int a5, int a6, int a7, int a8, int a9, int a10,
+ int a11, int a12, int a13, int a14, int a15, int a16,
+ int a17, int a18, int a19, int a20) {
unsigned len, left;
char *next;
gz_statep state;
#endif
/* -- see zlib.h -- */
-int ZEXPORT gzflush(file, flush)
- gzFile file;
- int flush;
-{
+int ZEXPORT gzflush(gzFile file, int flush) {
gz_statep state;
/* get internal structure */
}
/* -- see zlib.h -- */
-int ZEXPORT gzsetparams(file, level, strategy)
- gzFile file;
- int level;
- int strategy;
-{
+int ZEXPORT gzsetparams(gzFile file, int level, int strategy) {
gz_statep state;
z_streamp strm;
}
/* -- see zlib.h -- */
-int ZEXPORT gzclose_w(file)
- gzFile file;
-{
+int ZEXPORT gzclose_w(gzFile file) {
int ret = Z_OK;
gz_statep state;