648db22b |
1 | #ifndef SEEKABLE_H |
2 | #define SEEKABLE_H |
3 | |
4 | #if defined (__cplusplus) |
5 | extern "C" { |
6 | #endif |
7 | |
8 | #include <stdio.h> |
9 | #include "zstd.h" /* ZSTDLIB_API */ |
10 | |
11 | /* NOTE(review): presumably the size, in bytes, of the seek table footer - confirm against the format spec. */ |
12 | #define ZSTD_seekTableFooterSize 9 |
13 | /* NOTE(review): presumably the magic number that identifies a seek table - confirm against the format spec. */ |
14 | #define ZSTD_SEEKABLE_MAGICNUMBER 0x8F92EAB1 |
15 | /* 0x8000000 == 2^27 == 134,217,728. NOTE(review): presumably the maximum number of frames in one seek table - confirm. */ |
16 | #define ZSTD_SEEKABLE_MAXFRAMES 0x8000000U |
17 | |
18 | /* Limit maximum size to avoid potential issues storing the compressed size. 0x40000000 == 2^30, i.e. 1 GiB if the unit is bytes. */ |
19 | #define ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE 0x40000000U |
20 | |
21 | /*-**************************************************************************** |
22 | * Seekable Format |
23 | * |
24 | * The seekable format splits the compressed data into a series of "frames", |
25 | * each compressed individually so that decompression of a section in the |
26 | * middle of an archive only requires zstd to decompress at most a frame's |
27 | * worth of extra data, instead of the entire archive. |
28 | ******************************************************************************/ |
29 | /* Opaque handles: these struct types are only forward-declared here; their members are not visible in this header. */ |
30 | typedef struct ZSTD_seekable_CStream_s ZSTD_seekable_CStream; /* seekable compressor (streaming) object */ |
31 | typedef struct ZSTD_seekable_s ZSTD_seekable; /* seekable decompressor object; tracks the seek table */ |
32 | typedef struct ZSTD_seekTable_s ZSTD_seekTable; /* standalone seek table extracted from a ZSTD_seekable */ |
33 | |
34 | /*-**************************************************************************** |
35 | * Seekable compression - HowTo |
36 | * A ZSTD_seekable_CStream object is required to track a streaming operation. |
37 | * Use ZSTD_seekable_createCStream() and ZSTD_seekable_freeCStream() to create/ |
38 | * release resources. |
39 | * |
40 | * Streaming objects are reusable to avoid allocation and deallocation, |
41 | * to start a new compression operation call ZSTD_seekable_initCStream() on the |
42 | * compressor. |
43 | * |
44 | * Data streamed to the seekable compressor will automatically be split into |
45 | * frames of size `maxFrameSize` (provided in ZSTD_seekable_initCStream()), |
46 | * or if none is provided, will be cut off whenever ZSTD_seekable_endFrame() is |
47 | * called or when the default maximum frame size (1GB) is reached. |
48 | * |
49 | * Use ZSTD_seekable_initCStream() to initialize a ZSTD_seekable_CStream object |
50 | * for a new compression operation. |
51 | * - `maxFrameSize` indicates the size at which to automatically start a new |
52 | * seekable frame. |
53 | * `maxFrameSize == 0` implies the default maximum size. |
54 | * Smaller frame sizes allow faster decompression of small segments, |
55 | * since retrieving a single byte requires decompression of |
56 | * the full frame where the byte belongs. |
57 | * In general, size the frames to roughly correspond to |
58 | * the access granularity (when it's known). |
59 | * But small sizes also reduce compression ratio. |
60 | * Avoid really tiny frame sizes (< 1 KB), |
61 | * that would hurt compression ratio considerably. |
62 | * - `checksumFlag` indicates whether or not the seek table should include frame |
63 | * checksums on the uncompressed data for verification. |
64 | * @return : a size hint for input to provide for compression, or an error code |
65 | * checkable with ZSTD_isError() |
66 | * |
67 | * Use ZSTD_seekable_compressStream() repeatedly to consume the input stream. |
68 | * The function will automatically update both `pos` fields. |
69 | * Note that it may not consume the entire input, in which case `pos < size`, |
70 | * and it's up to the caller to present again remaining data. |
71 | * @return : a size hint, preferred nb of bytes to use as input for next |
72 | * function call or an error code, which can be tested using |
73 | * ZSTD_isError(). |
74 | * Note 1 : it's just a hint, to help latency a little, any other |
75 | * value will work fine. |
76 | * |
77 | * At any time, call ZSTD_seekable_endFrame() to end the current frame and |
78 | * start a new one. |
79 | * |
80 | * ZSTD_seekable_endStream() will end the current frame, and then write the seek |
81 | * table so that decompressors can efficiently find compressed frames. |
82 | * ZSTD_seekable_endStream() may return a number > 0 if it was unable to flush |
83 | * all the necessary data to `output`. In this case, it should be called again |
84 | * until all remaining data is flushed out and 0 is returned. |
85 | ******************************************************************************/ |
86 | |
87 | /*===== Seekable compressor management =====*/ |
88 | ZSTDLIB_API ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void); /* create a compressor object; release it with ZSTD_seekable_freeCStream() */ |
89 | ZSTDLIB_API size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs); /* release the resources held by `zcs` */ |
90 | |
91 | /*===== Seekable compression functions =====*/ |
92 | ZSTDLIB_API size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs, int compressionLevel, int checksumFlag, unsigned maxFrameSize); /* start a new compression operation; maxFrameSize == 0 selects the default maximum; @return size hint or error code (ZSTD_isError()) */ |
93 | ZSTDLIB_API size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); /* consume `input`, updating both `pos` fields; may leave input unconsumed; @return size hint or error code (ZSTD_isError()) */ |
94 | ZSTDLIB_API size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output); /* end the current frame and start a new one */ |
95 | ZSTDLIB_API size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output); /* end the current frame and write the seek table; call again while the return value is > 0 */ |
96 | |
97 | /*= Raw seek table API |
98 | * These functions allow for the seek table to be constructed directly. |
99 | * This table can then be appended to a file of concatenated frames. |
100 | * This allows the frames to be compressed independently, even in parallel, |
101 | * and compiled together afterward into a seekable archive. |
102 | * |
103 | * Use ZSTD_seekable_createFrameLog() to allocate and initialize a tracking |
104 | * structure. |
105 | * |
106 | * Call ZSTD_seekable_logFrame() once for each frame in the archive. |
107 | * checksum is optional, and will not be used if checksumFlag was 0 when the |
108 | * frame log was created. If present, it should be the least significant 32 |
109 | * bits of the XXH64 hash of the uncompressed data. |
110 | * |
111 | * Call ZSTD_seekable_writeSeekTable to serialize the data into a seek table. |
112 | * If the entire table was written, the return value will be 0. Otherwise, |
113 | * it will be equal to the number of bytes left to write. */ |
114 | typedef struct ZSTD_frameLog_s ZSTD_frameLog; /* tracking structure of the raw seek table API; only forward-declared here */ |
115 | ZSTDLIB_API ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag); /* allocate and initialize a frame log */ |
116 | ZSTDLIB_API size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl); /* NOTE(review): presumably releases a log obtained from ZSTD_seekable_createFrameLog() - confirm */ |
117 | ZSTDLIB_API size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl, unsigned compressedSize, unsigned decompressedSize, unsigned checksum); /* call once per frame; `checksum` is unused when the log was created with checksumFlag == 0, otherwise it is the low 32 bits of the XXH64 of the uncompressed data */ |
118 | ZSTDLIB_API size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output); /* serialize the seek table into `output`; @return 0 when fully written, otherwise the number of bytes left to write */ |
119 | |
120 | |
121 | /*-**************************************************************************** |
122 | * Seekable decompression - HowTo |
123 | * A ZSTD_seekable object is required to track the seekTable. |
124 | * |
125 | * Call ZSTD_seekable_init* to initialize a ZSTD_seekable object with the |
126 | * seek table provided in the input. |
127 | * There are three modes for ZSTD_seekable_init: |
128 | * - ZSTD_seekable_initBuff() : An in-memory API. The data contained in |
129 | * `src` should be the entire seekable file, including the seek table. |
130 | * `src` should be kept alive and unmodified until the ZSTD_seekable object |
131 | * is freed or reset. |
132 | * - ZSTD_seekable_initFile() : A simplified file API using stdio. fread and |
133 | * fseek will be used to access the required data for building the seek |
134 | * table and doing decompression operations. `src` should not be closed |
135 | * or modified until the ZSTD_seekable object is freed or reset. |
136 | * - ZSTD_seekable_initAdvanced() : A general API allowing the client to |
137 | * provide its own read and seek callbacks. |
138 | * + ZSTD_seekable_read() : read exactly `n` bytes into `buffer`. |
139 | * Premature EOF should be treated as an error. |
140 | * + ZSTD_seekable_seek() : seek the read head to `offset` from `origin`, |
141 | * where origin is either SEEK_SET (beginning of |
142 | * file), or SEEK_END (end of file). |
143 | * Both functions should return a non-negative value in case of success, and a |
144 | * negative value in case of failure. If implementing using this API and |
145 | * stdio, be careful with files larger than 4GB and fseek. All of these |
146 | * functions return an error code checkable with ZSTD_isError(). |
147 | * |
148 | * Call ZSTD_seekable_decompress to decompress `dstSize` bytes at decompressed |
149 | * offset `offset`. ZSTD_seekable_decompress may have to decompress the entire |
150 | * prefix of the frame before the desired data if it has not already processed |
151 | * this section. If ZSTD_seekable_decompress is called multiple times for a |
152 | * consecutive range of data, it will efficiently retain the decompressor object |
153 | * and avoid redecompressing frame prefixes. The return value is the number of |
154 | * bytes decompressed, or an error code checkable with ZSTD_isError(). |
155 | * |
156 | * The seek table access functions can be used to obtain the data contained |
157 | * in the seek table. If frameIndex is larger than the value returned by |
158 | * ZSTD_seekable_getNumFrames(), they will return error codes checkable with |
159 | * ZSTD_isError(). Note that since the offset access functions return |
160 | * unsigned long long instead of size_t, in this case they will instead return |
161 | * the value ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE. |
162 | ******************************************************************************/ |
163 | |
164 | /*===== Seekable decompressor management =====*/ |
165 | ZSTDLIB_API ZSTD_seekable* ZSTD_seekable_create(void); /* NOTE(review): presumably allocates a ZSTD_seekable to be released with ZSTD_seekable_free() - confirm */ |
166 | ZSTDLIB_API size_t ZSTD_seekable_free(ZSTD_seekable* zs); /* frees `zs`; buffers or files passed to the init functions must stay valid until then */ |
167 | |
168 | /*===== Seekable decompression functions =====*/ |
169 | ZSTDLIB_API size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize); /* in-memory mode: `src` must hold the entire seekable file, seek table included, and stay alive and unmodified until `zs` is freed or reset */ |
170 | ZSTDLIB_API size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src); /* stdio mode: data is accessed with fread and fseek; `src` must not be closed or modified until `zs` is freed or reset */ |
171 | ZSTDLIB_API size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned long long offset); /* decompress `dstSize` bytes at decompressed offset `offset`; @return number of bytes decompressed or error code (ZSTD_isError()) */ |
172 | ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex); /* NOTE(review): not described in the HowTo comment; presumably decompresses frame `frameIndex` into `dst` - confirm */ |
173 | /* (0ULL-2) wraps around to ULLONG_MAX - 1; the unsigned long long offset accessors return it when frameIndex is too large. */ |
174 | #define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2) |
175 | /*===== Seekable seek table access functions =====*/ |
176 | ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(const ZSTD_seekable* zs); /* frame count; the accessors below fail for a frameIndex beyond it */ |
177 | ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex); /* @return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE when frameIndex is too large */ |
178 | ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex); /* @return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE when frameIndex is too large */ |
179 | ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(const ZSTD_seekable* zs, unsigned frameIndex); /* @return error code (ZSTD_isError()) when frameIndex is too large */ |
180 | ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* zs, unsigned frameIndex); /* @return error code (ZSTD_isError()) when frameIndex is too large */ |
181 | ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long offset); /* NOTE(review): presumably maps a decompressed offset to the index of the frame containing it - confirm */ |
182 | |
183 | |
184 | /*-**************************************************************************** |
185 | * Direct exploitation of the seekTable |
186 | * |
187 | * Memory constrained use cases that manage multiple archives |
188 | * benefit from retaining multiple archive seek tables |
189 | * without retaining a ZSTD_seekable instance for each. |
190 | * |
191 | * The API below allows the above-mentioned use cases |
192 | * to initialize a ZSTD_seekable, extract its (smaller) ZSTD_seekTable, |
193 | * then throw the ZSTD_seekable away to save memory. |
194 | * |
195 | * Standard ZSTD operations can then be used |
196 | * to decompress frames based on seek table offsets. |
197 | ******************************************************************************/ |
198 | |
199 | /*===== Independent seek table management =====*/ |
200 | ZSTDLIB_API ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs); /* extract the seek table of an initialized `zs`, so that `zs` itself can then be thrown away */ |
201 | ZSTDLIB_API size_t ZSTD_seekTable_free(ZSTD_seekTable* st); /* NOTE(review): presumably releases a table from ZSTD_seekTable_create_fromSeekable() - confirm */ |
202 | /* NOTE(review): the signatures below parallel the ZSTD_seekable_* accessors; presumably the semantics and error values are the same - confirm. */ |
203 | /*===== Direct seek table access functions =====*/ |
204 | ZSTDLIB_API unsigned ZSTD_seekTable_getNumFrames(const ZSTD_seekTable* st); |
205 | ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameCompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex); |
206 | ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex); |
207 | ZSTDLIB_API size_t ZSTD_seekTable_getFrameCompressedSize(const ZSTD_seekTable* st, unsigned frameIndex); |
208 | ZSTDLIB_API size_t ZSTD_seekTable_getFrameDecompressedSize(const ZSTD_seekTable* st, unsigned frameIndex); |
209 | ZSTDLIB_API unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned long long offset); |
210 | |
211 | |
212 | /*===== Seekable advanced I/O API =====*/ |
213 | typedef int(ZSTD_seekable_read)(void* opaque, void* buffer, size_t n); /* read exactly `n` bytes into `buffer` (premature EOF is an error); @return >= 0 on success, < 0 on failure */ |
214 | typedef int(ZSTD_seekable_seek)(void* opaque, long long offset, int origin); /* seek the read head to `offset` from `origin` (SEEK_SET or SEEK_END); @return >= 0 on success, < 0 on failure */ |
215 | typedef struct { |
216 | void* opaque; /* NOTE(review): presumably handed back as the first argument of `read` and `seek` - confirm */ |
217 | ZSTD_seekable_read* read; /* client-provided read callback */ |
218 | ZSTD_seekable_seek* seek; /* client-provided seek callback */ |
219 | } ZSTD_seekable_customFile; |
220 | ZSTDLIB_API size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src); /* general mode: initialize `zs` using the client's own read and seek callbacks; @return error code checkable with ZSTD_isError() */ |
221 | |
222 | #if defined (__cplusplus) |
223 | } |
224 | #endif |
225 | |
226 | #endif |