| 1 | /* |
| 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | * All rights reserved. |
| 4 | * |
| 5 | * This source code is licensed under both the BSD-style license (found in the |
| 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| 7 | * in the COPYING file in the root directory of this source tree). |
| 8 | * You may select, at your option, one of the above-listed licenses. |
| 9 | */ |
| 10 | |
/* *********************************************************
* Turn on Large Files support (>4GB) for 32-bit Linux/Unix
*
* These feature-test macros are set here, ahead of every #include
* that appears in this file. _FILE_OFFSET_BITS and _LARGEFILE_SOURCE
* are only defined when the build has not already defined them.
***********************************************************/
#if !defined(__64BIT__) || defined(__MINGW32__) /* No point defining Large file for 64 bit but MinGW-w64 requires it */
# if !defined(_FILE_OFFSET_BITS)
# define _FILE_OFFSET_BITS 64 /* turn off_t into a 64-bit type for ftello, fseeko */
# endif
# if !defined(_LARGEFILE_SOURCE) /* obsolete macro, replaced with _FILE_OFFSET_BITS */
# define _LARGEFILE_SOURCE 1 /* Large File Support extension (LFS) - fseeko, ftello */
# endif
# if defined(_AIX) || defined(__hpux)
# define _LARGE_FILES /* Large file support on 32-bits AIX and HP-UX */
# endif
#endif
| 25 | |
/* ************************************************************
* Detect POSIX version
* PLATFORM_POSIX_VERSION = 0 for non-Unix e.g. Windows
* PLATFORM_POSIX_VERSION = 1 for Unix-like but non-POSIX
* PLATFORM_POSIX_VERSION > 1 is equal to found _POSIX_VERSION
* Value of PLATFORM_POSIX_VERSION can be forced on command line
***************************************************************/
#ifndef PLATFORM_POSIX_VERSION /* detection is skipped entirely when the build already provides a value */

# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \
|| defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */
/* exception rule : force posix version to 200112L,
* note: it's better to use unistd.h's _POSIX_VERSION whenever possible */
# define PLATFORM_POSIX_VERSION 200112L

/* try to determine posix version through official unistd.h's _POSIX_VERSION (https://pubs.opengroup.org/onlinepubs/7908799/xsh/unistd.h.html).
* note : there is no simple way to know in advance if <unistd.h> is present or not on target system,
* Posix specification mandates its presence and its content, but target system must respect this spec.
* It's necessary to _not_ #include <unistd.h> whenever target OS is not unix-like
* otherwise it will block preprocessing stage.
* The following list of build macros tries to "guess" if target OS is likely unix-like, and therefore can #include <unistd.h>
*/
# elif !defined(_WIN32) \
&& ( defined(__unix__) || defined(__unix) \
|| defined(__midipix__) || defined(__VMS) || defined(__HAIKU__) )

/* on Linux and Cygwin, request the 200809L feature level before <unistd.h> is included,
* unless the build already selected a _POSIX_C_SOURCE value */
# if defined(__linux__) || defined(__linux) || defined(__CYGWIN__)
# ifndef _POSIX_C_SOURCE
# define _POSIX_C_SOURCE 200809L /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */
# endif
# endif
# include <unistd.h> /* declares _POSIX_VERSION */
# if defined(_POSIX_VERSION) /* POSIX compliant */
# define PLATFORM_POSIX_VERSION _POSIX_VERSION
# else
# define PLATFORM_POSIX_VERSION 1 /* unix-like target whose <unistd.h> does not define _POSIX_VERSION */
# endif

# ifdef __UCLIBC__
# ifndef __USE_MISC
# define __USE_MISC /* enable st_mtim on uclibc */
# endif
# endif

# else /* non-unix target platform (like Windows) */
# define PLATFORM_POSIX_VERSION 0
# endif

#endif /* PLATFORM_POSIX_VERSION */
| 75 | |
| 76 | |
/* ************************************************************
* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
*
* LONG_SEEK(file, offset, origin) is the seek primitive used by
* ZSTD_seekable_seek_FILE(). The first matching branch below wins;
* plain fseek() is the last resort.
***************************************************************/
#if defined(_MSC_VER) && _MSC_VER >= 1400
# define LONG_SEEK _fseeki64
#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
# define LONG_SEEK fseeko
#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
# define LONG_SEEK fseeko64
#elif defined(_WIN32) && !defined(__DJGPP__)
# include <windows.h>
/* Fallback for other Windows toolchains : seek through the Win32 API.
 * `origin` SEEK_END maps to FILE_END, SEEK_CUR to FILE_CURRENT, and any other value to FILE_BEGIN.
 * The call acts on the OS handle obtained from _get_osfhandle(_fileno(file)).
 * @return : 0 when SetFilePointerEx() reports success, -1 otherwise.
 * NOTE(review): this function uses FILE and SEEK_* before <stdio.h> is included further down in
 * this file - presumably <windows.h> or the toolchain provides them; confirm when touching this branch. */
static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
    LARGE_INTEGER off;
    DWORD method;
    off.QuadPart = offset;
    if (origin == SEEK_END)
        method = FILE_END;
    else if (origin == SEEK_CUR)
        method = FILE_CURRENT;
    else
        method = FILE_BEGIN;

    if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method))
        return 0;
    else
        return -1;
}
#else
# define LONG_SEEK fseek
#endif
| 107 | |
| 108 | #include <stdlib.h> /* malloc, free */ |
| 109 | #include <stdio.h> /* FILE* */ |
#include <limits.h> /* UINT_MAX, LLONG_MAX */
| 111 | #include <assert.h> |
| 112 | |
| 113 | #define XXH_STATIC_LINKING_ONLY |
| 114 | #include "xxhash.h" |
| 115 | |
| 116 | #define ZSTD_STATIC_LINKING_ONLY |
| 117 | #include "zstd.h" |
| 118 | #include "zstd_errors.h" |
| 119 | #include "mem.h" |
| 120 | #include "zstd_seekable.h" |
| 121 | |
/* ERROR(name) : builds the size_t error code for ZSTD_error_<name>
 * (the enum value, negated, then converted to size_t). */
#undef ERROR
#define ERROR(name) ((size_t)-ZSTD_error_##name)

/* CHECK_IO(f) : evaluates `f` once; when the result is negative, makes the
 * enclosing function return ERROR(seekableIO). Only usable inside functions returning size_t.
 * Note : expands to a bare { } block, and returns without releasing anything the caller holds. */
#define CHECK_IO(f) { int const errcod = (f); if (errcod < 0) return ERROR(seekableIO); }

/* MIN / MAX : note that both evaluate their arguments more than once,
 * so arguments must be free of side effects. */
#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Threshold on consecutive ZSTD_decompressStream() calls that produce no output;
 * ZSTD_seekable_decompress() gives up with ERROR(seekableIO) once its counter exceeds it. */
#define ZSTD_SEEKABLE_NO_OUTPUT_PROGRESS_MAX 16
| 133 | |
| 134 | /* Special-case callbacks for FILE* and in-memory modes, so that we can treat |
| 135 | * them the same way as the advanced API */ |
/* Read callback for FILE* sources.
 * `opaque` is the FILE* to read from; exactly `n` bytes are requested into `buffer`.
 * @return : 0 when all `n` bytes were obtained, -1 on any short read. */
static int ZSTD_seekable_read_FILE(void* opaque, void* buffer, size_t n)
{
    FILE* const stream = (FILE*)opaque;
    size_t const nbRead = fread(buffer, 1, n, stream);
    return (nbRead == n) ? 0 : -1;
}
| 144 | |
/* Seek callback for FILE* sources.
 * Repositions the stream with LONG_SEEK(); when that succeeds, the stream is
 * additionally passed to fflush() and fflush()'s result is returned.
 * @return : 0 on success, otherwise the non-zero status of the failing call. */
static int ZSTD_seekable_seek_FILE(void* opaque, long long offset, int origin)
{
    FILE* const stream = (FILE*)opaque;
    int const seekStatus = LONG_SEEK(stream, offset, origin);
    if (seekStatus != 0) {
        return seekStatus;
    }
    return fflush(stream);
}
| 151 | |
/* Descriptor of an in-memory source, used as `opaque` by the two buffer callbacks below. */
typedef struct {
    const void *ptr;   /* start of the source buffer */
    size_t size;       /* number of bytes available at `ptr` */
    size_t pos;        /* current position; the callbacks keep it <= size */
} buffWrapper_t;

/* Read callback for in-memory sources.
 * Copies `n` bytes from the current position into `buffer` and advances the position.
 * @return : 0 on success, -1 when fewer than `n` bytes remain (position is left untouched). */
static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n)
{
    buffWrapper_t* const buff = (buffWrapper_t*)opaque;
    assert(buff != NULL);
    /* compare against the remaining byte count : `pos + n` could wrap around for a huge `n`
     * and wrongly pass a `pos + n > size` test */
    if (buff->pos > buff->size) return -1;
    if (n > buff->size - buff->pos) return -1;
    if (n > 0) {   /* nothing to copy for an empty read; also avoids memcpy() on a possibly NULL source */
        memcpy(buffer, (const unsigned char*)buff->ptr + buff->pos, n);
        buff->pos += n;
    }
    return 0;
}

/* Seek callback for in-memory sources.
 * `origin` is SEEK_SET, SEEK_CUR or SEEK_END; `offset` is relative to it and may be negative.
 * The target must land within [0, size].
 * @return : 0 on success, -1 when `origin` is unknown or the target is out of range
 *           (position is left untouched). */
static int ZSTD_seekable_seek_buff(void* opaque, long long offset, int origin)
{
    buffWrapper_t* const buff = (buffWrapper_t*) opaque;
    unsigned long long base;
    unsigned long long newOffset;
    assert(buff != NULL);
    switch (origin) {
    case SEEK_SET:
        base = 0;
        break;
    case SEEK_CUR:
        base = buff->pos;
        break;
    case SEEK_END:
        base = buff->size;
        break;
    default:
        /* report the error instead of continuing with an indeterminate offset in NDEBUG builds */
        return -1;
    }
    if (base > buff->size) return -1;

    /* all arithmetic is unsigned and range-checked first, so nothing can overflow */
    if (offset >= 0) {
        if ((unsigned long long)offset > buff->size - base) return -1;
        newOffset = base + (unsigned long long)offset;
    } else {
        unsigned long long const back = 0ULL - (unsigned long long)offset;   /* |offset|, valid for LLONG_MIN too */
        if (back > base) return -1;
        newOffset = base - back;
    }
    buff->pos = (size_t)newOffset;
    return 0;
}
| 193 | |
/* One entry of the in-memory seek table.
 * ZSTD_seekable_loadSeekTable() fills the offsets as running sums of the
 * per-frame sizes stored in the file, so entry i describes where frame i starts. */
typedef struct {
    U64 cOffset;    /* start of the frame in the compressed stream */
    U64 dOffset;    /* start of the frame in the decompressed stream */
    U32 checksum;   /* per-frame checksum from the seek table; only read from the file when the checksum flag is set */
} seekEntry_t;
| 199 | |
/* Seek table : frame offsets plus the checksum flag read from the footer. */
struct ZSTD_seekTable_s {
    seekEntry_t* entries;   /* heap array of tableLen + 1 entries; the extra last entry holds the end-of-stream offsets */
    size_t tableLen;        /* number of frames */

    int checksumFlag;       /* non-zero when the table stores a checksum for each frame */
};
| 206 | |
/* size of each of the two scratch buffers embedded in ZSTD_seekable */
#define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_MAX

/* Seekable decompression context.
 * Allocated zero-filled by ZSTD_seekable_create() and released by ZSTD_seekable_free(). */
struct ZSTD_seekable_s {
    ZSTD_DStream* dstream;           /* streaming decompressor, created in ZSTD_seekable_create() */
    ZSTD_seekTable seekTable;        /* filled in by ZSTD_seekable_loadSeekTable() */
    ZSTD_seekable_customFile src;    /* read / seek callbacks and their opaque handle */

    U64 decompressedOffset;          /* position reached in the decompressed stream; (U64)-1 right after init */
    U32 curFrame;                    /* index of the frame being decoded; (U32)-1 right after init */

    BYTE inBuff[SEEKABLE_BUFF_SIZE]; /* need to do our own input buffering */
    BYTE outBuff[SEEKABLE_BUFF_SIZE]; /* so we can efficiently decompress the
                                         starts of chunks before we get to the
                                         desired section */
    ZSTD_inBuffer in; /* maintain continuity across ZSTD_seekable_decompress operations */
    buffWrapper_t buffWrapper; /* for `src.opaque` in in-memory mode */

    XXH64_state_t xxhState;          /* running XXH64 state over the current frame's output, used to verify its checksum */
};
| 226 | |
| 227 | ZSTD_seekable* ZSTD_seekable_create(void) |
| 228 | { |
| 229 | ZSTD_seekable* const zs = (ZSTD_seekable*)malloc(sizeof(ZSTD_seekable)); |
| 230 | if (zs == NULL) return NULL; |
| 231 | |
| 232 | /* also initializes stage to zsds_init */ |
| 233 | memset(zs, 0, sizeof(*zs)); |
| 234 | |
| 235 | zs->dstream = ZSTD_createDStream(); |
| 236 | if (zs->dstream == NULL) { |
| 237 | free(zs); |
| 238 | return NULL; |
| 239 | } |
| 240 | |
| 241 | return zs; |
| 242 | } |
| 243 | |
| 244 | size_t ZSTD_seekable_free(ZSTD_seekable* zs) |
| 245 | { |
| 246 | if (zs == NULL) return 0; /* support free on null */ |
| 247 | ZSTD_freeDStream(zs->dstream); |
| 248 | free(zs->seekTable.entries); |
| 249 | free(zs); |
| 250 | return 0; |
| 251 | } |
| 252 | |
| 253 | ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs) |
| 254 | { |
| 255 | ZSTD_seekTable* const st = (ZSTD_seekTable*)malloc(sizeof(ZSTD_seekTable)); |
| 256 | if (st==NULL) return NULL; |
| 257 | |
| 258 | st->checksumFlag = zs->seekTable.checksumFlag; |
| 259 | st->tableLen = zs->seekTable.tableLen; |
| 260 | |
| 261 | /* Allocate an extra entry at the end to match logic of initial allocation */ |
| 262 | size_t const entriesSize = sizeof(seekEntry_t) * (zs->seekTable.tableLen + 1); |
| 263 | seekEntry_t* const entries = (seekEntry_t*)malloc(entriesSize); |
| 264 | if (entries==NULL) { |
| 265 | free(st); |
| 266 | return NULL; |
| 267 | } |
| 268 | |
| 269 | memcpy(entries, zs->seekTable.entries, entriesSize); |
| 270 | st->entries = entries; |
| 271 | return st; |
| 272 | } |
| 273 | |
| 274 | size_t ZSTD_seekTable_free(ZSTD_seekTable* st) |
| 275 | { |
| 276 | if (st == NULL) return 0; /* support free on null */ |
| 277 | free(st->entries); |
| 278 | free(st); |
| 279 | return 0; |
| 280 | } |
| 281 | |
| 282 | /** ZSTD_seekable_offsetToFrameIndex() : |
| 283 | * Performs a binary search to find the last frame with a decompressed offset |
| 284 | * <= pos |
| 285 | * @return : the frame's index */ |
| 286 | unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long pos) |
| 287 | { |
| 288 | return ZSTD_seekTable_offsetToFrameIndex(&zs->seekTable, pos); |
| 289 | } |
| 290 | |
| 291 | unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned long long pos) |
| 292 | { |
| 293 | U32 lo = 0; |
| 294 | U32 hi = (U32)st->tableLen; |
| 295 | assert(st->tableLen <= UINT_MAX); |
| 296 | |
| 297 | if (pos >= st->entries[st->tableLen].dOffset) { |
| 298 | return (unsigned)st->tableLen; |
| 299 | } |
| 300 | |
| 301 | while (lo + 1 < hi) { |
| 302 | U32 const mid = lo + ((hi - lo) >> 1); |
| 303 | if (st->entries[mid].dOffset <= pos) { |
| 304 | lo = mid; |
| 305 | } else { |
| 306 | hi = mid; |
| 307 | } |
| 308 | } |
| 309 | return lo; |
| 310 | } |
| 311 | |
| 312 | unsigned ZSTD_seekable_getNumFrames(const ZSTD_seekable* zs) |
| 313 | { |
| 314 | return ZSTD_seekTable_getNumFrames(&zs->seekTable); |
| 315 | } |
| 316 | |
| 317 | unsigned ZSTD_seekTable_getNumFrames(const ZSTD_seekTable* st) |
| 318 | { |
| 319 | assert(st->tableLen <= UINT_MAX); |
| 320 | return (unsigned)st->tableLen; |
| 321 | } |
| 322 | |
| 323 | unsigned long long ZSTD_seekable_getFrameCompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex) |
| 324 | { |
| 325 | return ZSTD_seekTable_getFrameCompressedOffset(&zs->seekTable, frameIndex); |
| 326 | } |
| 327 | |
| 328 | unsigned long long ZSTD_seekTable_getFrameCompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex) |
| 329 | { |
| 330 | if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; |
| 331 | return st->entries[frameIndex].cOffset; |
| 332 | } |
| 333 | |
| 334 | unsigned long long ZSTD_seekable_getFrameDecompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex) |
| 335 | { |
| 336 | return ZSTD_seekTable_getFrameDecompressedOffset(&zs->seekTable, frameIndex); |
| 337 | } |
| 338 | |
| 339 | unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex) |
| 340 | { |
| 341 | if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; |
| 342 | return st->entries[frameIndex].dOffset; |
| 343 | } |
| 344 | |
| 345 | size_t ZSTD_seekable_getFrameCompressedSize(const ZSTD_seekable* zs, unsigned frameIndex) |
| 346 | { |
| 347 | return ZSTD_seekTable_getFrameCompressedSize(&zs->seekTable, frameIndex); |
| 348 | } |
| 349 | |
| 350 | size_t ZSTD_seekTable_getFrameCompressedSize(const ZSTD_seekTable* st, unsigned frameIndex) |
| 351 | { |
| 352 | if (frameIndex >= st->tableLen) return ERROR(frameIndex_tooLarge); |
| 353 | return st->entries[frameIndex + 1].cOffset - |
| 354 | st->entries[frameIndex].cOffset; |
| 355 | } |
| 356 | |
| 357 | size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* zs, unsigned frameIndex) |
| 358 | { |
| 359 | return ZSTD_seekTable_getFrameDecompressedSize(&zs->seekTable, frameIndex); |
| 360 | } |
| 361 | |
| 362 | size_t ZSTD_seekTable_getFrameDecompressedSize(const ZSTD_seekTable* st, unsigned frameIndex) |
| 363 | { |
| 364 | if (frameIndex > st->tableLen) return ERROR(frameIndex_tooLarge); |
| 365 | return st->entries[frameIndex + 1].dOffset - |
| 366 | st->entries[frameIndex].dOffset; |
| 367 | } |
| 368 | |
| 369 | static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs) |
| 370 | { |
| 371 | int checksumFlag; |
| 372 | ZSTD_seekable_customFile src = zs->src; |
| 373 | /* read the footer, fixed size */ |
| 374 | CHECK_IO(src.seek(src.opaque, -(int)ZSTD_seekTableFooterSize, SEEK_END)); |
| 375 | CHECK_IO(src.read(src.opaque, zs->inBuff, ZSTD_seekTableFooterSize)); |
| 376 | |
| 377 | if (MEM_readLE32(zs->inBuff + 5) != ZSTD_SEEKABLE_MAGICNUMBER) { |
| 378 | return ERROR(prefix_unknown); |
| 379 | } |
| 380 | |
| 381 | { BYTE const sfd = zs->inBuff[4]; |
| 382 | checksumFlag = sfd >> 7; |
| 383 | |
| 384 | /* check reserved bits */ |
| 385 | if ((sfd >> 2) & 0x1f) { |
| 386 | return ERROR(corruption_detected); |
| 387 | } } |
| 388 | |
| 389 | { U32 const numFrames = MEM_readLE32(zs->inBuff); |
| 390 | U32 const sizePerEntry = 8 + (checksumFlag?4:0); |
| 391 | U32 const tableSize = sizePerEntry * numFrames; |
| 392 | U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_SKIPPABLEHEADERSIZE; |
| 393 | |
| 394 | U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */ |
| 395 | { U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE); |
| 396 | CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END)); |
| 397 | CHECK_IO(src.read(src.opaque, zs->inBuff, toRead)); |
| 398 | remaining -= toRead; |
| 399 | } |
| 400 | |
| 401 | if (MEM_readLE32(zs->inBuff) != (ZSTD_MAGIC_SKIPPABLE_START | 0xE)) { |
| 402 | return ERROR(prefix_unknown); |
| 403 | } |
| 404 | if (MEM_readLE32(zs->inBuff+4) + ZSTD_SKIPPABLEHEADERSIZE != frameSize) { |
| 405 | return ERROR(prefix_unknown); |
| 406 | } |
| 407 | |
| 408 | { /* Allocate an extra entry at the end so that we can do size |
| 409 | * computations on the last element without special case */ |
| 410 | seekEntry_t* const entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1)); |
| 411 | |
| 412 | U32 idx = 0; |
| 413 | U32 pos = 8; |
| 414 | |
| 415 | U64 cOffset = 0; |
| 416 | U64 dOffset = 0; |
| 417 | |
| 418 | if (entries == NULL) return ERROR(memory_allocation); |
| 419 | |
| 420 | /* compute cumulative positions */ |
| 421 | for (; idx < numFrames; idx++) { |
| 422 | if (pos + sizePerEntry > SEEKABLE_BUFF_SIZE) { |
| 423 | U32 const offset = SEEKABLE_BUFF_SIZE - pos; |
| 424 | U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE - offset); |
| 425 | memmove(zs->inBuff, zs->inBuff + pos, offset); /* move any data we haven't read yet */ |
| 426 | CHECK_IO(src.read(src.opaque, zs->inBuff+offset, toRead)); |
| 427 | remaining -= toRead; |
| 428 | pos = 0; |
| 429 | } |
| 430 | entries[idx].cOffset = cOffset; |
| 431 | entries[idx].dOffset = dOffset; |
| 432 | |
| 433 | cOffset += MEM_readLE32(zs->inBuff + pos); |
| 434 | pos += 4; |
| 435 | dOffset += MEM_readLE32(zs->inBuff + pos); |
| 436 | pos += 4; |
| 437 | if (checksumFlag) { |
| 438 | entries[idx].checksum = MEM_readLE32(zs->inBuff + pos); |
| 439 | pos += 4; |
| 440 | } |
| 441 | } |
| 442 | entries[numFrames].cOffset = cOffset; |
| 443 | entries[numFrames].dOffset = dOffset; |
| 444 | |
| 445 | zs->seekTable.entries = entries; |
| 446 | zs->seekTable.tableLen = numFrames; |
| 447 | zs->seekTable.checksumFlag = checksumFlag; |
| 448 | return 0; |
| 449 | } |
| 450 | } |
| 451 | } |
| 452 | |
| 453 | size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize) |
| 454 | { |
| 455 | zs->buffWrapper = (buffWrapper_t){src, srcSize, 0}; |
| 456 | { ZSTD_seekable_customFile srcFile = {&zs->buffWrapper, |
| 457 | &ZSTD_seekable_read_buff, |
| 458 | &ZSTD_seekable_seek_buff}; |
| 459 | return ZSTD_seekable_initAdvanced(zs, srcFile); } |
| 460 | } |
| 461 | |
| 462 | size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src) |
| 463 | { |
| 464 | ZSTD_seekable_customFile srcFile = {src, &ZSTD_seekable_read_FILE, |
| 465 | &ZSTD_seekable_seek_FILE}; |
| 466 | return ZSTD_seekable_initAdvanced(zs, srcFile); |
| 467 | } |
| 468 | |
| 469 | size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src) |
| 470 | { |
| 471 | zs->src = src; |
| 472 | |
| 473 | { const size_t seekTableInit = ZSTD_seekable_loadSeekTable(zs); |
| 474 | if (ZSTD_isError(seekTableInit)) return seekTableInit; } |
| 475 | |
| 476 | zs->decompressedOffset = (U64)-1; |
| 477 | zs->curFrame = (U32)-1; |
| 478 | |
| 479 | { const size_t dstreamInit = ZSTD_initDStream(zs->dstream); |
| 480 | if (ZSTD_isError(dstreamInit)) return dstreamInit; } |
| 481 | return 0; |
| 482 | } |
| 483 | |
| 484 | size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsigned long long offset) |
| 485 | { |
| 486 | unsigned long long const eos = zs->seekTable.entries[zs->seekTable.tableLen].dOffset; |
| 487 | if (offset + len > eos) { |
| 488 | len = eos - offset; |
| 489 | } |
| 490 | |
| 491 | U32 targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, offset); |
| 492 | U32 noOutputProgressCount = 0; |
| 493 | size_t srcBytesRead = 0; |
| 494 | do { |
| 495 | /* check if we can continue from a previous decompress job */ |
| 496 | if (targetFrame != zs->curFrame || offset < zs->decompressedOffset) { |
| 497 | zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset; |
| 498 | zs->curFrame = targetFrame; |
| 499 | |
| 500 | assert(zs->seekTable.entries[targetFrame].cOffset < LLONG_MAX); |
| 501 | CHECK_IO(zs->src.seek(zs->src.opaque, |
| 502 | (long long)zs->seekTable.entries[targetFrame].cOffset, |
| 503 | SEEK_SET)); |
| 504 | zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0}; |
| 505 | XXH64_reset(&zs->xxhState, 0); |
| 506 | ZSTD_DCtx_reset(zs->dstream, ZSTD_reset_session_only); |
| 507 | if (zs->buffWrapper.size && srcBytesRead > zs->buffWrapper.size) { |
| 508 | return ERROR(seekableIO); |
| 509 | } |
| 510 | } |
| 511 | |
| 512 | while (zs->decompressedOffset < offset + len) { |
| 513 | size_t toRead; |
| 514 | ZSTD_outBuffer outTmp; |
| 515 | size_t prevOutPos; |
| 516 | size_t prevInPos; |
| 517 | size_t forwardProgress; |
| 518 | if (zs->decompressedOffset < offset) { |
| 519 | /* dummy decompressions until we get to the target offset */ |
| 520 | outTmp = (ZSTD_outBuffer){zs->outBuff, (size_t) (MIN(SEEKABLE_BUFF_SIZE, offset - zs->decompressedOffset)), 0}; |
| 521 | } else { |
| 522 | outTmp = (ZSTD_outBuffer){dst, len, (size_t) (zs->decompressedOffset - offset)}; |
| 523 | } |
| 524 | |
| 525 | prevOutPos = outTmp.pos; |
| 526 | prevInPos = zs->in.pos; |
| 527 | toRead = ZSTD_decompressStream(zs->dstream, &outTmp, &zs->in); |
| 528 | if (ZSTD_isError(toRead)) { |
| 529 | return toRead; |
| 530 | } |
| 531 | |
| 532 | if (zs->seekTable.checksumFlag) { |
| 533 | XXH64_update(&zs->xxhState, (BYTE*)outTmp.dst + prevOutPos, |
| 534 | outTmp.pos - prevOutPos); |
| 535 | } |
| 536 | forwardProgress = outTmp.pos - prevOutPos; |
| 537 | if (forwardProgress == 0) { |
| 538 | if (noOutputProgressCount++ > ZSTD_SEEKABLE_NO_OUTPUT_PROGRESS_MAX) { |
| 539 | return ERROR(seekableIO); |
| 540 | } |
| 541 | } else { |
| 542 | noOutputProgressCount = 0; |
| 543 | } |
| 544 | zs->decompressedOffset += forwardProgress; |
| 545 | srcBytesRead += zs->in.pos - prevInPos; |
| 546 | |
| 547 | if (toRead == 0) { |
| 548 | /* frame complete */ |
| 549 | |
| 550 | /* verify checksum */ |
| 551 | if (zs->seekTable.checksumFlag && |
| 552 | (XXH64_digest(&zs->xxhState) & 0xFFFFFFFFU) != |
| 553 | zs->seekTable.entries[targetFrame].checksum) { |
| 554 | return ERROR(corruption_detected); |
| 555 | } |
| 556 | |
| 557 | if (zs->decompressedOffset < offset + len) { |
| 558 | /* go back to the start and force a reset of the stream */ |
| 559 | targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, zs->decompressedOffset); |
| 560 | /* in this case it will fail later with corruption_detected, since last block does not have checksum */ |
| 561 | assert(targetFrame != zs->seekTable.tableLen); |
| 562 | } |
| 563 | break; |
| 564 | } |
| 565 | |
| 566 | /* read in more data if we're done with this buffer */ |
| 567 | if (zs->in.pos == zs->in.size) { |
| 568 | toRead = MIN(toRead, SEEKABLE_BUFF_SIZE); |
| 569 | CHECK_IO(zs->src.read(zs->src.opaque, zs->inBuff, toRead)); |
| 570 | zs->in.size = toRead; |
| 571 | zs->in.pos = 0; |
| 572 | } |
| 573 | } /* while (zs->decompressedOffset < offset + len) */ |
| 574 | } while (zs->decompressedOffset != offset + len); |
| 575 | |
| 576 | return len; |
| 577 | } |
| 578 | |
| 579 | size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex) |
| 580 | { |
| 581 | if (frameIndex >= zs->seekTable.tableLen) { |
| 582 | return ERROR(frameIndex_tooLarge); |
| 583 | } |
| 584 | |
| 585 | { size_t const decompressedSize = |
| 586 | zs->seekTable.entries[frameIndex + 1].dOffset - |
| 587 | zs->seekTable.entries[frameIndex].dOffset; |
| 588 | if (dstSize < decompressedSize) { |
| 589 | return ERROR(dstSize_tooSmall); |
| 590 | } |
| 591 | return ZSTD_seekable_decompress( |
| 592 | zs, dst, decompressedSize, |
| 593 | zs->seekTable.entries[frameIndex].dOffset); |
| 594 | } |
| 595 | } |