b24e7fce |
1 | /* gzread.c -- zlib functions for reading gzip files |
9e052883 |
2 | * Copyright (C) 2004-2017 Mark Adler |
b24e7fce |
3 | * For conditions of distribution and use, see copyright notice in zlib.h |
4 | */ |
5 | |
6 | #include "gzguts.h" |
7 | |
8 | /* Local functions */ |
9 | local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *)); |
10 | local int gz_avail OF((gz_statep)); |
11 | local int gz_look OF((gz_statep)); |
12 | local int gz_decomp OF((gz_statep)); |
13 | local int gz_fetch OF((gz_statep)); |
14 | local int gz_skip OF((gz_statep, z_off64_t)); |
15 | local z_size_t gz_read OF((gz_statep, voidp, z_size_t)); |
16 | |
17 | /* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from |
18 | state->fd, and update state->eof, state->err, and state->msg as appropriate. |
19 | This function needs to loop on read(), since read() is not guaranteed to |
20 | read the number of bytes requested, depending on the type of descriptor. */ |
21 | local int gz_load(state, buf, len, have) |
22 | gz_statep state; |
23 | unsigned char *buf; |
24 | unsigned len; |
25 | unsigned *have; |
26 | { |
27 | int ret; |
28 | unsigned get, max = ((unsigned)-1 >> 2) + 1; |
29 | |
30 | *have = 0; |
31 | do { |
32 | get = len - *have; |
33 | if (get > max) |
34 | get = max; |
35 | ret = read(state->fd, buf + *have, get); |
36 | if (ret <= 0) |
37 | break; |
38 | *have += (unsigned)ret; |
39 | } while (*have < len); |
40 | if (ret < 0) { |
41 | gz_error(state, Z_ERRNO, zstrerror()); |
42 | return -1; |
43 | } |
44 | if (ret == 0) |
45 | state->eof = 1; |
46 | return 0; |
47 | } |
48 | |
49 | /* Load up input buffer and set eof flag if last data loaded -- return -1 on |
50 | error, 0 otherwise. Note that the eof flag is set when the end of the input |
51 | file is reached, even though there may be unused data in the buffer. Once |
52 | that data has been used, no more attempts will be made to read the file. |
53 | If strm->avail_in != 0, then the current data is moved to the beginning of |
54 | the input buffer, and then the remainder of the buffer is loaded with the |
55 | available data from the input file. */ |
56 | local int gz_avail(state) |
57 | gz_statep state; |
58 | { |
59 | unsigned got; |
60 | z_streamp strm = &(state->strm); |
61 | |
62 | if (state->err != Z_OK && state->err != Z_BUF_ERROR) |
63 | return -1; |
64 | if (state->eof == 0) { |
65 | if (strm->avail_in) { /* copy what's there to the start */ |
66 | unsigned char *p = state->in; |
67 | unsigned const char *q = strm->next_in; |
68 | unsigned n = strm->avail_in; |
69 | do { |
70 | *p++ = *q++; |
71 | } while (--n); |
72 | } |
73 | if (gz_load(state, state->in + strm->avail_in, |
74 | state->size - strm->avail_in, &got) == -1) |
75 | return -1; |
76 | strm->avail_in += got; |
77 | strm->next_in = state->in; |
78 | } |
79 | return 0; |
80 | } |
81 | |
82 | /* Look for gzip header, set up for inflate or copy. state->x.have must be 0. |
83 | If this is the first time in, allocate required memory. state->how will be |
84 | left unchanged if there is no more input data available, will be set to COPY |
85 | if there is no gzip header and direct copying will be performed, or it will |
86 | be set to GZIP for decompression. If direct copying, then leftover input |
87 | data from the input buffer will be copied to the output buffer. In that |
88 | case, all further file reads will be directly to either the output buffer or |
89 | a user buffer. If decompressing, the inflate state will be initialized. |
90 | gz_look() will return 0 on success or -1 on failure. */ |
91 | local int gz_look(state) |
92 | gz_statep state; |
93 | { |
94 | z_streamp strm = &(state->strm); |
95 | |
96 | /* allocate read buffers and inflate memory */ |
97 | if (state->size == 0) { |
98 | /* allocate buffers */ |
99 | state->in = (unsigned char *)malloc(state->want); |
100 | state->out = (unsigned char *)malloc(state->want << 1); |
101 | if (state->in == NULL || state->out == NULL) { |
102 | free(state->out); |
103 | free(state->in); |
104 | gz_error(state, Z_MEM_ERROR, "out of memory"); |
105 | return -1; |
106 | } |
107 | state->size = state->want; |
108 | |
109 | /* allocate inflate memory */ |
110 | state->strm.zalloc = Z_NULL; |
111 | state->strm.zfree = Z_NULL; |
112 | state->strm.opaque = Z_NULL; |
113 | state->strm.avail_in = 0; |
114 | state->strm.next_in = Z_NULL; |
115 | if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */ |
116 | free(state->out); |
117 | free(state->in); |
118 | state->size = 0; |
119 | gz_error(state, Z_MEM_ERROR, "out of memory"); |
120 | return -1; |
121 | } |
122 | } |
123 | |
124 | /* get at least the magic bytes in the input buffer */ |
125 | if (strm->avail_in < 2) { |
126 | if (gz_avail(state) == -1) |
127 | return -1; |
128 | if (strm->avail_in == 0) |
129 | return 0; |
130 | } |
131 | |
132 | /* look for gzip magic bytes -- if there, do gzip decoding (note: there is |
133 | a logical dilemma here when considering the case of a partially written |
134 | gzip file, to wit, if a single 31 byte is written, then we cannot tell |
135 | whether this is a single-byte file, or just a partially written gzip |
136 | file -- for here we assume that if a gzip file is being written, then |
137 | the header will be written in a single operation, so that reading a |
138 | single byte is sufficient indication that it is not a gzip file) */ |
139 | if (strm->avail_in > 1 && |
140 | strm->next_in[0] == 31 && strm->next_in[1] == 139) { |
141 | inflateReset(strm); |
142 | state->how = GZIP; |
143 | state->direct = 0; |
144 | return 0; |
145 | } |
146 | |
147 | /* no gzip header -- if we were decoding gzip before, then this is trailing |
148 | garbage. Ignore the trailing garbage and finish. */ |
149 | if (state->direct == 0) { |
150 | strm->avail_in = 0; |
151 | state->eof = 1; |
152 | state->x.have = 0; |
153 | return 0; |
154 | } |
155 | |
156 | /* doing raw i/o, copy any leftover input to output -- this assumes that |
157 | the output buffer is larger than the input buffer, which also assures |
158 | space for gzungetc() */ |
159 | state->x.next = state->out; |
9e052883 |
160 | memcpy(state->x.next, strm->next_in, strm->avail_in); |
161 | state->x.have = strm->avail_in; |
162 | strm->avail_in = 0; |
b24e7fce |
163 | state->how = COPY; |
164 | state->direct = 1; |
165 | return 0; |
166 | } |
167 | |
168 | /* Decompress from input to the provided next_out and avail_out in the state. |
169 | On return, state->x.have and state->x.next point to the just decompressed |
170 | data. If the gzip stream completes, state->how is reset to LOOK to look for |
171 | the next gzip stream or raw data, once state->x.have is depleted. Returns 0 |
172 | on success, -1 on failure. */ |
173 | local int gz_decomp(state) |
174 | gz_statep state; |
175 | { |
176 | int ret = Z_OK; |
177 | unsigned had; |
178 | z_streamp strm = &(state->strm); |
179 | |
180 | /* fill output buffer up to end of deflate stream */ |
181 | had = strm->avail_out; |
182 | do { |
183 | /* get more input for inflate() */ |
184 | if (strm->avail_in == 0 && gz_avail(state) == -1) |
185 | return -1; |
186 | if (strm->avail_in == 0) { |
187 | gz_error(state, Z_BUF_ERROR, "unexpected end of file"); |
188 | break; |
189 | } |
190 | |
191 | /* decompress and handle errors */ |
192 | ret = inflate(strm, Z_NO_FLUSH); |
193 | if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { |
194 | gz_error(state, Z_STREAM_ERROR, |
195 | "internal error: inflate stream corrupt"); |
196 | return -1; |
197 | } |
198 | if (ret == Z_MEM_ERROR) { |
199 | gz_error(state, Z_MEM_ERROR, "out of memory"); |
200 | return -1; |
201 | } |
202 | if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ |
203 | gz_error(state, Z_DATA_ERROR, |
204 | strm->msg == NULL ? "compressed data error" : strm->msg); |
205 | return -1; |
206 | } |
207 | } while (strm->avail_out && ret != Z_STREAM_END); |
208 | |
209 | /* update available output */ |
210 | state->x.have = had - strm->avail_out; |
211 | state->x.next = strm->next_out - state->x.have; |
212 | |
213 | /* if the gzip stream completed successfully, look for another */ |
214 | if (ret == Z_STREAM_END) |
215 | state->how = LOOK; |
216 | |
217 | /* good decompression */ |
218 | return 0; |
219 | } |
220 | |
221 | /* Fetch data and put it in the output buffer. Assumes state->x.have is 0. |
222 | Data is either copied from the input file or decompressed from the input |
223 | file depending on state->how. If state->how is LOOK, then a gzip header is |
224 | looked for to determine whether to copy or decompress. Returns -1 on error, |
225 | otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the |
226 | end of the input file has been reached and all data has been processed. */ |
227 | local int gz_fetch(state) |
228 | gz_statep state; |
229 | { |
230 | z_streamp strm = &(state->strm); |
231 | |
232 | do { |
233 | switch(state->how) { |
234 | case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */ |
235 | if (gz_look(state) == -1) |
236 | return -1; |
237 | if (state->how == LOOK) |
238 | return 0; |
239 | break; |
240 | case COPY: /* -> COPY */ |
241 | if (gz_load(state, state->out, state->size << 1, &(state->x.have)) |
242 | == -1) |
243 | return -1; |
244 | state->x.next = state->out; |
245 | return 0; |
246 | case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */ |
247 | strm->avail_out = state->size << 1; |
248 | strm->next_out = state->out; |
249 | if (gz_decomp(state) == -1) |
250 | return -1; |
251 | } |
252 | } while (state->x.have == 0 && (!state->eof || strm->avail_in)); |
253 | return 0; |
254 | } |
255 | |
256 | /* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ |
257 | local int gz_skip(state, len) |
258 | gz_statep state; |
259 | z_off64_t len; |
260 | { |
261 | unsigned n; |
262 | |
263 | /* skip over len bytes or reach end-of-file, whichever comes first */ |
264 | while (len) |
265 | /* skip over whatever is in output buffer */ |
266 | if (state->x.have) { |
267 | n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ? |
268 | (unsigned)len : state->x.have; |
269 | state->x.have -= n; |
270 | state->x.next += n; |
271 | state->x.pos += n; |
272 | len -= n; |
273 | } |
274 | |
275 | /* output buffer empty -- return if we're at the end of the input */ |
276 | else if (state->eof && state->strm.avail_in == 0) |
277 | break; |
278 | |
279 | /* need more data to skip -- load up output buffer */ |
280 | else { |
281 | /* get more output, looking for header if required */ |
282 | if (gz_fetch(state) == -1) |
283 | return -1; |
284 | } |
285 | return 0; |
286 | } |
287 | |
288 | /* Read len bytes into buf from file, or less than len up to the end of the |
289 | input. Return the number of bytes read. If zero is returned, either the |
290 | end of file was reached, or there was an error. state->err must be |
291 | consulted in that case to determine which. */ |
292 | local z_size_t gz_read(state, buf, len) |
293 | gz_statep state; |
294 | voidp buf; |
295 | z_size_t len; |
296 | { |
297 | z_size_t got; |
298 | unsigned n; |
299 | |
300 | /* if len is zero, avoid unnecessary operations */ |
301 | if (len == 0) |
302 | return 0; |
303 | |
304 | /* process a skip request */ |
305 | if (state->seek) { |
306 | state->seek = 0; |
307 | if (gz_skip(state, state->skip) == -1) |
308 | return 0; |
309 | } |
310 | |
311 | /* get len bytes to buf, or less than len if at the end */ |
312 | got = 0; |
313 | do { |
314 | /* set n to the maximum amount of len that fits in an unsigned int */ |
9e052883 |
315 | n = (unsigned)-1; |
b24e7fce |
316 | if (n > len) |
9e052883 |
317 | n = (unsigned)len; |
b24e7fce |
318 | |
319 | /* first just try copying data from the output buffer */ |
320 | if (state->x.have) { |
321 | if (state->x.have < n) |
322 | n = state->x.have; |
323 | memcpy(buf, state->x.next, n); |
324 | state->x.next += n; |
325 | state->x.have -= n; |
326 | } |
327 | |
328 | /* output buffer empty -- return if we're at the end of the input */ |
329 | else if (state->eof && state->strm.avail_in == 0) { |
330 | state->past = 1; /* tried to read past end */ |
331 | break; |
332 | } |
333 | |
334 | /* need output data -- for small len or new stream load up our output |
335 | buffer */ |
336 | else if (state->how == LOOK || n < (state->size << 1)) { |
337 | /* get more output, looking for header if required */ |
338 | if (gz_fetch(state) == -1) |
339 | return 0; |
340 | continue; /* no progress yet -- go back to copy above */ |
341 | /* the copy above assures that we will leave with space in the |
342 | output buffer, allowing at least one gzungetc() to succeed */ |
343 | } |
344 | |
345 | /* large len -- read directly into user buffer */ |
346 | else if (state->how == COPY) { /* read directly */ |
347 | if (gz_load(state, (unsigned char *)buf, n, &n) == -1) |
348 | return 0; |
349 | } |
350 | |
351 | /* large len -- decompress directly into user buffer */ |
352 | else { /* state->how == GZIP */ |
353 | state->strm.avail_out = n; |
354 | state->strm.next_out = (unsigned char *)buf; |
355 | if (gz_decomp(state) == -1) |
356 | return 0; |
357 | n = state->x.have; |
358 | state->x.have = 0; |
359 | } |
360 | |
361 | /* update progress */ |
362 | len -= n; |
363 | buf = (char *)buf + n; |
364 | got += n; |
365 | state->x.pos += n; |
366 | } while (len); |
367 | |
368 | /* return number of bytes read into user buffer */ |
369 | return got; |
370 | } |
371 | |
372 | /* -- see zlib.h -- */ |
373 | int ZEXPORT gzread(file, buf, len) |
374 | gzFile file; |
375 | voidp buf; |
376 | unsigned len; |
377 | { |
378 | gz_statep state; |
379 | |
380 | /* get internal structure */ |
381 | if (file == NULL) |
382 | return -1; |
383 | state = (gz_statep)file; |
384 | |
385 | /* check that we're reading and that there's no (serious) error */ |
386 | if (state->mode != GZ_READ || |
387 | (state->err != Z_OK && state->err != Z_BUF_ERROR)) |
388 | return -1; |
389 | |
390 | /* since an int is returned, make sure len fits in one, otherwise return |
391 | with an error (this avoids a flaw in the interface) */ |
392 | if ((int)len < 0) { |
393 | gz_error(state, Z_STREAM_ERROR, "request does not fit in an int"); |
394 | return -1; |
395 | } |
396 | |
397 | /* read len or fewer bytes to buf */ |
9e052883 |
398 | len = (unsigned)gz_read(state, buf, len); |
b24e7fce |
399 | |
400 | /* check for an error */ |
401 | if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR) |
402 | return -1; |
403 | |
404 | /* return the number of bytes read (this is assured to fit in an int) */ |
405 | return (int)len; |
406 | } |
407 | |
408 | /* -- see zlib.h -- */ |
409 | z_size_t ZEXPORT gzfread(buf, size, nitems, file) |
410 | voidp buf; |
411 | z_size_t size; |
412 | z_size_t nitems; |
413 | gzFile file; |
414 | { |
415 | z_size_t len; |
416 | gz_statep state; |
417 | |
418 | /* get internal structure */ |
419 | if (file == NULL) |
420 | return 0; |
421 | state = (gz_statep)file; |
422 | |
423 | /* check that we're reading and that there's no (serious) error */ |
424 | if (state->mode != GZ_READ || |
425 | (state->err != Z_OK && state->err != Z_BUF_ERROR)) |
426 | return 0; |
427 | |
428 | /* compute bytes to read -- error on overflow */ |
429 | len = nitems * size; |
430 | if (size && len / size != nitems) { |
431 | gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t"); |
432 | return 0; |
433 | } |
434 | |
435 | /* read len or fewer bytes to buf, return the number of full items read */ |
436 | return len ? gz_read(state, buf, len) / size : 0; |
437 | } |
438 | |
439 | /* -- see zlib.h -- */ |
440 | #ifdef Z_PREFIX_SET |
441 | # undef z_gzgetc |
442 | #else |
443 | # undef gzgetc |
444 | #endif |
445 | int ZEXPORT gzgetc(file) |
446 | gzFile file; |
447 | { |
b24e7fce |
448 | unsigned char buf[1]; |
449 | gz_statep state; |
450 | |
451 | /* get internal structure */ |
452 | if (file == NULL) |
453 | return -1; |
454 | state = (gz_statep)file; |
455 | |
456 | /* check that we're reading and that there's no (serious) error */ |
457 | if (state->mode != GZ_READ || |
458 | (state->err != Z_OK && state->err != Z_BUF_ERROR)) |
459 | return -1; |
460 | |
461 | /* try output buffer (no need to check for skip request) */ |
462 | if (state->x.have) { |
463 | state->x.have--; |
464 | state->x.pos++; |
465 | return *(state->x.next)++; |
466 | } |
467 | |
468 | /* nothing there -- try gz_read() */ |
9e052883 |
469 | return gz_read(state, buf, 1) < 1 ? -1 : buf[0]; |
b24e7fce |
470 | } |
471 | |
472 | int ZEXPORT gzgetc_(file) |
473 | gzFile file; |
474 | { |
475 | return gzgetc(file); |
476 | } |
477 | |
478 | /* -- see zlib.h -- */ |
479 | int ZEXPORT gzungetc(c, file) |
480 | int c; |
481 | gzFile file; |
482 | { |
483 | gz_statep state; |
484 | |
485 | /* get internal structure */ |
486 | if (file == NULL) |
487 | return -1; |
488 | state = (gz_statep)file; |
489 | |
490 | /* check that we're reading and that there's no (serious) error */ |
491 | if (state->mode != GZ_READ || |
492 | (state->err != Z_OK && state->err != Z_BUF_ERROR)) |
493 | return -1; |
494 | |
495 | /* process a skip request */ |
496 | if (state->seek) { |
497 | state->seek = 0; |
498 | if (gz_skip(state, state->skip) == -1) |
499 | return -1; |
500 | } |
501 | |
502 | /* can't push EOF */ |
503 | if (c < 0) |
504 | return -1; |
505 | |
506 | /* if output buffer empty, put byte at end (allows more pushing) */ |
507 | if (state->x.have == 0) { |
508 | state->x.have = 1; |
509 | state->x.next = state->out + (state->size << 1) - 1; |
510 | state->x.next[0] = (unsigned char)c; |
511 | state->x.pos--; |
512 | state->past = 0; |
513 | return c; |
514 | } |
515 | |
516 | /* if no room, give up (must have already done a gzungetc()) */ |
517 | if (state->x.have == (state->size << 1)) { |
518 | gz_error(state, Z_DATA_ERROR, "out of room to push characters"); |
519 | return -1; |
520 | } |
521 | |
522 | /* slide output data if needed and insert byte before existing data */ |
523 | if (state->x.next == state->out) { |
524 | unsigned char *src = state->out + state->x.have; |
525 | unsigned char *dest = state->out + (state->size << 1); |
526 | while (src > state->out) |
527 | *--dest = *--src; |
528 | state->x.next = dest; |
529 | } |
530 | state->x.have++; |
531 | state->x.next--; |
532 | state->x.next[0] = (unsigned char)c; |
533 | state->x.pos--; |
534 | state->past = 0; |
535 | return c; |
536 | } |
537 | |
538 | /* -- see zlib.h -- */ |
539 | char * ZEXPORT gzgets(file, buf, len) |
540 | gzFile file; |
541 | char *buf; |
542 | int len; |
543 | { |
544 | unsigned left, n; |
545 | char *str; |
546 | unsigned char *eol; |
547 | gz_statep state; |
548 | |
549 | /* check parameters and get internal structure */ |
550 | if (file == NULL || buf == NULL || len < 1) |
551 | return NULL; |
552 | state = (gz_statep)file; |
553 | |
554 | /* check that we're reading and that there's no (serious) error */ |
555 | if (state->mode != GZ_READ || |
556 | (state->err != Z_OK && state->err != Z_BUF_ERROR)) |
557 | return NULL; |
558 | |
559 | /* process a skip request */ |
560 | if (state->seek) { |
561 | state->seek = 0; |
562 | if (gz_skip(state, state->skip) == -1) |
563 | return NULL; |
564 | } |
565 | |
566 | /* copy output bytes up to new line or len - 1, whichever comes first -- |
567 | append a terminating zero to the string (we don't check for a zero in |
568 | the contents, let the user worry about that) */ |
569 | str = buf; |
570 | left = (unsigned)len - 1; |
571 | if (left) do { |
572 | /* assure that something is in the output buffer */ |
573 | if (state->x.have == 0 && gz_fetch(state) == -1) |
574 | return NULL; /* error */ |
575 | if (state->x.have == 0) { /* end of file */ |
576 | state->past = 1; /* read past end */ |
577 | break; /* return what we have */ |
578 | } |
579 | |
580 | /* look for end-of-line in current output buffer */ |
581 | n = state->x.have > left ? left : state->x.have; |
582 | eol = (unsigned char *)memchr(state->x.next, '\n', n); |
583 | if (eol != NULL) |
584 | n = (unsigned)(eol - state->x.next) + 1; |
585 | |
586 | /* copy through end-of-line, or remainder if not found */ |
587 | memcpy(buf, state->x.next, n); |
588 | state->x.have -= n; |
589 | state->x.next += n; |
590 | state->x.pos += n; |
591 | left -= n; |
592 | buf += n; |
593 | } while (left && eol == NULL); |
594 | |
595 | /* return terminated string, or if nothing, end of file */ |
596 | if (buf == str) |
597 | return NULL; |
598 | buf[0] = 0; |
599 | return str; |
600 | } |
601 | |
602 | /* -- see zlib.h -- */ |
603 | int ZEXPORT gzdirect(file) |
604 | gzFile file; |
605 | { |
606 | gz_statep state; |
607 | |
608 | /* get internal structure */ |
609 | if (file == NULL) |
610 | return 0; |
611 | state = (gz_statep)file; |
612 | |
613 | /* if the state is not known, but we can find out, then do so (this is |
614 | mainly for right after a gzopen() or gzdopen()) */ |
615 | if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) |
616 | (void)gz_look(state); |
617 | |
618 | /* return 1 if transparent, 0 if processing a gzip stream */ |
619 | return state->direct; |
620 | } |
621 | |
622 | /* -- see zlib.h -- */ |
623 | int ZEXPORT gzclose_r(file) |
624 | gzFile file; |
625 | { |
626 | int ret, err; |
627 | gz_statep state; |
628 | |
629 | /* get internal structure */ |
630 | if (file == NULL) |
631 | return Z_STREAM_ERROR; |
632 | state = (gz_statep)file; |
633 | |
634 | /* check that we're reading */ |
635 | if (state->mode != GZ_READ) |
636 | return Z_STREAM_ERROR; |
637 | |
638 | /* free memory and close file */ |
639 | if (state->size) { |
640 | inflateEnd(&(state->strm)); |
641 | free(state->out); |
642 | free(state->in); |
643 | } |
644 | err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK; |
645 | gz_error(state, Z_OK, NULL); |
646 | free(state->path); |
647 | ret = close(state->fd); |
648 | free(state); |
649 | return ret ? Z_ERRNO : err; |
650 | } |