648db22b |
1 | /* fitblk.c contains minimal changes required to be compiled with zlibWrapper: |
2 | * - #include "zlib.h" was changed to #include "zstd_zlibwrapper.h" |
3 | * - writing block to stdout was disabled */ |
4 | |
9e052883 |
5 | /* fitblk.c: example of fitting compressed output to a specified size |
6 | Not copyrighted -- provided to the public domain |
7 | Version 1.1 25 November 2004 Mark Adler */ |
8 | |
9 | /* Version history: |
10 | 1.0 24 Nov 2004 First version |
11 | 1.1 25 Nov 2004 Change deflateInit2() to deflateInit() |
12 | Use fixed-size, stack-allocated raw buffers |
13 | Simplify code moving compression to subroutines |
14 | Use assert() for internal errors |
15 | Add detailed description of approach |
16 | */ |
17 | |
18 | /* Approach to just fitting a requested compressed size: |
19 | |
20 | fitblk performs three compression passes on a portion of the input |
21 | data in order to determine how much of that input will compress to |
22 | nearly the requested output block size. The first pass generates |
23 | enough deflate blocks to produce output to fill the requested |
24 | output size plus a specified excess amount (see the EXCESS define |
25 | below). The last deflate block may go quite a bit past that, but |
26 | is discarded. The second pass decompresses and recompresses just |
27 | the compressed data that fit in the requested plus excess sized |
28 | buffer. The deflate process is terminated after that amount of |
29 | input, which is less than the amount consumed on the first pass. |
30 | The last deflate block of the result will be of a comparable size |
31 | to the final product, so that the header for that deflate block and |
32 | the compression ratio for that block will be about the same as in |
33 | the final product. The third compression pass decompresses the |
34 | result of the second step, but only the compressed data up to the |
35 | requested size minus an amount to allow the compressed stream to |
36 | complete (see the MARGIN define below). That will result in a |
37 | final compressed stream whose length is less than or equal to the |
38 | requested size. Assuming sufficient input and a requested size |
39 | greater than a few hundred bytes, the shortfall will typically be |
40 | less than ten bytes. |
41 | |
42 | If the input is short enough that the first compression completes |
43 | before filling the requested output size, then that compressed |
44 | stream is returned with no recompression. |
45 | |
46 | EXCESS is chosen to be just greater than the shortfall seen in a |
47 | two pass approach similar to the above. That shortfall is due to |
48 | the last deflate block compressing more efficiently with a smaller |
49 | header on the second pass. EXCESS is set to be large enough so |
50 | that there is enough uncompressed data for the second pass to fill |
51 | out the requested size, and small enough so that the final deflate |
52 | block of the second pass will be close in size to the final deflate |
53 | block of the third and final pass. MARGIN is chosen to be just |
54 | large enough to assure that the final compression has enough room |
55 | to complete in all cases. |
56 | */ |
57 | |
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include "zstd_zlibwrapper.h"
9e052883 |
62 | |
648db22b |
63 | #define LOG_FITBLK(...) /*printf(__VA_ARGS__)*/ |
9e052883 |
64 | #define local static |
65 | |
66 | /* print nastygram and leave */ |
67 | local void quit(char *why) |
68 | { |
69 | fprintf(stderr, "fitblk abort: %s\n", why); |
70 | exit(1); |
71 | } |
72 | |
73 | #define RAWLEN 4096 /* intermediate uncompressed buffer size */ |
74 | |
75 | /* compress from file to def until provided buffer is full or end of |
76 | input reached; return last deflate() return value, or Z_ERRNO if |
77 | there was read error on the file */ |
78 | local int partcompress(FILE *in, z_streamp def) |
79 | { |
80 | int ret, flush; |
81 | unsigned char raw[RAWLEN]; |
82 | |
648db22b |
83 | flush = Z_SYNC_FLUSH; |
9e052883 |
84 | do { |
648db22b |
85 | def->avail_in = (uInt)fread(raw, 1, RAWLEN, in); |
9e052883 |
86 | if (ferror(in)) |
87 | return Z_ERRNO; |
88 | def->next_in = raw; |
89 | if (feof(in)) |
90 | flush = Z_FINISH; |
648db22b |
91 | LOG_FITBLK("partcompress1 avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out); |
9e052883 |
92 | ret = deflate(def, flush); |
648db22b |
93 | LOG_FITBLK("partcompress2 ret=%d avail_in=%d total_in=%d avail_out=%d total_out=%d\n", ret, (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out); |
9e052883 |
94 | assert(ret != Z_STREAM_ERROR); |
648db22b |
95 | } while (def->avail_out != 0 && flush == Z_SYNC_FLUSH); |
9e052883 |
96 | return ret; |
97 | } |
98 | |
99 | /* recompress from inf's input to def's output; the input for inf and |
100 | the output for def are set in those structures before calling; |
101 | return last deflate() return value, or Z_MEM_ERROR if inflate() |
102 | was not able to allocate enough memory when it needed to */ |
103 | local int recompress(z_streamp inf, z_streamp def) |
104 | { |
105 | int ret, flush; |
106 | unsigned char raw[RAWLEN]; |
107 | |
108 | flush = Z_NO_FLUSH; |
648db22b |
109 | LOG_FITBLK("recompress start\n"); |
9e052883 |
110 | do { |
111 | /* decompress */ |
112 | inf->avail_out = RAWLEN; |
113 | inf->next_out = raw; |
648db22b |
114 | LOG_FITBLK("recompress1inflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)inf->avail_in, (int)inf->total_in, (int)inf->avail_out, (int)inf->total_out); |
9e052883 |
115 | ret = inflate(inf, Z_NO_FLUSH); |
648db22b |
116 | LOG_FITBLK("recompress2inflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)inf->avail_in, (int)inf->total_in, (int)inf->avail_out, (int)inf->total_out); |
9e052883 |
117 | assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && |
118 | ret != Z_NEED_DICT); |
119 | if (ret == Z_MEM_ERROR) |
120 | return ret; |
121 | |
122 | /* compress what was decompressed until done or no room */ |
123 | def->avail_in = RAWLEN - inf->avail_out; |
124 | def->next_in = raw; |
125 | if (inf->avail_out != 0) |
126 | flush = Z_FINISH; |
648db22b |
127 | LOG_FITBLK("recompress1deflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out); |
9e052883 |
128 | ret = deflate(def, flush); |
648db22b |
129 | LOG_FITBLK("recompress2deflate ret=%d avail_in=%d total_in=%d avail_out=%d total_out=%d\n", ret, (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out); |
9e052883 |
130 | assert(ret != Z_STREAM_ERROR); |
131 | } while (ret != Z_STREAM_END && def->avail_out != 0); |
132 | return ret; |
133 | } |
134 | |
135 | #define EXCESS 256 /* empirically determined stream overage */ |
136 | #define MARGIN 8 /* amount to back off for completion */ |
137 | |
138 | /* compress from stdin to fixed-size block on stdout */ |
139 | int main(int argc, char **argv) |
140 | { |
141 | int ret; /* return code */ |
142 | unsigned size; /* requested fixed output block size */ |
143 | unsigned have; /* bytes written by deflate() call */ |
144 | unsigned char *blk; /* intermediate and final stream */ |
145 | unsigned char *tmp; /* close to desired size stream */ |
146 | z_stream def, inf; /* zlib deflate and inflate states */ |
147 | |
148 | /* get requested output size */ |
149 | if (argc != 2) |
150 | quit("need one argument: size of output block"); |
648db22b |
151 | ret = (int)strtol(argv[1], argv + 1, 10); |
9e052883 |
152 | if (argv[1][0] != 0) |
153 | quit("argument must be a number"); |
154 | if (ret < 8) /* 8 is minimum zlib stream size */ |
155 | quit("need positive size of 8 or greater"); |
156 | size = (unsigned)ret; |
157 | |
648db22b |
158 | printf("zlib version %s\n", ZLIB_VERSION); |
159 | if (ZWRAP_isUsingZSTDcompression()) printf("zstd version %s\n", zstdVersion()); |
160 | |
9e052883 |
161 | /* allocate memory for buffers and compression engine */ |
648db22b |
162 | blk = (unsigned char*)malloc(size + EXCESS); |
9e052883 |
163 | def.zalloc = Z_NULL; |
164 | def.zfree = Z_NULL; |
165 | def.opaque = Z_NULL; |
166 | ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); |
167 | if (ret != Z_OK || blk == NULL) |
168 | quit("out of memory"); |
169 | |
170 | /* compress from stdin until output full, or no more input */ |
171 | def.avail_out = size + EXCESS; |
172 | def.next_out = blk; |
648db22b |
173 | LOG_FITBLK("partcompress1 total_in=%d total_out=%d\n", (int)def.total_in, (int)def.total_out); |
9e052883 |
174 | ret = partcompress(stdin, &def); |
648db22b |
175 | printf("partcompress total_in=%d total_out=%d\n", (int)def.total_in, (int)def.total_out); |
9e052883 |
176 | if (ret == Z_ERRNO) |
177 | quit("error reading input"); |
178 | |
179 | /* if it all fit, then size was undersubscribed -- done! */ |
180 | if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { |
181 | /* write block to stdout */ |
182 | have = size + EXCESS - def.avail_out; |
648db22b |
183 | /* if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) |
184 | * quit("error writing output"); */ |
9e052883 |
185 | |
186 | /* clean up and print results to stderr */ |
187 | ret = deflateEnd(&def); |
188 | assert(ret != Z_STREAM_ERROR); |
189 | free(blk); |
190 | fprintf(stderr, |
191 | "%u bytes unused out of %u requested (all input)\n", |
192 | size - have, size); |
193 | return 0; |
194 | } |
195 | |
196 | /* it didn't all fit -- set up for recompression */ |
197 | inf.zalloc = Z_NULL; |
198 | inf.zfree = Z_NULL; |
199 | inf.opaque = Z_NULL; |
200 | inf.avail_in = 0; |
201 | inf.next_in = Z_NULL; |
202 | ret = inflateInit(&inf); |
648db22b |
203 | tmp = (unsigned char*)malloc(size + EXCESS); |
9e052883 |
204 | if (ret != Z_OK || tmp == NULL) |
205 | quit("out of memory"); |
206 | ret = deflateReset(&def); |
207 | assert(ret != Z_STREAM_ERROR); |
208 | |
209 | /* do first recompression close to the right amount */ |
210 | inf.avail_in = size + EXCESS; |
211 | inf.next_in = blk; |
212 | def.avail_out = size + EXCESS; |
213 | def.next_out = tmp; |
648db22b |
214 | LOG_FITBLK("recompress1 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out); |
9e052883 |
215 | ret = recompress(&inf, &def); |
648db22b |
216 | LOG_FITBLK("recompress1 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out); |
9e052883 |
217 | if (ret == Z_MEM_ERROR) |
218 | quit("out of memory"); |
219 | |
648db22b |
220 | /* set up for next recompression */ |
9e052883 |
221 | ret = inflateReset(&inf); |
222 | assert(ret != Z_STREAM_ERROR); |
223 | ret = deflateReset(&def); |
224 | assert(ret != Z_STREAM_ERROR); |
225 | |
226 | /* do second and final recompression (third compression) */ |
227 | inf.avail_in = size - MARGIN; /* assure stream will complete */ |
228 | inf.next_in = tmp; |
229 | def.avail_out = size; |
230 | def.next_out = blk; |
648db22b |
231 | LOG_FITBLK("recompress2 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out); |
9e052883 |
232 | ret = recompress(&inf, &def); |
648db22b |
233 | LOG_FITBLK("recompress2 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out); |
9e052883 |
234 | if (ret == Z_MEM_ERROR) |
235 | quit("out of memory"); |
236 | assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ |
237 | |
238 | /* done -- write block to stdout */ |
239 | have = size - def.avail_out; |
648db22b |
240 | /* if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) |
241 | * quit("error writing output"); */ |
9e052883 |
242 | |
243 | /* clean up and print results to stderr */ |
244 | free(tmp); |
245 | ret = inflateEnd(&inf); |
246 | assert(ret != Z_STREAM_ERROR); |
247 | ret = deflateEnd(&def); |
248 | assert(ret != Z_STREAM_ERROR); |
249 | free(blk); |
250 | fprintf(stderr, |
251 | "%u bytes unused out of %u requested (%lu input)\n", |
252 | size - have, size, def.total_in); |
253 | return 0; |
254 | } |