9e052883 |
1 | /* enough.c -- determine the maximum size of inflate's Huffman code tables over |
2 | * all possible valid and complete prefix codes, subject to a length limit. |
3 | * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler |
4 | * Version 1.5 5 August 2018 Mark Adler |
5 | */ |
6 | |
7 | /* Version history: |
8 | 1.0 3 Jan 2007 First version (derived from codecount.c version 1.4) |
9 | 1.1 4 Jan 2007 Use faster incremental table usage computation |
10 | Prune examine() search on previously visited states |
11 | 1.2 5 Jan 2007 Comments clean up |
12 | As inflate does, decrease root for short codes |
13 | Refuse cases where inflate would increase root |
14 | 1.3 17 Feb 2008 Add argument for initial root table size |
15 | Fix bug for initial root table size == max - 1 |
16 | Use a macro to compute the history index |
17 | 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!) |
18 | Clean up comparisons of different types |
19 | Clean up code indentation |
20 | 1.5 5 Aug 2018 Clean up code style, formatting, and comments |
21 | Show all the codes for the maximum, and only the maximum |
22 | */ |
23 | |
24 | /* |
25 | Examine all possible prefix codes for a given number of symbols and a |
26 | maximum code length in bits to determine the maximum table size for zlib's |
27 | inflate. Only complete prefix codes are counted. |
28 | |
29 | Two codes are considered distinct if the vectors of the number of codes per |
30 | length are not identical. So permutations of the symbol assignments result |
31 | in the same code for the counting, as do permutations of the assignments of |
32 | the bit values to the codes (i.e. only canonical codes are counted). |
33 | |
34 | We build a code from shorter to longer lengths, determining how many symbols |
35 | are coded at each length. At each step, we have how many symbols remain to |
36 | be coded, what the last code length used was, and how many bit patterns of |
37 | that length remain unused. Then we add one to the code length and double the |
38 | number of unused patterns to graduate to the next code length. We then |
39 | assign all portions of the remaining symbols to that code length that |
40 | preserve the properties of a correct and eventually complete code. Those |
41 | properties are: we cannot use more bit patterns than are available; and when |
42 | all the symbols are used, there are exactly zero possible bit patterns left |
43 | unused. |
44 | |
45 | The inflate Huffman decoding algorithm uses two-level lookup tables for |
46 | speed. There is a single first-level table to decode codes up to root bits |
47 | in length (root == 9 for literal/length codes and root == 6 for distance |
48 | codes, in the current inflate implementation). The base table has 1 << root |
49 | entries and is indexed by the next root bits of input. Codes shorter than |
50 | root bits have replicated table entries, so that the correct entry is |
51 | pointed to regardless of the bits that follow the short code. If the code is |
52 | longer than root bits, then the table entry points to a second-level table. |
53 | The size of that table is determined by the longest code with that root-bit |
54 | prefix. If that longest code has length len, then the table has size 1 << |
55 | (len - root), to index the remaining bits in that set of codes. Each |
56 | subsequent root-bit prefix then has its own sub-table. The total number of |
57 | table entries required by the code is calculated incrementally as the number |
58 | of codes at each bit length is populated. When all of the codes are shorter |
59 | than root bits, then root is reduced to the longest code length, resulting |
60 | in a single, smaller, one-level table. |
61 | |
62 | The inflate algorithm also provides for small values of root (relative to |
63 | the log2 of the number of symbols), where the shortest code has more bits |
64 | than root. In that case, root is increased to the length of the shortest |
65 | code. This program, by design, does not handle that case, so it is verified |
66 | that the number of symbols is less than 1 << (root + 1). |
67 | |
68 | In order to speed up the examination (by about ten orders of magnitude for |
69 | the default arguments), the intermediate states in the build-up of a code |
70 | are remembered and previously visited branches are pruned. The memory |
71 | required for this will increase rapidly with the total number of symbols and |
72 | the maximum code length in bits. However this is a very small price to pay |
73 | for the vast speedup. |
74 | |
75 | First, all of the possible prefix codes are counted, and reachable |
76 | intermediate states are noted by a non-zero count in a saved-results array. |
77 | Second, the intermediate states that lead to (root + 1) bit or longer codes |
78 | are used to look at all sub-codes from those junctures for their inflate |
79 | memory usage. (The amount of memory used is not affected by the number of |
80 | codes of root bits or less in length.) Third, the visited states in the |
81 | construction of those sub-codes and the associated calculation of the table |
82 | size is recalled in order to avoid recalculating from the same juncture. |
83 | Beginning the code examination at (root + 1) bit codes, which is enabled by |
84 | identifying the reachable nodes, accounts for about six of the orders of |
85 | magnitude of improvement for the default arguments. About another four |
86 | orders of magnitude come from not revisiting previous states. Out of |
87 | approximately 2x10^16 possible prefix codes, only about 2x10^6 sub-codes |
88 | need to be examined to cover all of the possible table memory usage cases |
89 | for the default arguments of 286 symbols limited to 15-bit codes. |
90 | |
91 | Note that the uintmax_t type is used for counting. It is quite easy to |
92 | exceed the capacity of an eight-byte integer with a large number of symbols |
93 | and a large maximum code length, so multiple-precision arithmetic would need |
94 | to replace the integer arithmetic in that case. This program will abort if |
95 | an overflow occurs. The big_t type identifies where the counting takes |
96 | place. |
97 | |
98 | The uintmax_t type is also used for calculating the number of possible codes |
99 | remaining at the maximum length. This limits the maximum code length to the |
100 | number of bits in a long long minus the number of bits needed to represent |
101 | the symbols in a flat code. The code_t type identifies where the bit-pattern |
102 | counting takes place. |
103 | */ |
104 | |
105 | #include <stdio.h> |
106 | #include <stdlib.h> |
107 | #include <string.h> |
108 | #include <stdarg.h> |
109 | #include <stdint.h> |
110 | #include <assert.h> |
111 | |
112 | #define local static |
113 | |
114 | // Special data types. |
115 | typedef uintmax_t big_t; // type for code counting |
116 | #define PRIbig "ju" // printf format for big_t |
117 | typedef uintmax_t code_t; // type for bit pattern counting |
118 | struct tab { // type for been-here check |
119 | size_t len; // allocated length of bit vector in octets |
120 | char *vec; // allocated bit vector |
121 | }; |
122 | |
123 | /* The array for saving results, num[], is indexed with this triplet: |
124 | |
125 | syms: number of symbols remaining to code |
126 | left: number of available bit patterns at length len |
127 | len: number of bits in the codes currently being assigned |
128 | |
129 | Those indices are constrained thusly when saving results: |
130 | |
131 | syms: 3..totsym (totsym == total symbols to code) |
132 | left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6) |
133 | len: 1..max - 1 (max == maximum code length in bits) |
134 | |
135 | syms == 2 is not saved since that immediately leads to a single code. left |
136 | must be even, since it represents the number of available bit patterns at |
137 | the current length, which is double the number at the previous length. left |
138 | ends at syms-1 since left == syms immediately results in a single code. |
139 | (left > sym is not allowed since that would result in an incomplete code.) |
140 | len is less than max, since the code completes immediately when len == max. |
141 | |
142 | The offset into the array is calculated for the three indices with the first |
143 | one (syms) being outermost, and the last one (len) being innermost. We build |
144 | the array with length max-1 lists for the len index, with syms-3 of those |
145 | for each symbol. There are totsym-2 of those, with each one varying in |
146 | length as a function of sym. See the calculation of index in map() for the |
147 | index, and the calculation of size in main() for the size of the array. |
148 | |
149 | For the deflate example of 286 symbols limited to 15-bit codes, the array |
150 | has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than half |
151 | of the space allocated for saved results is actually used -- not all |
152 | possible triplets are reached in the generation of valid prefix codes. |
153 | */ |
154 | |
155 | /* The array for tracking visited states, done[], is itself indexed identically |
156 | to the num[] array as described above for the (syms, left, len) triplet. |
157 | Each element in the array is further indexed by the (mem, rem) doublet, |
158 | where mem is the amount of inflate table space used so far, and rem is the |
159 | remaining unused entries in the current inflate sub-table. Each indexed |
160 | element is simply one bit indicating whether the state has been visited or |
161 | not. Since the ranges for mem and rem are not known a priori, each bit |
162 | vector is of a variable size, and grows as needed to accommodate the visited |
163 | states. mem and rem are used to calculate a single index in a triangular |
164 | array. Since the range of mem is expected in the default case to be about |
165 | ten times larger than the range of rem, the array is skewed to reduce the |
166 | memory usage, with eight times the range for mem than for rem. See the |
167 | calculations for offset and bit in been_here() for the details. |
168 | |
169 | For the deflate example of 286 symbols limited to 15-bit codes, the bit |
170 | vectors grow to total 5.5 MB, in addition to the 4.3 MB done array itself. |
171 | */ |
172 | |
173 | // Type for a variable-length, allocated string. |
174 | typedef struct { |
175 | char *str; // pointer to allocated string |
176 | size_t size; // size of allocation |
177 | size_t len; // length of string, not including terminating zero |
178 | } string_t; |
179 | |
180 | // Clear a string_t. |
181 | local void string_clear(string_t *s) { |
182 | s->str[0] = 0; |
183 | s->len = 0; |
184 | } |
185 | |
186 | // Initialize a string_t. |
187 | local void string_init(string_t *s) { |
188 | s->size = 16; |
189 | s->str = malloc(s->size); |
190 | assert(s->str != NULL && "out of memory"); |
191 | string_clear(s); |
192 | } |
193 | |
194 | // Release the allocation of a string_t. |
195 | local void string_free(string_t *s) { |
196 | free(s->str); |
197 | s->str = NULL; |
198 | s->size = 0; |
199 | s->len = 0; |
200 | } |
201 | |
202 | // Save the results of printf with fmt and the subsequent argument list to s. |
203 | // Each call appends to s. The allocated space for s is increased as needed. |
204 | local void string_printf(string_t *s, char *fmt, ...) { |
205 | va_list ap; |
206 | va_start(ap, fmt); |
207 | size_t len = s->len; |
208 | int ret = vsnprintf(s->str + len, s->size - len, fmt, ap); |
209 | assert(ret >= 0 && "out of memory"); |
210 | s->len += ret; |
211 | if (s->size < s->len + 1) { |
212 | do { |
213 | s->size <<= 1; |
214 | assert(s->size != 0 && "overflow"); |
215 | } while (s->size < s->len + 1); |
216 | s->str = realloc(s->str, s->size); |
217 | assert(s->str != NULL && "out of memory"); |
218 | vsnprintf(s->str + len, s->size - len, fmt, ap); |
219 | } |
220 | va_end(ap); |
221 | } |
222 | |
223 | // Globals to avoid propagating constants or constant pointers recursively. |
224 | struct { |
225 | int max; // maximum allowed bit length for the codes |
226 | int root; // size of base code table in bits |
227 | int large; // largest code table so far |
228 | size_t size; // number of elements in num and done |
229 | big_t tot; // total number of codes with maximum tables size |
230 | string_t out; // display of subcodes for maximum tables size |
231 | int *code; // number of symbols assigned to each bit length |
232 | big_t *num; // saved results array for code counting |
233 | struct tab *done; // states already evaluated array |
234 | } g; |
235 | |
236 | // Index function for num[] and done[]. |
237 | local inline size_t map(int syms, int left, int len) { |
238 | return ((size_t)((syms - 1) >> 1) * ((syms - 2) >> 1) + |
239 | (left >> 1) - 1) * (g.max - 1) + |
240 | len - 1; |
241 | } |
242 | |
243 | // Free allocated space in globals. |
244 | local void cleanup(void) { |
245 | if (g.done != NULL) { |
246 | for (size_t n = 0; n < g.size; n++) |
247 | if (g.done[n].len) |
248 | free(g.done[n].vec); |
249 | g.size = 0; |
250 | free(g.done); g.done = NULL; |
251 | } |
252 | free(g.num); g.num = NULL; |
253 | free(g.code); g.code = NULL; |
254 | string_free(&g.out); |
255 | } |
256 | |
257 | // Return the number of possible prefix codes using bit patterns of lengths len |
258 | // through max inclusive, coding syms symbols, with left bit patterns of length |
259 | // len unused -- return -1 if there is an overflow in the counting. Keep a |
260 | // record of previous results in num to prevent repeating the same calculation. |
261 | local big_t count(int syms, int left, int len) { |
262 | // see if only one possible code |
263 | if (syms == left) |
264 | return 1; |
265 | |
266 | // note and verify the expected state |
267 | assert(syms > left && left > 0 && len < g.max); |
268 | |
269 | // see if we've done this one already |
270 | size_t index = map(syms, left, len); |
271 | big_t got = g.num[index]; |
272 | if (got) |
273 | return got; // we have -- return the saved result |
274 | |
275 | // we need to use at least this many bit patterns so that the code won't be |
276 | // incomplete at the next length (more bit patterns than symbols) |
277 | int least = (left << 1) - syms; |
278 | if (least < 0) |
279 | least = 0; |
280 | |
281 | // we can use at most this many bit patterns, lest there not be enough |
282 | // available for the remaining symbols at the maximum length (if there were |
283 | // no limit to the code length, this would become: most = left - 1) |
284 | int most = (((code_t)left << (g.max - len)) - syms) / |
285 | (((code_t)1 << (g.max - len)) - 1); |
286 | |
287 | // count all possible codes from this juncture and add them up |
288 | big_t sum = 0; |
289 | for (int use = least; use <= most; use++) { |
290 | got = count(syms - use, (left - use) << 1, len + 1); |
291 | sum += got; |
292 | if (got == (big_t)-1 || sum < got) // overflow |
293 | return (big_t)-1; |
294 | } |
295 | |
296 | // verify that all recursive calls are productive |
297 | assert(sum != 0); |
298 | |
299 | // save the result and return it |
300 | g.num[index] = sum; |
301 | return sum; |
302 | } |
303 | |
304 | // Return true if we've been here before, set to true if not. Set a bit in a |
305 | // bit vector to indicate visiting this state. Each (syms,len,left) state has a |
306 | // variable size bit vector indexed by (mem,rem). The bit vector is lengthened |
307 | // as needed to allow setting the (mem,rem) bit. |
308 | local int been_here(int syms, int left, int len, int mem, int rem) { |
309 | // point to vector for (syms,left,len), bit in vector for (mem,rem) |
310 | size_t index = map(syms, left, len); |
311 | mem -= 1 << g.root; // mem always includes the root table |
312 | mem >>= 1; // mem and rem are always even |
313 | rem >>= 1; |
314 | size_t offset = (mem >> 3) + rem; |
315 | offset = ((offset * (offset + 1)) >> 1) + rem; |
316 | int bit = 1 << (mem & 7); |
317 | |
318 | // see if we've been here |
319 | size_t length = g.done[index].len; |
320 | if (offset < length && (g.done[index].vec[offset] & bit) != 0) |
321 | return 1; // done this! |
322 | |
323 | // we haven't been here before -- set the bit to show we have now |
324 | |
325 | // see if we need to lengthen the vector in order to set the bit |
326 | if (length <= offset) { |
327 | // if we have one already, enlarge it, zero out the appended space |
328 | char *vector; |
329 | if (length) { |
330 | do { |
331 | length <<= 1; |
332 | } while (length <= offset); |
333 | vector = realloc(g.done[index].vec, length); |
334 | assert(vector != NULL && "out of memory"); |
335 | memset(vector + g.done[index].len, 0, length - g.done[index].len); |
336 | } |
337 | |
338 | // otherwise we need to make a new vector and zero it out |
339 | else { |
340 | length = 16; |
341 | while (length <= offset) |
342 | length <<= 1; |
343 | vector = calloc(length, 1); |
344 | assert(vector != NULL && "out of memory"); |
345 | } |
346 | |
347 | // install the new vector |
348 | g.done[index].len = length; |
349 | g.done[index].vec = vector; |
350 | } |
351 | |
352 | // set the bit |
353 | g.done[index].vec[offset] |= bit; |
354 | return 0; |
355 | } |
356 | |
357 | // Examine all possible codes from the given node (syms, len, left). Compute |
358 | // the amount of memory required to build inflate's decoding tables, where the |
359 | // number of code structures used so far is mem, and the number remaining in |
360 | // the current sub-table is rem. |
361 | local void examine(int syms, int left, int len, int mem, int rem) { |
362 | // see if we have a complete code |
363 | if (syms == left) { |
364 | // set the last code entry |
365 | g.code[len] = left; |
366 | |
367 | // complete computation of memory used by this code |
368 | while (rem < left) { |
369 | left -= rem; |
370 | rem = 1 << (len - g.root); |
371 | mem += rem; |
372 | } |
373 | assert(rem == left); |
374 | |
375 | // if this is at the maximum, show the sub-code |
376 | if (mem >= g.large) { |
377 | // if this is a new maximum, update the maximum and clear out the |
378 | // printed sub-codes from the previous maximum |
379 | if (mem > g.large) { |
380 | g.large = mem; |
381 | string_clear(&g.out); |
382 | } |
383 | |
384 | // compute the starting state for this sub-code |
385 | syms = 0; |
386 | left = 1 << g.max; |
387 | for (int bits = g.max; bits > g.root; bits--) { |
388 | syms += g.code[bits]; |
389 | left -= g.code[bits]; |
390 | assert((left & 1) == 0); |
391 | left >>= 1; |
392 | } |
393 | |
394 | // print the starting state and the resulting sub-code to g.out |
395 | string_printf(&g.out, "<%u, %u, %u>:", |
396 | syms, g.root + 1, ((1 << g.root) - left) << 1); |
397 | for (int bits = g.root + 1; bits <= g.max; bits++) |
398 | if (g.code[bits]) |
399 | string_printf(&g.out, " %d[%d]", g.code[bits], bits); |
400 | string_printf(&g.out, "\n"); |
401 | } |
402 | |
403 | // remove entries as we drop back down in the recursion |
404 | g.code[len] = 0; |
405 | return; |
406 | } |
407 | |
408 | // prune the tree if we can |
409 | if (been_here(syms, left, len, mem, rem)) |
410 | return; |
411 | |
412 | // we need to use at least this many bit patterns so that the code won't be |
413 | // incomplete at the next length (more bit patterns than symbols) |
414 | int least = (left << 1) - syms; |
415 | if (least < 0) |
416 | least = 0; |
417 | |
418 | // we can use at most this many bit patterns, lest there not be enough |
419 | // available for the remaining symbols at the maximum length (if there were |
420 | // no limit to the code length, this would become: most = left - 1) |
421 | int most = (((code_t)left << (g.max - len)) - syms) / |
422 | (((code_t)1 << (g.max - len)) - 1); |
423 | |
424 | // occupy least table spaces, creating new sub-tables as needed |
425 | int use = least; |
426 | while (rem < use) { |
427 | use -= rem; |
428 | rem = 1 << (len - g.root); |
429 | mem += rem; |
430 | } |
431 | rem -= use; |
432 | |
433 | // examine codes from here, updating table space as we go |
434 | for (use = least; use <= most; use++) { |
435 | g.code[len] = use; |
436 | examine(syms - use, (left - use) << 1, len + 1, |
437 | mem + (rem ? 1 << (len - g.root) : 0), rem << 1); |
438 | if (rem == 0) { |
439 | rem = 1 << (len - g.root); |
440 | mem += rem; |
441 | } |
442 | rem--; |
443 | } |
444 | |
445 | // remove entries as we drop back down in the recursion |
446 | g.code[len] = 0; |
447 | } |
448 | |
449 | // Look at all sub-codes starting with root + 1 bits. Look at only the valid |
450 | // intermediate code states (syms, left, len). For each completed code, |
451 | // calculate the amount of memory required by inflate to build the decoding |
452 | // tables. Find the maximum amount of memory required and show the codes that |
453 | // require that maximum. |
454 | local void enough(int syms) { |
455 | // clear code |
456 | for (int n = 0; n <= g.max; n++) |
457 | g.code[n] = 0; |
458 | |
459 | // look at all (root + 1) bit and longer codes |
460 | string_clear(&g.out); // empty saved results |
461 | g.large = 1 << g.root; // base table |
462 | if (g.root < g.max) // otherwise, there's only a base table |
463 | for (int n = 3; n <= syms; n++) |
464 | for (int left = 2; left < n; left += 2) { |
465 | // look at all reachable (root + 1) bit nodes, and the |
466 | // resulting codes (complete at root + 2 or more) |
467 | size_t index = map(n, left, g.root + 1); |
468 | if (g.root + 1 < g.max && g.num[index]) // reachable node |
469 | examine(n, left, g.root + 1, 1 << g.root, 0); |
470 | |
471 | // also look at root bit codes with completions at root + 1 |
472 | // bits (not saved in num, since complete), just in case |
473 | if (g.num[index - 1] && n <= left << 1) |
474 | examine((n - left) << 1, (n - left) << 1, g.root + 1, |
475 | 1 << g.root, 0); |
476 | } |
477 | |
478 | // done |
479 | printf("maximum of %d table entries for root = %d\n", g.large, g.root); |
480 | fputs(g.out.str, stdout); |
481 | } |
482 | |
483 | // Examine and show the total number of possible prefix codes for a given |
484 | // maximum number of symbols, initial root table size, and maximum code length |
485 | // in bits -- those are the command arguments in that order. The default values |
486 | // are 286, 9, and 15 respectively, for the deflate literal/length code. The |
487 | // possible codes are counted for each number of coded symbols from two to the |
488 | // maximum. The counts for each of those and the total number of codes are |
489 | // shown. The maximum number of inflate table entries is then calculated across |
490 | // all possible codes. Each new maximum number of table entries and the |
491 | // associated sub-code (starting at root + 1 == 10 bits) is shown. |
492 | // |
493 | // To count and examine prefix codes that are not length-limited, provide a |
494 | // maximum length equal to the number of symbols minus one. |
495 | // |
496 | // For the deflate literal/length code, use "enough". For the deflate distance |
497 | // code, use "enough 30 6". |
498 | int main(int argc, char **argv) { |
499 | // set up globals for cleanup() |
500 | g.code = NULL; |
501 | g.num = NULL; |
502 | g.done = NULL; |
503 | string_init(&g.out); |
504 | |
505 | // get arguments -- default to the deflate literal/length code |
506 | int syms = 286; |
507 | g.root = 9; |
508 | g.max = 15; |
509 | if (argc > 1) { |
510 | syms = atoi(argv[1]); |
511 | if (argc > 2) { |
512 | g.root = atoi(argv[2]); |
513 | if (argc > 3) |
514 | g.max = atoi(argv[3]); |
515 | } |
516 | } |
517 | if (argc > 4 || syms < 2 || g.root < 1 || g.max < 1) { |
518 | fputs("invalid arguments, need: [sym >= 2 [root >= 1 [max >= 1]]]\n", |
519 | stderr); |
520 | return 1; |
521 | } |
522 | |
523 | // if not restricting the code length, the longest is syms - 1 |
524 | if (g.max > syms - 1) |
525 | g.max = syms - 1; |
526 | |
527 | // determine the number of bits in a code_t |
528 | int bits = 0; |
529 | for (code_t word = 1; word; word <<= 1) |
530 | bits++; |
531 | |
532 | // make sure that the calculation of most will not overflow |
533 | if (g.max > bits || (code_t)(syms - 2) >= ((code_t)-1 >> (g.max - 1))) { |
534 | fputs("abort: code length too long for internal types\n", stderr); |
535 | return 1; |
536 | } |
537 | |
538 | // reject impossible code requests |
539 | if ((code_t)(syms - 1) > ((code_t)1 << g.max) - 1) { |
540 | fprintf(stderr, "%d symbols cannot be coded in %d bits\n", |
541 | syms, g.max); |
542 | return 1; |
543 | } |
544 | |
545 | // allocate code vector |
546 | g.code = calloc(g.max + 1, sizeof(int)); |
547 | assert(g.code != NULL && "out of memory"); |
548 | |
549 | // determine size of saved results array, checking for overflows, |
550 | // allocate and clear the array (set all to zero with calloc()) |
551 | if (syms == 2) // iff max == 1 |
552 | g.num = NULL; // won't be saving any results |
553 | else { |
554 | g.size = syms >> 1; |
555 | int n = (syms - 1) >> 1; |
556 | assert(g.size <= (size_t)-1 / n && "overflow"); |
557 | g.size *= n; |
558 | n = g.max - 1; |
559 | assert(g.size <= (size_t)-1 / n && "overflow"); |
560 | g.size *= n; |
561 | g.num = calloc(g.size, sizeof(big_t)); |
562 | assert(g.num != NULL && "out of memory"); |
563 | } |
564 | |
565 | // count possible codes for all numbers of symbols, add up counts |
566 | big_t sum = 0; |
567 | for (int n = 2; n <= syms; n++) { |
568 | big_t got = count(n, 2, 1); |
569 | sum += got; |
570 | assert(got != (big_t)-1 && sum >= got && "overflow"); |
571 | } |
572 | printf("%"PRIbig" total codes for 2 to %d symbols", sum, syms); |
573 | if (g.max < syms - 1) |
574 | printf(" (%d-bit length limit)\n", g.max); |
575 | else |
576 | puts(" (no length limit)"); |
577 | |
578 | // allocate and clear done array for been_here() |
579 | if (syms == 2) |
580 | g.done = NULL; |
581 | else { |
582 | g.done = calloc(g.size, sizeof(struct tab)); |
583 | assert(g.done != NULL && "out of memory"); |
584 | } |
585 | |
586 | // find and show maximum inflate table usage |
587 | if (g.root > g.max) // reduce root to max length |
588 | g.root = g.max; |
589 | if ((code_t)syms < ((code_t)1 << (g.root + 1))) |
590 | enough(syms); |
591 | else |
592 | fputs("cannot handle minimum code lengths > root", stderr); |
593 | |
594 | // done |
595 | cleanup(); |
596 | return 0; |
597 | } |