Commit | Line | Data |
---|---|---|
3719602c PC |
1 | /* license:BSD-3-Clause |
2 | * copyright-holders:Aaron Giles | |
3 | **************************************************************************** | |
4 | ||
5 | huffman.c | |
6 | ||
7 | Static Huffman compression and decompression helpers. | |
8 | ||
9 | **************************************************************************** | |
10 | ||
11 | Maximum codelength is officially (alphabetsize - 1). This would be 255 bits | |
12 | (since we use 1 byte values). However, it is also dependent upon the number | |
13 | of samples used, as follows: | |
14 | ||
15 | 2 bits -> 3..4 samples | |
16 | 3 bits -> 5..7 samples | |
17 | 4 bits -> 8..12 samples | |
18 | 5 bits -> 13..20 samples | |
19 | 6 bits -> 21..33 samples | |
20 | 7 bits -> 34..54 samples | |
21 | 8 bits -> 55..88 samples | |
22 | 9 bits -> 89..143 samples | |
23 | 10 bits -> 144..232 samples | |
24 | 11 bits -> 233..376 samples | |
25 | 12 bits -> 377..609 samples | |
26 | 13 bits -> 610..986 samples | |
27 | 14 bits -> 987..1596 samples | |
28 | 15 bits -> 1597..2583 samples | |
29 | 16 bits -> 2584..4180 samples -> note that a 4k data size guarantees codelength <= 16 bits | |
30 | 17 bits -> 4181..6764 samples | |
31 | 18 bits -> 6765..10945 samples | |
32 | 19 bits -> 10946..17710 samples | |
33 | 20 bits -> 17711..28656 samples | |
34 | 21 bits -> 28657..46367 samples | |
35 | 22 bits -> 46368..75024 samples | |
36 | 23 bits -> 75025..121392 samples | |
37 | 24 bits -> 121393..196417 samples | |
38 | 25 bits -> 196418..317810 samples | |
39 | 26 bits -> 317811..514228 samples | |
40 | 27 bits -> 514229..832039 samples | |
41 | 28 bits -> 832040..1346268 samples | |
42 | 29 bits -> 1346269..2178308 samples | |
43 | 30 bits -> 2178309..3524577 samples | |
44 | 31 bits -> 3524578..5702886 samples | |
45 | 32 bits -> 5702887..9227464 samples | |
46 | ||
47 | Looking at it differently, here is where powers of 2 fall into these buckets: | |
48 | ||
49 | 256 samples -> 11 bits max | |
50 | 512 samples -> 12 bits max | |
51 | 1k samples -> 14 bits max | |
52 | 2k samples -> 15 bits max | |
53 | 4k samples -> 16 bits max | |
54 | 8k samples -> 18 bits max | |
55 | 16k samples -> 19 bits max | |
56 | 32k samples -> 21 bits max | |
57 | 64k samples -> 22 bits max | |
58 | 128k samples -> 24 bits max | |
59 | 256k samples -> 25 bits max | |
60 | 512k samples -> 27 bits max | |
61 | 1M samples -> 28 bits max | |
62 | 2M samples -> 29 bits max | |
63 | 4M samples -> 31 bits max | |
64 | 8M samples -> 32 bits max | |
65 | ||
66 | **************************************************************************** | |
67 | ||
68 | Delta-RLE encoding works as follows: | |
69 | ||
70 | Starting value is assumed to be 0. All data is encoded as a delta | |
71 | from the previous value, such that final[i] = final[i - 1] + delta. | |
72 | Long runs of 0s are RLE-encoded as follows: | |
73 | ||
74 | 0x100 = repeat count of 8 | |
75 | 0x101 = repeat count of 9 | |
76 | 0x102 = repeat count of 10 | |
77 | 0x103 = repeat count of 11 | |
78 | 0x104 = repeat count of 12 | |
79 | 0x105 = repeat count of 13 | |
80 | 0x106 = repeat count of 14 | |
81 | 0x107 = repeat count of 15 | |
82 | 0x108 = repeat count of 16 | |
83 | 0x109 = repeat count of 32 | |
84 | 0x10a = repeat count of 64 | |
85 | 0x10b = repeat count of 128 | |
86 | 0x10c = repeat count of 256 | |
87 | 0x10d = repeat count of 512 | |
88 | 0x10e = repeat count of 1024 | |
89 | 0x10f = repeat count of 2048 | |
90 | ||
91 | Note that repeat counts are reset at the end of a row, so if a 0 run | |
92 | extends to the end of a row, a large repeat count may be used. | |
93 | ||
94 | The reason for starting the run counts at 8 is that 0 is expected to | |
95 | be the most common symbol, and is typically encoded in 1 or 2 bits. | |
96 | ||
97 | ***************************************************************************/ | |
98 | ||
99 | #include <stdlib.h> | |
100 | #include <assert.h> | |
101 | #include <stdio.h> | |
102 | #include <string.h> | |
103 | ||
104 | #include <libchdr/huffman.h> | |
105 | #include <libchdr/minmax.h> | |
106 | ||
107 | /*************************************************************************** | |
108 | * MACROS | |
109 | *************************************************************************** | |
110 | */ | |
111 | ||
112 | #define MAKE_LOOKUP(code,bits) (((code) << 5) | ((bits) & 0x1f)) | |
113 | ||
114 | /*************************************************************************** | |
115 | * IMPLEMENTATION | |
116 | *************************************************************************** | |
117 | */ | |
118 | ||
119 | /*------------------------------------------------- | |
120 | * huffman_context_base - create an encoding/ | |
121 | * decoding context | |
122 | *------------------------------------------------- | |
123 | */ | |
124 | ||
125 | struct huffman_decoder* create_huffman_decoder(int numcodes, int maxbits) | |
126 | { | |
127 | struct huffman_decoder* decoder; | |
128 | /* limit to 24 bits */ | |
129 | if (maxbits > 24) | |
130 | return NULL; | |
131 | ||
132 | decoder = (struct huffman_decoder*)malloc(sizeof(struct huffman_decoder)); | |
133 | decoder->numcodes = numcodes; | |
134 | decoder->maxbits = maxbits; | |
135 | decoder->lookup = (lookup_value*)malloc(sizeof(lookup_value) * (1 << maxbits)); | |
136 | decoder->huffnode = (struct node_t*)malloc(sizeof(struct node_t) * numcodes); | |
137 | decoder->datahisto = NULL; | |
138 | decoder->prevdata = 0; | |
139 | decoder->rleremaining = 0; | |
140 | return decoder; | |
141 | } | |
142 | ||
143 | void delete_huffman_decoder(struct huffman_decoder* decoder) | |
144 | { | |
145 | if (decoder != NULL) | |
146 | { | |
147 | if (decoder->lookup != NULL) | |
148 | free(decoder->lookup); | |
149 | if (decoder->huffnode != NULL) | |
150 | free(decoder->huffnode); | |
151 | free(decoder); | |
152 | } | |
153 | } | |
154 | ||
155 | /*------------------------------------------------- | |
156 | * decode_one - decode a single code from the | |
157 | * huffman stream | |
158 | *------------------------------------------------- | |
159 | */ | |
160 | ||
161 | uint32_t huffman_decode_one(struct huffman_decoder* decoder, struct bitstream* bitbuf) | |
162 | { | |
163 | /* peek ahead to get maxbits worth of data */ | |
164 | uint32_t bits = bitstream_peek(bitbuf, decoder->maxbits); | |
165 | ||
166 | /* look it up, then remove the actual number of bits for this code */ | |
167 | lookup_value lookup = decoder->lookup[bits]; | |
168 | bitstream_remove(bitbuf, lookup & 0x1f); | |
169 | ||
170 | /* return the value */ | |
171 | return lookup >> 5; | |
172 | } | |
173 | ||
174 | /*------------------------------------------------- | |
175 | * import_tree_rle - import an RLE-encoded | |
176 | * huffman tree from a source data stream | |
177 | *------------------------------------------------- | |
178 | */ | |
179 | ||
180 | enum huffman_error huffman_import_tree_rle(struct huffman_decoder* decoder, struct bitstream* bitbuf) | |
181 | { | |
182 | enum huffman_error error; | |
183 | /* bits per entry depends on the maxbits */ | |
184 | int numbits; | |
185 | int curnode; | |
186 | if (decoder->maxbits >= 16) | |
187 | numbits = 5; | |
188 | else if (decoder->maxbits >= 8) | |
189 | numbits = 4; | |
190 | else | |
191 | numbits = 3; | |
192 | ||
193 | /* loop until we read all the nodes */ | |
194 | for (curnode = 0; curnode < (int)decoder->numcodes; /* blank */) | |
195 | { | |
196 | /* a non-one value is just raw */ | |
197 | int nodebits = bitstream_read(bitbuf, numbits); | |
198 | if (nodebits != 1) | |
199 | decoder->huffnode[curnode++].numbits = nodebits; | |
200 | ||
201 | /* a one value is an escape code */ | |
202 | else | |
203 | { | |
204 | /* a double 1 is just a single 1 */ | |
205 | nodebits = bitstream_read(bitbuf, numbits); | |
206 | if (nodebits == 1) | |
207 | decoder->huffnode[curnode++].numbits = nodebits; | |
208 | ||
209 | /* otherwise, we need one for value for the repeat count */ | |
210 | else | |
211 | { | |
212 | int repcount = bitstream_read(bitbuf, numbits) + 3; | |
213 | while (repcount--) | |
214 | decoder->huffnode[curnode++].numbits = nodebits; | |
215 | } | |
216 | } | |
217 | } | |
218 | ||
219 | /* make sure we ended up with the right number */ | |
220 | if (curnode != (int)decoder->numcodes) | |
221 | return HUFFERR_INVALID_DATA; | |
222 | ||
223 | /* assign canonical codes for all nodes based on their code lengths */ | |
224 | error = huffman_assign_canonical_codes(decoder); | |
225 | if (error != HUFFERR_NONE) | |
226 | return error; | |
227 | ||
228 | /* build the lookup table */ | |
229 | huffman_build_lookup_table(decoder); | |
230 | ||
231 | /* determine final input length and report errors */ | |
232 | return bitstream_overflow(bitbuf) ? HUFFERR_INPUT_BUFFER_TOO_SMALL : HUFFERR_NONE; | |
233 | } | |
234 | ||
235 | /*------------------------------------------------- | |
236 | * import_tree_huffman - import a huffman-encoded | |
237 | * huffman tree from a source data stream | |
238 | *------------------------------------------------- | |
239 | */ | |
240 | ||
241 | enum huffman_error huffman_import_tree_huffman(struct huffman_decoder* decoder, struct bitstream* bitbuf) | |
242 | { | |
243 | int last = 0; | |
244 | int curcode; | |
245 | uint32_t temp; | |
246 | enum huffman_error error; | |
247 | uint8_t rlefullbits = 0; | |
248 | int index, count = 0; | |
249 | int start; | |
250 | /* start by parsing the lengths for the small tree */ | |
251 | struct huffman_decoder* smallhuff = create_huffman_decoder(24, 6); | |
252 | ||
253 | smallhuff->huffnode[0].numbits = bitstream_read(bitbuf, 3); | |
254 | start = bitstream_read(bitbuf, 3) + 1; | |
255 | ||
256 | for (index = 1; index < 24; index++) | |
257 | { | |
258 | if (index < start || count == 7) | |
259 | smallhuff->huffnode[index].numbits = 0; | |
260 | else | |
261 | { | |
262 | count = bitstream_read(bitbuf, 3); | |
263 | smallhuff->huffnode[index].numbits = (count == 7) ? 0 : count; | |
264 | } | |
265 | } | |
266 | ||
267 | /* then regenerate the tree */ | |
268 | error = huffman_assign_canonical_codes(smallhuff); | |
269 | if (error != HUFFERR_NONE) | |
270 | return error; | |
271 | huffman_build_lookup_table(smallhuff); | |
272 | ||
273 | /* determine the maximum length of an RLE count */ | |
274 | temp = decoder->numcodes - 9; | |
275 | while (temp != 0) | |
276 | { | |
277 | temp >>= 1; | |
278 | rlefullbits++; | |
279 | } | |
280 | ||
281 | /* now process the rest of the data */ | |
282 | for (curcode = 0; curcode < (int)decoder->numcodes; /* blank */) | |
283 | { | |
284 | int value = huffman_decode_one(smallhuff, bitbuf); | |
285 | if (value != 0) | |
286 | decoder->huffnode[curcode++].numbits = last = value - 1; | |
287 | else | |
288 | { | |
289 | int count = bitstream_read(bitbuf, 3) + 2; | |
290 | if (count == 7+2) | |
291 | count += bitstream_read(bitbuf, rlefullbits); | |
292 | for (/* blank */; count != 0 && curcode < (int)decoder->numcodes; count--) | |
293 | decoder->huffnode[curcode++].numbits = last; | |
294 | } | |
295 | } | |
296 | ||
297 | /* make sure we ended up with the right number */ | |
298 | if (curcode != (int)decoder->numcodes) | |
299 | { | |
300 | delete_huffman_decoder(smallhuff); | |
301 | return HUFFERR_INVALID_DATA; | |
302 | } | |
303 | ||
304 | /* assign canonical codes for all nodes based on their code lengths */ | |
305 | error = huffman_assign_canonical_codes(decoder); | |
306 | if (error != HUFFERR_NONE) | |
307 | { | |
308 | delete_huffman_decoder(smallhuff); | |
309 | return error; | |
310 | } | |
311 | ||
312 | /* build the lookup table */ | |
313 | huffman_build_lookup_table(decoder); | |
314 | delete_huffman_decoder(smallhuff); | |
315 | ||
316 | /* determine final input length and report errors */ | |
317 | return bitstream_overflow(bitbuf) ? HUFFERR_INPUT_BUFFER_TOO_SMALL : HUFFERR_NONE; | |
318 | } | |
319 | ||
320 | /*------------------------------------------------- | |
321 | * compute_tree_from_histo - common backend for | |
322 | * computing a tree based on the data histogram | |
323 | *------------------------------------------------- | |
324 | */ | |
325 | ||
326 | enum huffman_error huffman_compute_tree_from_histo(struct huffman_decoder* decoder) | |
327 | { | |
328 | /* compute the number of data items in the histogram */ | |
329 | int i; | |
330 | uint32_t upperweight; | |
331 | uint32_t lowerweight = 0; | |
332 | uint32_t sdatacount = 0; | |
333 | for (i = 0; i < (int)decoder->numcodes; i++) | |
334 | sdatacount += decoder->datahisto[i]; | |
335 | ||
336 | /* binary search to achieve the optimum encoding */ | |
337 | upperweight = sdatacount * 2; | |
338 | ||
339 | for (;;) | |
340 | { | |
341 | /* build a tree using the current weight */ | |
342 | uint32_t curweight = (upperweight + lowerweight) / 2; | |
343 | int curmaxbits = huffman_build_tree(decoder, sdatacount, curweight); | |
344 | ||
345 | /* apply binary search here */ | |
346 | if (curmaxbits <= decoder->maxbits) | |
347 | { | |
348 | lowerweight = curweight; | |
349 | ||
350 | /* early out if it worked with the raw weights, or if we're done searching */ | |
351 | if (curweight == sdatacount || (upperweight - lowerweight) <= 1) | |
352 | break; | |
353 | } | |
354 | else | |
355 | upperweight = curweight; | |
356 | } | |
357 | ||
358 | /* assign canonical codes for all nodes based on their code lengths */ | |
359 | return huffman_assign_canonical_codes(decoder); | |
360 | } | |
361 | ||
362 | /*************************************************************************** | |
363 | * INTERNAL FUNCTIONS | |
364 | *************************************************************************** | |
365 | */ | |
366 | ||
367 | /*------------------------------------------------- | |
368 | * tree_node_compare - compare two tree nodes | |
369 | * by weight | |
370 | *------------------------------------------------- | |
371 | */ | |
372 | ||
373 | static int huffman_tree_node_compare(const void *item1, const void *item2) | |
374 | { | |
375 | const struct node_t *node1 = *(const struct node_t **)item1; | |
376 | const struct node_t *node2 = *(const struct node_t **)item2; | |
377 | if (node2->weight != node1->weight) | |
378 | return node2->weight - node1->weight; | |
379 | if (node2->bits - node1->bits == 0) | |
380 | fprintf(stderr, "identical node sort keys, should not happen!\n"); | |
381 | return (int)node1->bits - (int)node2->bits; | |
382 | } | |
383 | ||
384 | /*------------------------------------------------- | |
385 | * build_tree - build a huffman tree based on the | |
386 | * data distribution | |
387 | *------------------------------------------------- | |
388 | */ | |
389 | ||
390 | int huffman_build_tree(struct huffman_decoder* decoder, uint32_t totaldata, uint32_t totalweight) | |
391 | { | |
392 | int nextalloc; | |
393 | int maxbits = 0; | |
394 | /* make a list of all non-zero nodes */ | |
395 | struct node_t** list = (struct node_t**) | |
396 | malloc(sizeof(struct node_t*) * decoder->numcodes * 2); | |
397 | int curcode, listitems = 0; | |
398 | ||
399 | memset(decoder->huffnode, 0, | |
400 | decoder->numcodes * sizeof(decoder->huffnode[0])); | |
401 | ||
402 | for (curcode = 0; curcode < (int)decoder->numcodes; curcode++) | |
403 | if (decoder->datahisto[curcode] != 0) | |
404 | { | |
405 | list[listitems++] = &decoder->huffnode[curcode]; | |
406 | decoder->huffnode[curcode].count = decoder->datahisto[curcode]; | |
407 | decoder->huffnode[curcode].bits = curcode; | |
408 | ||
409 | /* scale the weight by the current effective length, ensuring we don't go to 0 */ | |
410 | decoder->huffnode[curcode].weight = ((uint64_t)decoder->datahisto[curcode]) * ((uint64_t)totalweight) / ((uint64_t)totaldata); | |
411 | if (decoder->huffnode[curcode].weight == 0) | |
412 | decoder->huffnode[curcode].weight = 1; | |
413 | } | |
414 | ||
415 | #if 0 | |
416 | { | |
417 | unsigned i; | |
418 | fprintf(stderr, "Pre-sort:\n"); | |
419 | for (i = 0; i < listitems; i++) | |
420 | fprintf(stderr, "weight: %d code: %d\n", | |
421 | list[i]->m_weight, list[i]->m_bits); | |
422 | } | |
423 | #endif | |
424 | ||
425 | /* sort the list by weight, largest weight first */ | |
426 | qsort(&list[0], listitems, sizeof(list[0]), huffman_tree_node_compare); | |
427 | ||
428 | #if 0 | |
429 | fprintf(stderr, "Post-sort:\n"); | |
430 | for (int i = 0; i < listitems; i++) { | |
431 | fprintf(stderr, "weight: %d code: %d\n", list[i]->m_weight, list[i]->m_bits); | |
432 | } | |
433 | fprintf(stderr, "===================\n"); | |
434 | #endif | |
435 | ||
436 | /* now build the tree */ | |
437 | nextalloc = decoder->numcodes; | |
438 | ||
439 | while (listitems > 1) | |
440 | { | |
441 | int curitem; | |
442 | /* remove lowest two items */ | |
443 | struct node_t* node1 = &(*list[--listitems]); | |
444 | struct node_t* node0 = &(*list[--listitems]); | |
445 | ||
446 | /* create new node */ | |
447 | struct node_t* newnode = &decoder->huffnode[nextalloc++]; | |
448 | newnode->parent = NULL; | |
449 | node0->parent = | |
450 | node1->parent = newnode; | |
451 | newnode->weight = | |
452 | node0->weight + node1->weight; | |
453 | ||
454 | /* insert into list at appropriate location */ | |
455 | for (curitem = 0; curitem < listitems; curitem++) | |
456 | if (newnode->weight > list[curitem]->weight) | |
457 | { | |
458 | memmove(&list[curitem+1], | |
459 | &list[curitem], | |
460 | (listitems - curitem) * sizeof(list[0])); | |
461 | break; | |
462 | } | |
463 | list[curitem] = newnode; | |
464 | listitems++; | |
465 | } | |
466 | ||
467 | /* compute the number of bits in each code, | |
468 | * and fill in another histogram */ | |
469 | for (curcode = 0; curcode < (int)decoder->numcodes; curcode++) | |
470 | { | |
471 | struct node_t *curnode; | |
472 | struct node_t* node = &decoder->huffnode[curcode]; | |
473 | node->numbits = 0; | |
474 | node->bits = 0; | |
475 | ||
476 | /* if we have a non-zero weight, compute the number of bits */ | |
477 | if (node->weight > 0) | |
478 | { | |
479 | /* determine the number of bits for this node */ | |
480 | for (curnode = node; | |
481 | curnode->parent != NULL; curnode = curnode->parent) | |
482 | node->numbits++; | |
483 | if (node->numbits == 0) | |
484 | node->numbits = 1; | |
485 | ||
486 | /* keep track of the max */ | |
487 | maxbits = MAX(maxbits, ((int)node->numbits)); | |
488 | } | |
489 | } | |
490 | free(list); | |
491 | return maxbits; | |
492 | } | |
493 | ||
494 | /*------------------------------------------------- | |
495 | * assign_canonical_codes - assign canonical codes | |
496 | * to all the nodes based on the number of bits | |
497 | * in each | |
498 | *------------------------------------------------- | |
499 | */ | |
500 | ||
501 | enum huffman_error huffman_assign_canonical_codes(struct huffman_decoder* decoder) | |
502 | { | |
503 | uint32_t curstart = 0; | |
504 | /* build up a histogram of bit lengths */ | |
505 | int curcode, codelen; | |
506 | uint32_t bithisto[33] = { 0 }; | |
507 | for (curcode = 0; curcode < (int)decoder->numcodes; curcode++) | |
508 | { | |
509 | struct node_t* node = &decoder->huffnode[curcode]; | |
510 | if (node->numbits > decoder->maxbits) | |
511 | return HUFFERR_INTERNAL_INCONSISTENCY; | |
512 | if (node->numbits <= 32) | |
513 | bithisto[node->numbits]++; | |
514 | } | |
515 | ||
516 | /* for each code length, determine the starting code number */ | |
517 | for (codelen = 32; codelen > 0; codelen--) | |
518 | { | |
519 | uint32_t nextstart = (curstart + bithisto[codelen]) >> 1; | |
520 | if (codelen != 1 && nextstart * 2 != (curstart + bithisto[codelen])) | |
521 | return HUFFERR_INTERNAL_INCONSISTENCY; | |
522 | bithisto[codelen] = curstart; | |
523 | curstart = nextstart; | |
524 | } | |
525 | ||
526 | /* now assign canonical codes */ | |
527 | for (curcode = 0; curcode < (int)decoder->numcodes; curcode++) | |
528 | { | |
529 | struct node_t* node = &decoder->huffnode[curcode]; | |
530 | if (node->numbits > 0) | |
531 | node->bits = bithisto[node->numbits]++; | |
532 | } | |
533 | return HUFFERR_NONE; | |
534 | } | |
535 | ||
536 | /*------------------------------------------------- | |
537 | * build_lookup_table - build a lookup table for | |
538 | * fast decoding | |
539 | *------------------------------------------------- | |
540 | */ | |
541 | ||
542 | void huffman_build_lookup_table(struct huffman_decoder* decoder) | |
543 | { | |
544 | /* iterate over all codes */ | |
545 | int curcode; | |
546 | for (curcode = 0; curcode < (int)decoder->numcodes; curcode++) | |
547 | { | |
548 | /* process all nodes which have non-zero bits */ | |
549 | struct node_t* node = &decoder->huffnode[curcode]; | |
550 | if (node->numbits > 0) | |
551 | { | |
552 | /* set up the entry */ | |
553 | lookup_value value = MAKE_LOOKUP(curcode, node->numbits); | |
554 | ||
555 | /* fill all matching entries */ | |
556 | int shift = decoder->maxbits - node->numbits; | |
557 | lookup_value *dest = &decoder->lookup[node->bits << shift]; | |
558 | lookup_value *destend = &decoder->lookup[((node->bits + 1) << shift) - 1]; | |
559 | while (dest <= destend) | |
560 | *dest++ = value; | |
561 | } | |
562 | } | |
563 | } |