9c659ffe |
1 | /* license:BSD-3-Clause |
2 | * copyright-holders:Aaron Giles |
3 | **************************************************************************** |
4 | |
5 | huffman.c |
6 | |
7 | Static Huffman compression and decompression helpers. |
8 | |
9 | **************************************************************************** |
10 | |
11 | Maximum codelength is officially (alphabetsize - 1). This would be 255 bits |
12 | (since we use 1 byte values). However, it is also dependent upon the number |
13 | of samples used, as follows: |
14 | |
15 | 2 bits -> 3..4 samples |
16 | 3 bits -> 5..7 samples |
17 | 4 bits -> 8..12 samples |
18 | 5 bits -> 13..20 samples |
19 | 6 bits -> 21..33 samples |
20 | 7 bits -> 34..54 samples |
21 | 8 bits -> 55..88 samples |
22 | 9 bits -> 89..143 samples |
23 | 10 bits -> 144..232 samples |
24 | 11 bits -> 233..376 samples |
25 | 12 bits -> 377..609 samples |
26 | 13 bits -> 610..986 samples |
27 | 14 bits -> 987..1596 samples |
28 | 15 bits -> 1597..2583 samples |
29 | 16 bits -> 2584..4180 samples -> note that a 4k data size guarantees codelength <= 16 bits |
30 | 17 bits -> 4181..6764 samples |
31 | 18 bits -> 6765..10945 samples |
32 | 19 bits -> 10946..17710 samples |
33 | 20 bits -> 17711..28656 samples |
34 | 21 bits -> 28657..46367 samples |
35 | 22 bits -> 46368..75024 samples |
36 | 23 bits -> 75025..121392 samples |
37 | 24 bits -> 121393..196417 samples |
38 | 25 bits -> 196418..317810 samples |
39 | 26 bits -> 317811..514228 samples |
40 | 27 bits -> 514229..832039 samples |
41 | 28 bits -> 832040..1346268 samples |
42 | 29 bits -> 1346269..2178308 samples |
43 | 30 bits -> 2178309..3524577 samples |
44 | 31 bits -> 3524578..5702886 samples |
45 | 32 bits -> 5702887..9227464 samples |
46 | |
47 | Looking at it differently, here is where powers of 2 fall into these buckets: |
48 | |
49 | 256 samples -> 11 bits max |
50 | 512 samples -> 12 bits max |
51 | 1k samples -> 14 bits max |
52 | 2k samples -> 15 bits max |
53 | 4k samples -> 16 bits max |
54 | 8k samples -> 18 bits max |
55 | 16k samples -> 19 bits max |
56 | 32k samples -> 21 bits max |
57 | 64k samples -> 22 bits max |
58 | 128k samples -> 24 bits max |
59 | 256k samples -> 25 bits max |
60 | 512k samples -> 27 bits max |
61 | 1M samples -> 28 bits max |
62 | 2M samples -> 29 bits max |
63 | 4M samples -> 31 bits max |
64 | 8M samples -> 32 bits max |
65 | |
66 | **************************************************************************** |
67 | |
68 | Delta-RLE encoding works as follows: |
69 | |
70 | Starting value is assumed to be 0. All data is encoded as a delta |
71 | from the previous value, such that final[i] = final[i - 1] + delta. |
72 | Long runs of 0s are RLE-encoded as follows: |
73 | |
74 | 0x100 = repeat count of 8 |
75 | 0x101 = repeat count of 9 |
76 | 0x102 = repeat count of 10 |
77 | 0x103 = repeat count of 11 |
78 | 0x104 = repeat count of 12 |
79 | 0x105 = repeat count of 13 |
80 | 0x106 = repeat count of 14 |
81 | 0x107 = repeat count of 15 |
82 | 0x108 = repeat count of 16 |
83 | 0x109 = repeat count of 32 |
84 | 0x10a = repeat count of 64 |
85 | 0x10b = repeat count of 128 |
86 | 0x10c = repeat count of 256 |
87 | 0x10d = repeat count of 512 |
88 | 0x10e = repeat count of 1024 |
89 | 0x10f = repeat count of 2048 |
90 | |
91 | Note that repeat counts are reset at the end of a row, so if a 0 run |
92 | extends to the end of a row, a large repeat count may be used. |
93 | |
94 | The reason for starting the run counts at 8 is that 0 is expected to |
95 | be the most common symbol, and is typically encoded in 1 or 2 bits. |
96 | |
97 | ***************************************************************************/ |
98 | |
99 | #include <stdlib.h> |
9c659ffe |
100 | #include <stdio.h> |
101 | #include <string.h> |
102 | |
2ff0b512 |
103 | #include <libchdr/huffman.h> |
9c659ffe |
104 | |
105 | #define MAX(x,y) ((x) > (y) ? (x) : (y)) |
106 | |
107 | /*************************************************************************** |
108 | * MACROS |
109 | *************************************************************************** |
110 | */ |
111 | |
112 | #define MAKE_LOOKUP(code,bits) (((code) << 5) | ((bits) & 0x1f)) |
113 | |
114 | /*************************************************************************** |
115 | * IMPLEMENTATION |
116 | *************************************************************************** |
117 | */ |
118 | |
119 | /*------------------------------------------------- |
120 | * huffman_context_base - create an encoding/ |
121 | * decoding context |
122 | *------------------------------------------------- |
123 | */ |
124 | |
125 | struct huffman_decoder* create_huffman_decoder(int numcodes, int maxbits) |
126 | { |
dcd8e4fd |
127 | struct huffman_decoder* decoder = NULL; |
128 | |
9c659ffe |
129 | /* limit to 24 bits */ |
130 | if (maxbits > 24) |
131 | return NULL; |
132 | |
dcd8e4fd |
133 | decoder = (struct huffman_decoder*)malloc(sizeof(struct huffman_decoder)); |
9c659ffe |
134 | decoder->numcodes = numcodes; |
135 | decoder->maxbits = maxbits; |
136 | decoder->lookup = (lookup_value*)malloc(sizeof(lookup_value) * (1 << maxbits)); |
137 | decoder->huffnode = (struct node_t*)malloc(sizeof(struct node_t) * numcodes); |
138 | decoder->datahisto = NULL; |
139 | decoder->prevdata = 0; |
140 | decoder->rleremaining = 0; |
141 | return decoder; |
142 | } |
143 | |
2ff0b512 |
144 | void delete_huffman_decoder(struct huffman_decoder* decoder) |
145 | { |
146 | if (decoder != NULL) |
147 | { |
148 | if (decoder->lookup != NULL) |
149 | free(decoder->lookup); |
150 | if (decoder->huffnode != NULL) |
151 | free(decoder->huffnode); |
152 | free(decoder); |
153 | } |
154 | } |
155 | |
9c659ffe |
156 | /*------------------------------------------------- |
157 | * decode_one - decode a single code from the |
158 | * huffman stream |
159 | *------------------------------------------------- |
160 | */ |
161 | |
162 | uint32_t huffman_decode_one(struct huffman_decoder* decoder, struct bitstream* bitbuf) |
163 | { |
164 | /* peek ahead to get maxbits worth of data */ |
165 | uint32_t bits = bitstream_peek(bitbuf, decoder->maxbits); |
166 | |
167 | /* look it up, then remove the actual number of bits for this code */ |
168 | lookup_value lookup = decoder->lookup[bits]; |
169 | bitstream_remove(bitbuf, lookup & 0x1f); |
170 | |
171 | /* return the value */ |
172 | return lookup >> 5; |
173 | } |
174 | |
175 | /*------------------------------------------------- |
176 | * import_tree_rle - import an RLE-encoded |
177 | * huffman tree from a source data stream |
178 | *------------------------------------------------- |
179 | */ |
180 | |
181 | enum huffman_error huffman_import_tree_rle(struct huffman_decoder* decoder, struct bitstream* bitbuf) |
182 | { |
9e052883 |
183 | int numbits; |
184 | uint32_t curnode; |
dcd8e4fd |
185 | enum huffman_error error; |
186 | |
9c659ffe |
187 | /* bits per entry depends on the maxbits */ |
9c659ffe |
188 | if (decoder->maxbits >= 16) |
189 | numbits = 5; |
190 | else if (decoder->maxbits >= 8) |
191 | numbits = 4; |
192 | else |
193 | numbits = 3; |
194 | |
195 | /* loop until we read all the nodes */ |
9c659ffe |
196 | for (curnode = 0; curnode < decoder->numcodes; ) |
197 | { |
198 | /* a non-one value is just raw */ |
199 | int nodebits = bitstream_read(bitbuf, numbits); |
200 | if (nodebits != 1) |
201 | decoder->huffnode[curnode++].numbits = nodebits; |
202 | |
203 | /* a one value is an escape code */ |
204 | else |
205 | { |
206 | /* a double 1 is just a single 1 */ |
207 | nodebits = bitstream_read(bitbuf, numbits); |
208 | if (nodebits == 1) |
209 | decoder->huffnode[curnode++].numbits = nodebits; |
210 | |
211 | /* otherwise, we need one for value for the repeat count */ |
212 | else |
213 | { |
214 | int repcount = bitstream_read(bitbuf, numbits) + 3; |
9e052883 |
215 | if (repcount + curnode > decoder->numcodes) |
216 | return HUFFERR_INVALID_DATA; |
9c659ffe |
217 | while (repcount--) |
218 | decoder->huffnode[curnode++].numbits = nodebits; |
219 | } |
220 | } |
221 | } |
222 | |
223 | /* make sure we ended up with the right number */ |
224 | if (curnode != decoder->numcodes) |
225 | return HUFFERR_INVALID_DATA; |
226 | |
227 | /* assign canonical codes for all nodes based on their code lengths */ |
dcd8e4fd |
228 | error = huffman_assign_canonical_codes(decoder); |
9c659ffe |
229 | if (error != HUFFERR_NONE) |
230 | return error; |
231 | |
232 | /* build the lookup table */ |
233 | huffman_build_lookup_table(decoder); |
234 | |
235 | /* determine final input length and report errors */ |
236 | return bitstream_overflow(bitbuf) ? HUFFERR_INPUT_BUFFER_TOO_SMALL : HUFFERR_NONE; |
237 | } |
238 | |
239 | |
240 | /*------------------------------------------------- |
241 | * import_tree_huffman - import a huffman-encoded |
242 | * huffman tree from a source data stream |
243 | *------------------------------------------------- |
244 | */ |
245 | |
246 | enum huffman_error huffman_import_tree_huffman(struct huffman_decoder* decoder, struct bitstream* bitbuf) |
247 | { |
dcd8e4fd |
248 | int start; |
249 | int last = 0; |
250 | int count = 0; |
251 | int index; |
9e052883 |
252 | uint32_t curcode; |
dcd8e4fd |
253 | uint8_t rlefullbits = 0; |
254 | uint32_t temp; |
255 | enum huffman_error error; |
9c659ffe |
256 | /* start by parsing the lengths for the small tree */ |
257 | struct huffman_decoder* smallhuff = create_huffman_decoder(24, 6); |
258 | smallhuff->huffnode[0].numbits = bitstream_read(bitbuf, 3); |
dcd8e4fd |
259 | start = bitstream_read(bitbuf, 3) + 1; |
260 | for (index = 1; index < 24; index++) |
9c659ffe |
261 | { |
262 | if (index < start || count == 7) |
263 | smallhuff->huffnode[index].numbits = 0; |
264 | else |
265 | { |
266 | count = bitstream_read(bitbuf, 3); |
267 | smallhuff->huffnode[index].numbits = (count == 7) ? 0 : count; |
268 | } |
269 | } |
270 | |
271 | /* then regenerate the tree */ |
dcd8e4fd |
272 | error = huffman_assign_canonical_codes(smallhuff); |
9c659ffe |
273 | if (error != HUFFERR_NONE) |
274 | return error; |
275 | huffman_build_lookup_table(smallhuff); |
276 | |
277 | /* determine the maximum length of an RLE count */ |
dcd8e4fd |
278 | temp = decoder->numcodes - 9; |
9c659ffe |
279 | while (temp != 0) |
280 | temp >>= 1, rlefullbits++; |
281 | |
282 | /* now process the rest of the data */ |
9c659ffe |
283 | for (curcode = 0; curcode < decoder->numcodes; ) |
284 | { |
285 | int value = huffman_decode_one(smallhuff, bitbuf); |
286 | if (value != 0) |
287 | decoder->huffnode[curcode++].numbits = last = value - 1; |
288 | else |
289 | { |
290 | int count = bitstream_read(bitbuf, 3) + 2; |
291 | if (count == 7+2) |
292 | count += bitstream_read(bitbuf, rlefullbits); |
293 | for ( ; count != 0 && curcode < decoder->numcodes; count--) |
294 | decoder->huffnode[curcode++].numbits = last; |
295 | } |
296 | } |
297 | |
298 | /* make sure we ended up with the right number */ |
299 | if (curcode != decoder->numcodes) |
300 | return HUFFERR_INVALID_DATA; |
301 | |
302 | /* assign canonical codes for all nodes based on their code lengths */ |
303 | error = huffman_assign_canonical_codes(decoder); |
304 | if (error != HUFFERR_NONE) |
305 | return error; |
306 | |
307 | /* build the lookup table */ |
308 | huffman_build_lookup_table(decoder); |
309 | |
310 | /* determine final input length and report errors */ |
311 | return bitstream_overflow(bitbuf) ? HUFFERR_INPUT_BUFFER_TOO_SMALL : HUFFERR_NONE; |
312 | } |
313 | |
314 | /*------------------------------------------------- |
315 | * compute_tree_from_histo - common backend for |
316 | * computing a tree based on the data histogram |
317 | *------------------------------------------------- |
318 | */ |
319 | |
320 | enum huffman_error huffman_compute_tree_from_histo(struct huffman_decoder* decoder) |
321 | { |
9e052883 |
322 | uint32_t i; |
dcd8e4fd |
323 | uint32_t lowerweight; |
324 | uint32_t upperweight; |
9c659ffe |
325 | /* compute the number of data items in the histogram */ |
326 | uint32_t sdatacount = 0; |
dcd8e4fd |
327 | for (i = 0; i < decoder->numcodes; i++) |
9c659ffe |
328 | sdatacount += decoder->datahisto[i]; |
329 | |
330 | /* binary search to achieve the optimum encoding */ |
dcd8e4fd |
331 | lowerweight = 0; |
332 | upperweight = sdatacount * 2; |
9c659ffe |
333 | while (1) |
334 | { |
335 | /* build a tree using the current weight */ |
336 | uint32_t curweight = (upperweight + lowerweight) / 2; |
337 | int curmaxbits = huffman_build_tree(decoder, sdatacount, curweight); |
338 | |
339 | /* apply binary search here */ |
340 | if (curmaxbits <= decoder->maxbits) |
341 | { |
342 | lowerweight = curweight; |
343 | |
344 | /* early out if it worked with the raw weights, or if we're done searching */ |
345 | if (curweight == sdatacount || (upperweight - lowerweight) <= 1) |
346 | break; |
347 | } |
348 | else |
349 | upperweight = curweight; |
350 | } |
351 | |
352 | /* assign canonical codes for all nodes based on their code lengths */ |
353 | return huffman_assign_canonical_codes(decoder); |
354 | } |
355 | |
356 | /*************************************************************************** |
357 | * INTERNAL FUNCTIONS |
358 | *************************************************************************** |
359 | */ |
360 | |
361 | /*------------------------------------------------- |
362 | * tree_node_compare - compare two tree nodes |
363 | * by weight |
364 | *------------------------------------------------- |
365 | */ |
366 | |
367 | static int huffman_tree_node_compare(const void *item1, const void *item2) |
368 | { |
369 | const struct node_t *node1 = *(const struct node_t **)item1; |
370 | const struct node_t *node2 = *(const struct node_t **)item2; |
371 | if (node2->weight != node1->weight) |
372 | return node2->weight - node1->weight; |
373 | if (node2->bits - node1->bits == 0) |
374 | fprintf(stderr, "identical node sort keys, should not happen!\n"); |
375 | return (int)node1->bits - (int)node2->bits; |
376 | } |
377 | |
378 | /*------------------------------------------------- |
379 | * build_tree - build a huffman tree based on the |
380 | * data distribution |
381 | *------------------------------------------------- |
382 | */ |
383 | |
384 | int huffman_build_tree(struct huffman_decoder* decoder, uint32_t totaldata, uint32_t totalweight) |
385 | { |
9e052883 |
386 | uint32_t curcode; |
dcd8e4fd |
387 | int nextalloc; |
388 | int listitems = 0; |
389 | int maxbits = 0; |
9c659ffe |
390 | /* make a list of all non-zero nodes */ |
391 | struct node_t** list = (struct node_t**)malloc(sizeof(struct node_t*) * decoder->numcodes * 2); |
9c659ffe |
392 | memset(decoder->huffnode, 0, decoder->numcodes * sizeof(decoder->huffnode[0])); |
dcd8e4fd |
393 | for (curcode = 0; curcode < decoder->numcodes; curcode++) |
9c659ffe |
394 | if (decoder->datahisto[curcode] != 0) |
395 | { |
396 | list[listitems++] = &decoder->huffnode[curcode]; |
397 | decoder->huffnode[curcode].count = decoder->datahisto[curcode]; |
398 | decoder->huffnode[curcode].bits = curcode; |
399 | |
400 | /* scale the weight by the current effective length, ensuring we don't go to 0 */ |
401 | decoder->huffnode[curcode].weight = ((uint64_t)decoder->datahisto[curcode]) * ((uint64_t)totalweight) / ((uint64_t)totaldata); |
402 | if (decoder->huffnode[curcode].weight == 0) |
403 | decoder->huffnode[curcode].weight = 1; |
404 | } |
405 | |
406 | #if 0 |
407 | fprintf(stderr, "Pre-sort:\n"); |
408 | for (int i = 0; i < listitems; i++) { |
409 | fprintf(stderr, "weight: %d code: %d\n", list[i]->m_weight, list[i]->m_bits); |
410 | } |
411 | #endif |
412 | |
413 | /* sort the list by weight, largest weight first */ |
414 | qsort(&list[0], listitems, sizeof(list[0]), huffman_tree_node_compare); |
415 | |
416 | #if 0 |
417 | fprintf(stderr, "Post-sort:\n"); |
418 | for (int i = 0; i < listitems; i++) { |
419 | fprintf(stderr, "weight: %d code: %d\n", list[i]->m_weight, list[i]->m_bits); |
420 | } |
421 | fprintf(stderr, "===================\n"); |
422 | #endif |
423 | |
424 | /* now build the tree */ |
dcd8e4fd |
425 | nextalloc = decoder->numcodes; |
9c659ffe |
426 | while (listitems > 1) |
427 | { |
2ff0b512 |
428 | int curitem; |
9c659ffe |
429 | /* remove lowest two items */ |
430 | struct node_t* node1 = &(*list[--listitems]); |
431 | struct node_t* node0 = &(*list[--listitems]); |
432 | |
433 | /* create new node */ |
434 | struct node_t* newnode = &decoder->huffnode[nextalloc++]; |
435 | newnode->parent = NULL; |
436 | node0->parent = node1->parent = newnode; |
437 | newnode->weight = node0->weight + node1->weight; |
438 | |
439 | /* insert into list at appropriate location */ |
9c659ffe |
440 | for (curitem = 0; curitem < listitems; curitem++) |
441 | if (newnode->weight > list[curitem]->weight) |
442 | { |
443 | memmove(&list[curitem+1], &list[curitem], (listitems - curitem) * sizeof(list[0])); |
444 | break; |
445 | } |
446 | list[curitem] = newnode; |
447 | listitems++; |
448 | } |
449 | |
450 | /* compute the number of bits in each code, and fill in another histogram */ |
dcd8e4fd |
451 | for (curcode = 0; curcode < decoder->numcodes; curcode++) |
9c659ffe |
452 | { |
2ff0b512 |
453 | struct node_t *curnode; |
9c659ffe |
454 | struct node_t* node = &decoder->huffnode[curcode]; |
455 | node->numbits = 0; |
456 | node->bits = 0; |
457 | |
458 | /* if we have a non-zero weight, compute the number of bits */ |
459 | if (node->weight > 0) |
460 | { |
461 | /* determine the number of bits for this node */ |
2ff0b512 |
462 | for (curnode = node; curnode->parent != NULL; curnode = curnode->parent) |
9c659ffe |
463 | node->numbits++; |
464 | if (node->numbits == 0) |
465 | node->numbits = 1; |
466 | |
467 | /* keep track of the max */ |
468 | maxbits = MAX(maxbits, ((int)node->numbits)); |
469 | } |
470 | } |
471 | return maxbits; |
472 | } |
473 | |
474 | /*------------------------------------------------- |
475 | * assign_canonical_codes - assign canonical codes |
476 | * to all the nodes based on the number of bits |
477 | * in each |
478 | *------------------------------------------------- |
479 | */ |
480 | |
481 | enum huffman_error huffman_assign_canonical_codes(struct huffman_decoder* decoder) |
482 | { |
9e052883 |
483 | uint32_t curcode; |
484 | int codelen; |
dcd8e4fd |
485 | uint32_t curstart = 0; |
9c659ffe |
486 | /* build up a histogram of bit lengths */ |
487 | uint32_t bithisto[33] = { 0 }; |
dcd8e4fd |
488 | for (curcode = 0; curcode < decoder->numcodes; curcode++) |
9c659ffe |
489 | { |
490 | struct node_t* node = &decoder->huffnode[curcode]; |
491 | if (node->numbits > decoder->maxbits) |
492 | return HUFFERR_INTERNAL_INCONSISTENCY; |
493 | if (node->numbits <= 32) |
494 | bithisto[node->numbits]++; |
495 | } |
496 | |
497 | /* for each code length, determine the starting code number */ |
dcd8e4fd |
498 | for (codelen = 32; codelen > 0; codelen--) |
9c659ffe |
499 | { |
500 | uint32_t nextstart = (curstart + bithisto[codelen]) >> 1; |
501 | if (codelen != 1 && nextstart * 2 != (curstart + bithisto[codelen])) |
502 | return HUFFERR_INTERNAL_INCONSISTENCY; |
503 | bithisto[codelen] = curstart; |
504 | curstart = nextstart; |
505 | } |
506 | |
507 | /* now assign canonical codes */ |
dcd8e4fd |
508 | for (curcode = 0; curcode < decoder->numcodes; curcode++) |
9c659ffe |
509 | { |
510 | struct node_t* node = &decoder->huffnode[curcode]; |
511 | if (node->numbits > 0) |
512 | node->bits = bithisto[node->numbits]++; |
513 | } |
514 | return HUFFERR_NONE; |
515 | } |
516 | |
517 | /*------------------------------------------------- |
518 | * build_lookup_table - build a lookup table for |
519 | * fast decoding |
520 | *------------------------------------------------- |
521 | */ |
522 | |
523 | void huffman_build_lookup_table(struct huffman_decoder* decoder) |
524 | { |
9e052883 |
525 | uint32_t curcode; |
9c659ffe |
526 | /* iterate over all codes */ |
dcd8e4fd |
527 | for (curcode = 0; curcode < decoder->numcodes; curcode++) |
9c659ffe |
528 | { |
529 | /* process all nodes which have non-zero bits */ |
530 | struct node_t* node = &decoder->huffnode[curcode]; |
531 | if (node->numbits > 0) |
532 | { |
2ff0b512 |
533 | int shift; |
534 | lookup_value *dest; |
535 | lookup_value *destend; |
9c659ffe |
536 | /* set up the entry */ |
537 | lookup_value value = MAKE_LOOKUP(curcode, node->numbits); |
538 | |
539 | /* fill all matching entries */ |
2ff0b512 |
540 | shift = decoder->maxbits - node->numbits; |
541 | dest = &decoder->lookup[node->bits << shift]; |
542 | destend = &decoder->lookup[((node->bits + 1) << shift) - 1]; |
9c659ffe |
543 | while (dest <= destend) |
544 | *dest++ = value; |
545 | } |
546 | } |
547 | } |