98e75f2d |
1 | /* |
2 | * Texture Filtering |
3 | * Version: 1.0 |
4 | * |
5 | * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. |
6 | * Email koolsmoky(at)users.sourceforge.net |
7 | * Web http://www.3dfxzone.it/koolsmoky |
8 | * |
9 | * this is free software; you can redistribute it and/or modify |
10 | * it under the terms of the GNU General Public License as published by |
11 | * the Free Software Foundation; either version 2, or (at your option) |
12 | * any later version. |
13 | * |
14 | * this is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License |
20 | * along with GNU Make; see the file COPYING. If not, write to |
21 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. |
22 | */ |
23 | |
#include "TxUtil.h"
#include "TxDbg.h"
#include <zlib.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#else
#include <unistd.h>
#endif
34 | |
35 | /* |
36 | * External libraries |
37 | ******************************************************************************/ |
38 | TxLoadLib::TxLoadLib() |
39 | { |
40 | #ifdef DXTN_DLL |
41 | if (!_dxtnlib) |
42 | _dxtnlib = LoadLibrary("dxtn"); |
43 | |
44 | if (_dxtnlib) { |
45 | if (!_tx_compress_dxtn) |
46 | _tx_compress_dxtn = (dxtCompressTexFuncExt)DLSYM(_dxtnlib, "tx_compress_dxtn"); |
47 | |
48 | if (!_tx_compress_fxt1) |
49 | _tx_compress_fxt1 = (fxtCompressTexFuncExt)DLSYM(_dxtnlib, "fxt1_encode"); |
50 | } |
51 | #else |
52 | _tx_compress_dxtn = tx_compress_dxtn; |
53 | _tx_compress_fxt1 = fxt1_encode; |
54 | |
55 | #endif |
56 | } |
57 | |
58 | TxLoadLib::~TxLoadLib() |
59 | { |
60 | #ifdef DXTN_DLL |
61 | /* free dynamic library */ |
62 | if (_dxtnlib) |
63 | FreeLibrary(_dxtnlib); |
64 | #endif |
65 | |
66 | } |
67 | |
68 | fxtCompressTexFuncExt |
69 | TxLoadLib::getfxtCompressTexFuncExt() |
70 | { |
71 | return _tx_compress_fxt1; |
72 | } |
73 | |
74 | dxtCompressTexFuncExt |
75 | TxLoadLib::getdxtCompressTexFuncExt() |
76 | { |
77 | return _tx_compress_dxtn; |
78 | } |
79 | |
80 | |
81 | /* |
82 | * Utilities |
83 | ******************************************************************************/ |
84 | uint32 |
85 | TxUtil::checksumTx(uint8 *src, int width, int height, uint16 format) |
86 | { |
87 | int dataSize = sizeofTx(width, height, format); |
88 | |
89 | /* for now we use adler32 if something else is better |
90 | * we can simply swtich later |
91 | */ |
92 | /* return (dataSize ? Adler32(src, dataSize, 1) : 0); */ |
93 | |
94 | /* zlib crc32 */ |
95 | return (dataSize ? crc32(crc32(0L, Z_NULL, 0), src, dataSize) : 0); |
96 | } |
97 | |
98 | int |
99 | TxUtil::sizeofTx(int width, int height, uint16 format) |
100 | { |
101 | int dataSize = 0; |
102 | |
103 | /* a lookup table for the shifts would be better */ |
104 | switch (format) { |
105 | case GR_TEXFMT_ARGB_CMP_FXT1: |
106 | dataSize = (((width + 0x7) & ~0x7) * ((height + 0x3) & ~0x3)) >> 1; |
107 | break; |
108 | case GR_TEXFMT_ARGB_CMP_DXT1: |
109 | dataSize = (((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3)) >> 1; |
110 | break; |
111 | case GR_TEXFMT_ARGB_CMP_DXT3: |
112 | case GR_TEXFMT_ARGB_CMP_DXT5: |
113 | dataSize = ((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3); |
114 | break; |
115 | case GR_TEXFMT_ALPHA_INTENSITY_44: |
116 | case GR_TEXFMT_ALPHA_8: |
117 | case GR_TEXFMT_INTENSITY_8: |
118 | case GR_TEXFMT_P_8: |
119 | dataSize = width * height; |
120 | break; |
121 | case GR_TEXFMT_ARGB_4444: |
122 | case GR_TEXFMT_ARGB_1555: |
123 | case GR_TEXFMT_RGB_565: |
124 | case GR_TEXFMT_ALPHA_INTENSITY_88: |
125 | dataSize = (width * height) << 1; |
126 | break; |
127 | case GR_TEXFMT_ARGB_8888: |
128 | dataSize = (width * height) << 2; |
129 | break; |
130 | default: |
131 | /* unsupported format */ |
132 | DBG_INFO(80, L"Error: cannot get size. unsupported gfmt:%x\n", format); |
133 | ; |
134 | } |
135 | |
136 | return dataSize; |
137 | } |
138 | |
#if 0 /* unused */
/* Classifies the alpha channel of a 32-bit-per-pixel image.
 * This function is compiled out; it is kept for reference only.
 *
 * src:           pixel data, read as width * height 32-bit words
 * width, height: image dimensions in pixels
 */
uint32
TxUtil::chkAlpha(uint32* src, int width, int height)
{
  /* NOTE: src must be ARGB8888
   * return values
   *  0x00000000: 8bit alpha
   *  0x00000001: 1bit alpha
   *  0xff000001: no alpha
   */

  int _size = width * height;
  uint32 alpha = 0;

  /* x86 (32-bit registers) inline assembly.
   *
   * For every pixel the top byte is isolated with a 0xff000000 mask:
   *  - if it is neither 0x00 nor 0xff the code jumps straight to `done`
   *    without storing anything, so `alpha` keeps its initial 0;
   *  - otherwise it is ANDed into ebx, which starts as 0xff000000.
   * After the last pixel bit 0 is set and ebx is stored to `alpha`, giving
   * 0xff000001 when every top byte was 0xff and 0x00000001 when at least
   * one was 0x00.
   *
   * NOTE(review): the pixel count is only tested after the first pixel has
   * been read and the counter decremented, so a width * height of 0 looks
   * unhandled - confirm before re-enabling this code.
   */
  __asm {
    mov esi, dword ptr [src];
    mov ecx, dword ptr [_size];
    mov ebx, 0xff000000;

  tc1_loop:
    mov eax, dword ptr [esi];
    add esi, 4;

    and eax, 0xff000000;
    jz  alpha1bit;
    cmp eax, 0xff000000;
    je  alpha1bit;
    jmp done;

  alpha1bit:
    and ebx, eax;
    dec ecx;
    jnz tc1_loop;

    or  ebx, 0x00000001;
    mov dword ptr [alpha], ebx;

  done:
  }

  return alpha;
}
#endif
182 | |
183 | uint32 |
184 | TxUtil::checksum(uint8 *src, int width, int height, int size, int rowStride) |
185 | { |
186 | /* Rice CRC32 for now. We can switch this to Jabo MD5 or |
187 | * any other custom checksum. |
188 | * TODO: use *_HIRESTEXTURE option. */ |
189 | |
190 | if (!src) return 0; |
191 | |
192 | return RiceCRC32(src, width, height, size, rowStride); |
193 | } |
194 | |
195 | uint64 |
196 | TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette) |
197 | { |
198 | /* Rice CRC32 for now. We can switch this to Jabo MD5 or |
199 | * any other custom checksum. |
200 | * TODO: use *_HIRESTEXTURE option. */ |
201 | /* Returned value is 64bits: hi=palette crc32 low=texture crc32 */ |
202 | |
203 | if (!src) return 0; |
204 | |
205 | uint64 crc64Ret = 0; |
206 | |
207 | if (palette) { |
208 | uint32 crc32 = 0, cimax = 0; |
209 | switch (size & 0xff) { |
210 | case 1: |
211 | if (RiceCRC32_CI8(src, width, height, size, rowStride, &crc32, &cimax)) { |
212 | crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 512); |
213 | crc64Ret <<= 32; |
214 | crc64Ret |= (uint64)crc32; |
215 | } |
216 | break; |
217 | case 0: |
218 | if (RiceCRC32_CI4(src, width, height, size, rowStride, &crc32, &cimax)) { |
219 | crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 32); |
220 | crc64Ret <<= 32; |
221 | crc64Ret |= (uint64)crc32; |
222 | } |
223 | } |
224 | } |
225 | if (!crc64Ret) { |
226 | crc64Ret = (uint64)RiceCRC32(src, width, height, size, rowStride); |
227 | } |
228 | |
229 | return crc64Ret; |
230 | } |
231 | |
232 | /* |
233 | ** Computes Adler32 checksum for a stream of data. |
234 | ** |
235 | ** From the specification found in RFC 1950: (ZLIB Compressed Data Format |
236 | ** Specification version 3.3) |
237 | ** |
238 | ** ADLER32 (Adler-32 checksum) This contains a checksum value of the |
239 | ** uncompressed data (excluding any dictionary data) computed according to |
240 | ** Adler-32 algorithm. This algorithm is a 32-bit extension and improvement |
241 | ** of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 standard. |
242 | ** |
243 | ** Adler-32 is composed of two sums accumulated per byte: s1 is the sum of |
244 | ** all bytes, s2 is the sum of all s1 values. Both sums are done modulo |
245 | ** 65521. s1 is initialized to 1, s2 to zero. The Adler-32 checksum is stored |
246 | ** as s2*65536 + s1 in most-significant-byte first (network) order. |
247 | ** |
248 | ** 8.2. The Adler-32 algorithm |
249 | ** |
250 | ** The Adler-32 algorithm is much faster than the CRC32 algorithm yet still |
251 | ** provides an extremely low probability of undetected errors. |
252 | ** |
253 | ** The modulo on unsigned long accumulators can be delayed for 5552 bytes, |
254 | ** so the modulo operation time is negligible. If the bytes are a, b, c, |
255 | ** the second sum is 3a + 2b + c + 3, and so is position and order sensitive, |
256 | ** unlike the first sum, which is just a checksum. That 65521 is prime is |
257 | ** important to avoid a possible large class of two-byte errors that leave |
258 | ** the check unchanged. (The Fletcher checksum uses 255, which is not prime |
259 | ** and which also makes the Fletcher check insensitive to single byte |
260 | ** changes 0 <-> 255.) |
261 | ** |
262 | ** The sum s1 is initialized to 1 instead of zero to make the length of |
263 | ** the sequence part of s2, so that the length does not have to be checked |
264 | ** separately. (Any sequence of zeroes has a Fletcher checksum of zero.) |
265 | */ |
266 | |
267 | uint32 |
268 | TxUtil::Adler32(const uint8* data, int Len, uint32 dwAdler32) |
269 | { |
270 | #if 1 |
271 | /* zlib adler32 */ |
272 | return adler32(dwAdler32, data, Len); |
273 | #else |
274 | register uint32 s1 = dwAdler32 & 0xFFFF; |
275 | register uint32 s2 = (dwAdler32 >> 16) & 0xFFFF; |
276 | int k; |
277 | |
278 | while (Len > 0) { |
279 | /* 5552 is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ |
280 | k = (Len < 5552 ? Len : 5552); |
281 | Len -= k; |
282 | while (k--) { |
283 | s1 += *data++; |
284 | s2 += s1; |
285 | } |
286 | /* 65521 is the largest prime smaller than 65536 */ |
287 | s1 %= 65521; |
288 | s2 %= 65521; |
289 | } |
290 | |
291 | return (s2 << 16) | s1; |
292 | #endif |
293 | } |
294 | |
295 | uint32 |
296 | TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride) |
297 | { |
298 | int i; |
299 | uint32 ret = 1; |
300 | uint32 width_in_bytes = width * size; |
301 | |
302 | for (i = 0; i < height; i++) { |
303 | ret = Adler32(src, width_in_bytes, ret); |
304 | src += rowStride; |
305 | } |
306 | |
307 | return ret; |
308 | } |
309 | |
/* Rotate left.
 *
 * Returns `value` with its bits rotated towards the most significant end
 * by `count` positions; `count` is reduced modulo the bit width of T.
 * T is expected to be an unsigned integer type.
 */
template<class T> static T __ROL__(T value, unsigned int count)
{
  const unsigned int nbits = sizeof(T) * 8;
  count %= nbits;

  /* A rotation by a multiple of the width is the identity. Handle it here:
   * falling through would shift right by nbits, which is undefined. */
  if (count == 0)
    return value;

  T high = value >> (nbits - count);
  value <<= count;
  value |= high;
  return value;
}
321 | |
322 | /* Rice CRC32 for hires texture packs */ |
323 | /* NOTE: The following is used in Glide64 to calculate the CRC32 |
324 | * for Rice hires texture packs. |
325 | * |
326 | * BYTE* addr = (BYTE*)(gfx.RDRAM + |
327 | * rdp.addr[rdp.tiles[tile].t_mem] + |
328 | * (rdp.tiles[tile].ul_t * bpl) + |
329 | * (((rdp.tiles[tile].ul_s<<rdp.tiles[tile].size)+1)>>1)); |
330 | * RiceCRC32(addr, |
331 | * rdp.tiles[tile].width, |
332 | * rdp.tiles[tile].height, |
333 | * (unsigned short)(rdp.tiles[tile].format << 8 | rdp.tiles[tile].size), |
334 | * bpl); |
335 | */ |
336 | uint32 |
337 | TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride) |
338 | { |
339 | const uint8_t *row; |
340 | uint32_t crc32Ret; |
341 | int cur_height; |
342 | uint32_t pos; |
343 | uint32_t word; |
344 | uint32_t word_hash = 0; |
345 | uint32_t tmp; |
346 | const uint32_t bytes_per_width = ((width << size) + 1) >> 1; |
347 | |
348 | row = src; |
349 | crc32Ret = 0; |
350 | |
351 | for (cur_height = height - 1; cur_height >= 0; cur_height--) { |
352 | for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) { |
353 | word = *(uint32_t *)&row[pos]; |
354 | word_hash = pos ^ word; |
355 | tmp = __ROL__(crc32Ret, 4); |
356 | crc32Ret = word_hash + tmp; |
357 | } |
358 | crc32Ret += cur_height ^ word_hash; |
359 | row += rowStride; |
360 | } |
361 | return crc32Ret; |
362 | } |
363 | |
364 | boolean |
365 | TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride, |
366 | uint32* crc32, uint32* cimax) |
367 | { |
368 | const uint8_t *row; |
369 | uint32_t crc32Ret; |
370 | uint32_t cimaxRet; |
371 | int cur_height; |
372 | uint32_t pos; |
373 | uint32_t word; |
374 | uint32_t word_hash = 0; |
375 | uint32_t tmp; |
376 | const uint32_t bytes_per_width = ((width << size) + 1) >> 1; |
377 | |
378 | row = src; |
379 | crc32Ret = 0; |
380 | cimaxRet = 0; |
381 | |
382 | for (cur_height = height - 1; cur_height >= 0; cur_height--) { |
383 | for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) { |
384 | word = *(uint32_t *)&row[pos]; |
385 | if (cimaxRet != 15) { |
386 | if ((word & 0xF) >= cimaxRet) |
387 | cimaxRet = word & 0xF; |
388 | if ((uint32_t)((uint8_t)word >> 4) >= cimaxRet) |
389 | cimaxRet = (uint8_t)word >> 4; |
390 | if (((word >> 8) & 0xF) >= cimaxRet) |
391 | cimaxRet = (word >> 8) & 0xF; |
392 | if ((uint32_t)((uint16_t)word >> 12) >= cimaxRet) |
393 | cimaxRet = (uint16_t)word >> 12; |
394 | if (((word >> 16) & 0xF) >= cimaxRet) |
395 | cimaxRet = (word >> 16) & 0xF; |
396 | if (((word >> 20) & 0xF) >= cimaxRet) |
397 | cimaxRet = (word >> 20) & 0xF; |
398 | if (((word >> 24) & 0xF) >= cimaxRet) |
399 | cimaxRet = (word >> 24) & 0xF; |
400 | if (word >> 28 >= cimaxRet ) |
401 | cimaxRet = word >> 28; |
402 | } |
403 | word_hash = pos ^ word; |
404 | tmp = __ROL__(crc32Ret, 4); |
405 | crc32Ret = word_hash + tmp; |
406 | } |
407 | crc32Ret += cur_height ^ word_hash; |
408 | row += rowStride; |
409 | } |
410 | *crc32 = crc32Ret; |
411 | *cimax = cimaxRet; |
412 | return 1; |
413 | } |
414 | |
415 | boolean |
416 | TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride, |
417 | uint32* crc32, uint32* cimax) |
418 | { |
419 | const uint8_t *row; |
420 | uint32_t crc32Ret; |
421 | uint32_t cimaxRet; |
422 | int cur_height; |
423 | uint32_t pos; |
424 | uint32_t word; |
425 | uint32_t word_hash = 0; |
426 | uint32_t tmp; |
427 | const uint32_t bytes_per_width = ((width << size) + 1) >> 1; |
428 | |
429 | row = src; |
430 | crc32Ret = 0; |
431 | cimaxRet = 0; |
432 | |
433 | for (cur_height = height - 1; cur_height >= 0; cur_height--) { |
434 | for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) { |
435 | word = *(uint32_t *)&row[pos]; |
436 | if (cimaxRet != 255) { |
437 | if ((uint8_t)word >= cimaxRet) |
438 | cimaxRet = (uint8_t)word; |
439 | if ((uint32_t)((uint16_t)word >> 8) >= cimaxRet) |
440 | cimaxRet = (uint16_t)word >> 8; |
441 | if (((word >> 16) & 0xFF) >= cimaxRet) |
442 | cimaxRet = (word >> 16) & 0xFF; |
443 | if (word >> 24 >= cimaxRet) |
444 | cimaxRet = word >> 24; |
445 | } |
446 | word_hash = pos ^ word; |
447 | tmp = __ROL__(crc32Ret, 4); |
448 | crc32Ret = word_hash + tmp; |
449 | } |
450 | crc32Ret += cur_height ^ word_hash; |
451 | row += rowStride; |
452 | } |
453 | *crc32 = crc32Ret; |
454 | *cimax = cimaxRet; |
455 | return 1; |
456 | } |
457 | |
458 | int |
459 | TxUtil::log2(int num) |
460 | { |
461 | #if defined(__GNUC__) |
462 | return __builtin_ctz(num); |
463 | #elif defined(_MSC_VER) && _MSC_VER >= 1400 |
464 | uint32_t i; |
465 | _BitScanForward((DWORD *)&i, num); |
466 | return i; |
467 | #elif defined(__MSC__) |
468 | __asm { |
469 | mov eax, dword ptr [num]; |
470 | bsr eax, eax; |
471 | mov dword ptr [i], eax; |
472 | } |
473 | #else |
474 | switch (num) { |
475 | case 1: return 0; |
476 | case 2: return 1; |
477 | case 4: return 2; |
478 | case 8: return 3; |
479 | case 16: return 4; |
480 | case 32: return 5; |
481 | case 64: return 6; |
482 | case 128: return 7; |
483 | case 256: return 8; |
484 | case 512: return 9; |
485 | case 1024: return 10; |
486 | case 2048: return 11; |
487 | } |
488 | #endif |
489 | } |
490 | |
491 | int |
492 | TxUtil::grLodLog2(int w, int h) |
493 | { |
494 | return (w >= h ? log2(w) : log2(h)); |
495 | } |
496 | |
497 | int |
498 | TxUtil::grAspectRatioLog2(int w, int h) |
499 | { |
500 | return (w >= h ? log2(w/h) : -log2(h/w)); |
501 | } |
502 | |
503 | int |
504 | TxUtil::getNumberofProcessors() |
505 | { |
506 | int numcore = 1, ret; |
507 | |
508 | #ifdef _WIN32 |
509 | #ifndef _SC_NPROCESSORS_ONLN |
510 | SYSTEM_INFO info; |
511 | GetSystemInfo(&info); |
512 | #define sysconf(a) info.dwNumberOfProcessors |
513 | #define _SC_NPROCESSORS_ONLN |
514 | #endif |
515 | #endif |
516 | #ifdef _SC_NPROCESSORS_ONLN |
517 | ret = sysconf(_SC_NPROCESSORS_CONF); |
518 | if (ret >= 1) { |
519 | numcore = ret; |
520 | } |
521 | ret = sysconf(_SC_NPROCESSORS_ONLN); |
522 | if (ret < 1) { |
523 | numcore = ret; |
524 | } |
525 | #endif |
526 | |
527 | return numcore; |
528 | } |
529 | |
530 | |
531 | /* |
532 | * Memory buffers for texture manipulations |
533 | ******************************************************************************/ |
534 | TxMemBuf::TxMemBuf() |
535 | { |
536 | int i; |
537 | for (i = 0; i < 2; i++) { |
538 | _tex[i] = NULL; |
539 | _size[i] = 0; |
540 | } |
541 | } |
542 | |
543 | TxMemBuf::~TxMemBuf() |
544 | { |
545 | shutdown(); |
546 | } |
547 | |
548 | boolean |
549 | TxMemBuf::init(int maxwidth, int maxheight) |
550 | { |
551 | int i; |
552 | for (i = 0; i < 2; i++) { |
553 | if (!_tex[i]) { |
554 | _tex[i] = (uint8 *)malloc(maxwidth * maxheight * 4); |
555 | _size[i] = maxwidth * maxheight * 4; |
556 | } |
557 | |
558 | if (!_tex[i]) { |
559 | shutdown(); |
560 | return 0; |
561 | } |
562 | } |
563 | return 1; |
564 | } |
565 | |
566 | void |
567 | TxMemBuf::shutdown() |
568 | { |
569 | int i; |
570 | for (i = 0; i < 2; i++) { |
571 | if (_tex[i]) free(_tex[i]); |
572 | _tex[i] = NULL; |
573 | _size[i] = 0; |
574 | } |
575 | } |
576 | |
577 | uint8* |
578 | TxMemBuf::get(unsigned int num) |
579 | { |
580 | return ((num < 2) ? _tex[num] : NULL); |
581 | } |
582 | |
583 | uint32 |
584 | TxMemBuf::size_of(unsigned int num) |
585 | { |
586 | return ((num < 2) ? _size[num] : 0); |
587 | } |