ALL: Huge upstream synch + PerRom DelaySI & CountPerOp parameters
[mupen64plus-pandora.git] / source / gles2glide64 / src / GlideHQ / TxUtil.cpp
CommitLineData
/*
 * Texture Filtering
 * Version:  1.0
 *
 * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
 * Email koolsmoky(at)users.sourceforge.net
 * Web   http://www.3dfxzone.it/koolsmoky
 *
 * this is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * this is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Make; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */
23
#include "TxUtil.h"
#include "TxDbg.h"
#include <zlib.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#else
#include <unistd.h>
#endif
34
/*
 * External libraries
 ******************************************************************************/
38TxLoadLib::TxLoadLib()
39{
40#ifdef DXTN_DLL
41 if (!_dxtnlib)
42 _dxtnlib = LoadLibrary("dxtn");
43
44 if (_dxtnlib) {
2d262872 45 if (!_tx_compress_dxtn_rgba)
46 _tx_compress_dxtn_rgba = (dxtCompressTexFuncExt)DLSYM(_dxtnlib, "tx_compress_dxtn_rgba");
98e75f2d 47
48 if (!_tx_compress_fxt1)
49 _tx_compress_fxt1 = (fxtCompressTexFuncExt)DLSYM(_dxtnlib, "fxt1_encode");
50 }
51#else
2d262872 52 _tx_compress_dxtn_rgba = tx_compress_dxtn_rgba;
98e75f2d 53 _tx_compress_fxt1 = fxt1_encode;
54
55#endif
56}
57
58TxLoadLib::~TxLoadLib()
59{
60#ifdef DXTN_DLL
61 /* free dynamic library */
62 if (_dxtnlib)
63 FreeLibrary(_dxtnlib);
64#endif
65
66}
67
68fxtCompressTexFuncExt
69TxLoadLib::getfxtCompressTexFuncExt()
70{
71 return _tx_compress_fxt1;
72}
73
74dxtCompressTexFuncExt
75TxLoadLib::getdxtCompressTexFuncExt()
76{
2d262872 77 return _tx_compress_dxtn_rgba;
98e75f2d 78}
79
80
/*
 * Utilities
 ******************************************************************************/
84uint32
85TxUtil::checksumTx(uint8 *src, int width, int height, uint16 format)
86{
87 int dataSize = sizeofTx(width, height, format);
88
89 /* for now we use adler32 if something else is better
90 * we can simply swtich later
91 */
92 /* return (dataSize ? Adler32(src, dataSize, 1) : 0); */
93
94 /* zlib crc32 */
95 return (dataSize ? crc32(crc32(0L, Z_NULL, 0), src, dataSize) : 0);
96}
97
98int
99TxUtil::sizeofTx(int width, int height, uint16 format)
100{
101 int dataSize = 0;
102
103 /* a lookup table for the shifts would be better */
104 switch (format) {
105 case GR_TEXFMT_ARGB_CMP_FXT1:
106 dataSize = (((width + 0x7) & ~0x7) * ((height + 0x3) & ~0x3)) >> 1;
107 break;
108 case GR_TEXFMT_ARGB_CMP_DXT1:
109 dataSize = (((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3)) >> 1;
110 break;
111 case GR_TEXFMT_ARGB_CMP_DXT3:
112 case GR_TEXFMT_ARGB_CMP_DXT5:
113 dataSize = ((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3);
114 break;
115 case GR_TEXFMT_ALPHA_INTENSITY_44:
116 case GR_TEXFMT_ALPHA_8:
117 case GR_TEXFMT_INTENSITY_8:
118 case GR_TEXFMT_P_8:
119 dataSize = width * height;
120 break;
121 case GR_TEXFMT_ARGB_4444:
122 case GR_TEXFMT_ARGB_1555:
123 case GR_TEXFMT_RGB_565:
124 case GR_TEXFMT_ALPHA_INTENSITY_88:
125 dataSize = (width * height) << 1;
126 break;
127 case GR_TEXFMT_ARGB_8888:
128 dataSize = (width * height) << 2;
129 break;
130 default:
131 /* unsupported format */
132 DBG_INFO(80, L"Error: cannot get size. unsupported gfmt:%x\n", format);
133 ;
134 }
135
136 return dataSize;
137}
138
#if 0 /* unused */
/*
 * Classifies the alpha channel of an ARGB8888 image.
 *
 * NOTE: src must be ARGB8888
 * return values
 * 0x00000000: 8bit alpha (some texel has alpha other than 0x00 or 0xff)
 * 0x00000001: 1bit alpha (only 0x00/0xff alphas, at least one 0x00)
 * 0xff000001: no alpha   (every texel has alpha 0xff)
 *
 * Written in portable C++ instead of MSVC x86 inline assembly so that it
 * can be built by any compiler if it is ever re-enabled.  An image with
 * no texels (width * height <= 0) is reported as "no alpha"; the assembly
 * version would have iterated ~2^32 times in that case.
 */
uint32
TxUtil::chkAlpha(uint32* src, int width, int height)
{
  const int texels = width * height;
  uint32 common = 0xff000000; /* AND of all alpha bytes seen so far */

  for (int i = 0; i < texels; i++) {
    const uint32 a = src[i] & 0xff000000;

    /* anything between fully transparent and fully opaque: 8bit alpha */
    if (a != 0 && a != 0xff000000) {
      return 0x00000000;
    }
    common &= a;
  }

  return common | 0x00000001;
}
#endif
182
183uint32
184TxUtil::checksum(uint8 *src, int width, int height, int size, int rowStride)
185{
186 /* Rice CRC32 for now. We can switch this to Jabo MD5 or
187 * any other custom checksum.
188 * TODO: use *_HIRESTEXTURE option. */
189
190 if (!src) return 0;
191
192 return RiceCRC32(src, width, height, size, rowStride);
193}
194
195uint64
196TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette)
197{
198 /* Rice CRC32 for now. We can switch this to Jabo MD5 or
199 * any other custom checksum.
200 * TODO: use *_HIRESTEXTURE option. */
201 /* Returned value is 64bits: hi=palette crc32 low=texture crc32 */
202
203 if (!src) return 0;
204
205 uint64 crc64Ret = 0;
206
207 if (palette) {
208 uint32 crc32 = 0, cimax = 0;
209 switch (size & 0xff) {
210 case 1:
211 if (RiceCRC32_CI8(src, width, height, size, rowStride, &crc32, &cimax)) {
212 crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 512);
213 crc64Ret <<= 32;
214 crc64Ret |= (uint64)crc32;
215 }
216 break;
217 case 0:
218 if (RiceCRC32_CI4(src, width, height, size, rowStride, &crc32, &cimax)) {
219 crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 32);
220 crc64Ret <<= 32;
221 crc64Ret |= (uint64)crc32;
222 }
223 }
224 }
225 if (!crc64Ret) {
226 crc64Ret = (uint64)RiceCRC32(src, width, height, size, rowStride);
227 }
228
229 return crc64Ret;
230}
231
/*
** Computes Adler32 checksum for a stream of data.
**
** From the specification found in RFC 1950: (ZLIB Compressed Data Format
** Specification version 3.3)
**
** ADLER32 (Adler-32 checksum) This contains a checksum value of the
** uncompressed data (excluding any dictionary data) computed according to
** Adler-32 algorithm. This algorithm is a 32-bit extension and improvement
** of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 standard.
**
** Adler-32 is composed of two sums accumulated per byte: s1 is the sum of
** all bytes, s2 is the sum of all s1 values. Both sums are done modulo
** 65521. s1 is initialized to 1, s2 to zero. The Adler-32 checksum is stored
** as s2*65536 + s1 in most-significant-byte first (network) order.
**
** 8.2. The Adler-32 algorithm
**
** The Adler-32 algorithm is much faster than the CRC32 algorithm yet still
** provides an extremely low probability of undetected errors.
**
** The modulo on unsigned long accumulators can be delayed for 5552 bytes,
** so the modulo operation time is negligible. If the bytes are a, b, c,
** the second sum is 3a + 2b + c + 3, and so is position and order sensitive,
** unlike the first sum, which is just a checksum. That 65521 is prime is
** important to avoid a possible large class of two-byte errors that leave
** the check unchanged. (The Fletcher checksum uses 255, which is not prime
** and which also makes the Fletcher check insensitive to single byte
** changes 0 <-> 255.)
**
** The sum s1 is initialized to 1 instead of zero to make the length of
** the sequence part of s2, so that the length does not have to be checked
** separately. (Any sequence of zeroes has a Fletcher checksum of zero.)
*/
266
267uint32
268TxUtil::Adler32(const uint8* data, int Len, uint32 dwAdler32)
269{
270#if 1
271 /* zlib adler32 */
272 return adler32(dwAdler32, data, Len);
273#else
274 register uint32 s1 = dwAdler32 & 0xFFFF;
275 register uint32 s2 = (dwAdler32 >> 16) & 0xFFFF;
276 int k;
277
278 while (Len > 0) {
279 /* 5552 is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
280 k = (Len < 5552 ? Len : 5552);
281 Len -= k;
282 while (k--) {
283 s1 += *data++;
284 s2 += s1;
285 }
286 /* 65521 is the largest prime smaller than 65536 */
287 s1 %= 65521;
288 s2 %= 65521;
289 }
290
291 return (s2 << 16) | s1;
292#endif
293}
294
295uint32
296TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride)
297{
298 int i;
299 uint32 ret = 1;
300 uint32 width_in_bytes = width * size;
301
302 for (i = 0; i < height; i++) {
303 ret = Adler32(src, width_in_bytes, ret);
304 src += rowStride;
305 }
306
307 return ret;
308}
309
/*
 * rotate left
 *
 * Rotates `value` left by `count` bit positions; `count` is first reduced
 * modulo the bit width of T.  A reduced count of 0 returns `value`
 * unchanged -- without that guard the complementary right shift would be
 * by the full width of T, which is undefined behaviour.
 * Intended for unsigned T (a right shift of a negative signed value would
 * smear the sign bit into the result).
 */
template<class T> static T __ROL__(T value, unsigned int count)
{
  const unsigned int nbits = sizeof(T) * 8;
  count %= nbits;

  if (count == 0) {
    return value;
  }

  /* the cast drops bits shifted past the top of types narrower than int */
  return (T)((value << count) | (value >> (nbits - count)));
}
321
322/* Rice CRC32 for hires texture packs */
323/* NOTE: The following is used in Glide64 to calculate the CRC32
324 * for Rice hires texture packs.
325 *
326 * BYTE* addr = (BYTE*)(gfx.RDRAM +
327 * rdp.addr[rdp.tiles[tile].t_mem] +
328 * (rdp.tiles[tile].ul_t * bpl) +
329 * (((rdp.tiles[tile].ul_s<<rdp.tiles[tile].size)+1)>>1));
330 * RiceCRC32(addr,
331 * rdp.tiles[tile].width,
332 * rdp.tiles[tile].height,
333 * (unsigned short)(rdp.tiles[tile].format << 8 | rdp.tiles[tile].size),
334 * bpl);
335 */
336uint32
337TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride)
338{
339 const uint8_t *row;
340 uint32_t crc32Ret;
341 int cur_height;
342 uint32_t pos;
343 uint32_t word;
344 uint32_t word_hash = 0;
345 uint32_t tmp;
346 const uint32_t bytes_per_width = ((width << size) + 1) >> 1;
347
348 row = src;
349 crc32Ret = 0;
350
351 for (cur_height = height - 1; cur_height >= 0; cur_height--) {
352 for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
353 word = *(uint32_t *)&row[pos];
354 word_hash = pos ^ word;
355 tmp = __ROL__(crc32Ret, 4);
356 crc32Ret = word_hash + tmp;
357 }
358 crc32Ret += cur_height ^ word_hash;
359 row += rowStride;
360 }
361 return crc32Ret;
362}
363
364boolean
365TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride,
366 uint32* crc32, uint32* cimax)
367{
368 const uint8_t *row;
369 uint32_t crc32Ret;
370 uint32_t cimaxRet;
371 int cur_height;
372 uint32_t pos;
373 uint32_t word;
374 uint32_t word_hash = 0;
375 uint32_t tmp;
376 const uint32_t bytes_per_width = ((width << size) + 1) >> 1;
377
378 row = src;
379 crc32Ret = 0;
380 cimaxRet = 0;
381
382 for (cur_height = height - 1; cur_height >= 0; cur_height--) {
383 for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
384 word = *(uint32_t *)&row[pos];
385 if (cimaxRet != 15) {
386 if ((word & 0xF) >= cimaxRet)
387 cimaxRet = word & 0xF;
388 if ((uint32_t)((uint8_t)word >> 4) >= cimaxRet)
389 cimaxRet = (uint8_t)word >> 4;
390 if (((word >> 8) & 0xF) >= cimaxRet)
391 cimaxRet = (word >> 8) & 0xF;
392 if ((uint32_t)((uint16_t)word >> 12) >= cimaxRet)
393 cimaxRet = (uint16_t)word >> 12;
394 if (((word >> 16) & 0xF) >= cimaxRet)
395 cimaxRet = (word >> 16) & 0xF;
396 if (((word >> 20) & 0xF) >= cimaxRet)
397 cimaxRet = (word >> 20) & 0xF;
398 if (((word >> 24) & 0xF) >= cimaxRet)
399 cimaxRet = (word >> 24) & 0xF;
400 if (word >> 28 >= cimaxRet )
401 cimaxRet = word >> 28;
402 }
403 word_hash = pos ^ word;
404 tmp = __ROL__(crc32Ret, 4);
405 crc32Ret = word_hash + tmp;
406 }
407 crc32Ret += cur_height ^ word_hash;
408 row += rowStride;
409 }
410 *crc32 = crc32Ret;
411 *cimax = cimaxRet;
412 return 1;
413}
414
415boolean
416TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride,
417 uint32* crc32, uint32* cimax)
418{
419 const uint8_t *row;
420 uint32_t crc32Ret;
421 uint32_t cimaxRet;
422 int cur_height;
423 uint32_t pos;
424 uint32_t word;
425 uint32_t word_hash = 0;
426 uint32_t tmp;
427 const uint32_t bytes_per_width = ((width << size) + 1) >> 1;
428
429 row = src;
430 crc32Ret = 0;
431 cimaxRet = 0;
432
433 for (cur_height = height - 1; cur_height >= 0; cur_height--) {
434 for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
435 word = *(uint32_t *)&row[pos];
436 if (cimaxRet != 255) {
437 if ((uint8_t)word >= cimaxRet)
438 cimaxRet = (uint8_t)word;
439 if ((uint32_t)((uint16_t)word >> 8) >= cimaxRet)
440 cimaxRet = (uint16_t)word >> 8;
441 if (((word >> 16) & 0xFF) >= cimaxRet)
442 cimaxRet = (word >> 16) & 0xFF;
443 if (word >> 24 >= cimaxRet)
444 cimaxRet = word >> 24;
445 }
446 word_hash = pos ^ word;
447 tmp = __ROL__(crc32Ret, 4);
448 crc32Ret = word_hash + tmp;
449 }
450 crc32Ret += cur_height ^ word_hash;
451 row += rowStride;
452 }
453 *crc32 = crc32Ret;
454 *cimax = cimaxRet;
455 return 1;
456}
457
458int
459TxUtil::log2(int num)
460{
461#if defined(__GNUC__)
462 return __builtin_ctz(num);
463#elif defined(_MSC_VER) && _MSC_VER >= 1400
464 uint32_t i;
465 _BitScanForward((DWORD *)&i, num);
466 return i;
467#elif defined(__MSC__)
468 __asm {
469 mov eax, dword ptr [num];
470 bsr eax, eax;
471 mov dword ptr [i], eax;
472 }
473#else
474 switch (num) {
475 case 1: return 0;
476 case 2: return 1;
477 case 4: return 2;
478 case 8: return 3;
479 case 16: return 4;
480 case 32: return 5;
481 case 64: return 6;
482 case 128: return 7;
483 case 256: return 8;
484 case 512: return 9;
485 case 1024: return 10;
486 case 2048: return 11;
487 }
488#endif
489}
490
491int
492TxUtil::grLodLog2(int w, int h)
493{
494 return (w >= h ? log2(w) : log2(h));
495}
496
497int
498TxUtil::grAspectRatioLog2(int w, int h)
499{
500 return (w >= h ? log2(w/h) : -log2(h/w));
501}
502
503int
504TxUtil::getNumberofProcessors()
505{
506 int numcore = 1, ret;
507
508#ifdef _WIN32
509#ifndef _SC_NPROCESSORS_ONLN
510 SYSTEM_INFO info;
511 GetSystemInfo(&info);
512#define sysconf(a) info.dwNumberOfProcessors
513#define _SC_NPROCESSORS_ONLN
514#endif
515#endif
516#ifdef _SC_NPROCESSORS_ONLN
517 ret = sysconf(_SC_NPROCESSORS_CONF);
518 if (ret >= 1) {
519 numcore = ret;
520 }
521 ret = sysconf(_SC_NPROCESSORS_ONLN);
522 if (ret < 1) {
523 numcore = ret;
524 }
525#endif
526
527 return numcore;
528}
529
530
/*
 * Memory buffers for texture manipulations
 ******************************************************************************/
534TxMemBuf::TxMemBuf()
535{
536 int i;
537 for (i = 0; i < 2; i++) {
538 _tex[i] = NULL;
539 _size[i] = 0;
540 }
541}
542
543TxMemBuf::~TxMemBuf()
544{
545 shutdown();
546}
547
548boolean
549TxMemBuf::init(int maxwidth, int maxheight)
550{
551 int i;
552 for (i = 0; i < 2; i++) {
553 if (!_tex[i]) {
554 _tex[i] = (uint8 *)malloc(maxwidth * maxheight * 4);
555 _size[i] = maxwidth * maxheight * 4;
556 }
557
558 if (!_tex[i]) {
559 shutdown();
560 return 0;
561 }
562 }
563 return 1;
564}
565
566void
567TxMemBuf::shutdown()
568{
569 int i;
570 for (i = 0; i < 2; i++) {
571 if (_tex[i]) free(_tex[i]);
572 _tex[i] = NULL;
573 _size[i] = 0;
574 }
575}
576
577uint8*
578TxMemBuf::get(unsigned int num)
579{
580 return ((num < 2) ? _tex[num] : NULL);
581}
582
583uint32
584TxMemBuf::size_of(unsigned int num)
585{
586 return ((num < 2) ? _size[num] : 0);
587}