source/gles2glide64/src/GlideHQ/TxUtil.cpp

   1 /*
   2  * Texture Filtering
   3  * Version:  1.0
   4  *
   5  * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
   6  * Email koolsmoky(at)users.sourceforge.net
   7  * Web   http://www.3dfxzone.it/koolsmoky
   8  *
   9  * this is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2, or (at your option)
  12  * any later version.
  13  *
  14  * this is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with GNU Make; see the file COPYING.  If not, write to
  21  * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  22  */
  23
#include "TxUtil.h"
#include "TxDbg.h"
#include <zlib.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#else
#include <unistd.h>
#endif
  34
  35 /*
  36  * External libraries
  37  ******************************************************************************/
  38 TxLoadLib::TxLoadLib()
  39 {
  40 #ifdef DXTN_DLL
  41   if (!_dxtnlib)
  42     _dxtnlib = LoadLibrary("dxtn");
  43
  44   if (_dxtnlib) {
  45     if (!_tx_compress_dxtn_rgba)
  46       _tx_compress_dxtn_rgba = (dxtCompressTexFuncExt)DLSYM(_dxtnlib, "tx_compress_dxtn_rgba");
  47
  48     if (!_tx_compress_fxt1)
  49       _tx_compress_fxt1 = (fxtCompressTexFuncExt)DLSYM(_dxtnlib, "fxt1_encode");
  50   }
  51 #else
  52   _tx_compress_dxtn_rgba = tx_compress_dxtn_rgba;
  53   _tx_compress_fxt1 = fxt1_encode;
  54
  55 #endif
  56 }
  57
  58 TxLoadLib::~TxLoadLib()
  59 {
  60 #ifdef DXTN_DLL
  61   /* free dynamic library */
  62   if (_dxtnlib)
  63     FreeLibrary(_dxtnlib);
  64 #endif
  65
  66 }
  67
  68 fxtCompressTexFuncExt
  69 TxLoadLib::getfxtCompressTexFuncExt()
  70 {
  71   return _tx_compress_fxt1;
  72 }
  73
  74 dxtCompressTexFuncExt
  75 TxLoadLib::getdxtCompressTexFuncExt()
  76 {
  77   return _tx_compress_dxtn_rgba;
  78 }
  79
  80
  81 /*
  82  * Utilities
  83  ******************************************************************************/
  84 uint32
  85 TxUtil::checksumTx(uint8 *src, int width, int height, uint16 format)
  86 {
  87   int dataSize = sizeofTx(width, height, format);
  88
  89   /* for now we use adler32 if something else is better
  90    * we can simply swtich later
  91    */
  92   /* return (dataSize ? Adler32(src, dataSize, 1) : 0); */
  93
  94   /* zlib crc32 */
  95   return (dataSize ? crc32(crc32(0L, Z_NULL, 0), src, dataSize) : 0);
  96 }
  97
  98 int
  99 TxUtil::sizeofTx(int width, int height, uint16 format)
 100 {
 101   int dataSize = 0;
 102
 103   /* a lookup table for the shifts would be better */
 104   switch (format) {
 105   case GR_TEXFMT_ARGB_CMP_FXT1:
 106     dataSize = (((width + 0x7) & ~0x7) * ((height + 0x3) & ~0x3)) >> 1;
 107     break;
 108   case GR_TEXFMT_ARGB_CMP_DXT1:
 109     dataSize = (((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3)) >> 1;
 110     break;
 111   case GR_TEXFMT_ARGB_CMP_DXT3:
 112   case GR_TEXFMT_ARGB_CMP_DXT5:
 113     dataSize = ((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3);
 114     break;
 115   case GR_TEXFMT_ALPHA_INTENSITY_44:
 116   case GR_TEXFMT_ALPHA_8:
 117   case GR_TEXFMT_INTENSITY_8:
 118   case GR_TEXFMT_P_8:
 119     dataSize = width * height;
 120     break;
 121   case GR_TEXFMT_ARGB_4444:
 122   case GR_TEXFMT_ARGB_1555:
 123   case GR_TEXFMT_RGB_565:
 124   case GR_TEXFMT_ALPHA_INTENSITY_88:
 125     dataSize = (width * height) << 1;
 126     break;
 127   case GR_TEXFMT_ARGB_8888:
 128     dataSize = (width * height) << 2;
 129     break;
 130   default:
 131     /* unsupported format */
 132     DBG_INFO(80, L"Error: cannot get size. unsupported gfmt:%x\n", format);
 133     ;
 134   }
 135
 136   return dataSize;
 137 }
 138
#if 0 /* unused */
/*
 * Classify the alpha channel of a texture. This code is compiled out
 * (#if 0) and is written as an x86 __asm block.
 *
 * src           - pixel data, read one 32-bit word per pixel
 * width, height - width * height pixels are scanned
 *
 * The scan stops at the first pixel whose alpha byte is neither 0x00 nor
 * 0xff; in that case `alpha` keeps its initial value of 0.
 */
uint32
TxUtil::chkAlpha(uint32* src, int width, int height)
{
  /* NOTE: src must be ARGB8888
   * return values
   * 0x00000000: 8bit alpha
   * 0x00000001: 1bit alpha
   * 0xff000001: no alpha
   */

  int _size = width * height;
  uint32 alpha = 0;

  __asm {
    /* esi = read pointer, ecx = pixels left, ebx = AND of all alpha bytes */
    mov esi, dword ptr [src];
    mov ecx, dword ptr [_size];
    mov ebx, 0xff000000;

  tc1_loop:
    mov eax, dword ptr [esi];
    add esi, 4;

    /* isolate the alpha byte; only 0x00 and 0xff keep the scan going */
    and eax, 0xff000000;
    jz  alpha1bit;
    cmp eax, 0xff000000;
    je  alpha1bit;
    jmp done;

  alpha1bit:
    /* ebx stays 0xff000000 only while every alpha seen so far was 0xff */
    and ebx, eax;
    dec ecx;
    jnz tc1_loop;

    /* whole texture scanned: result is 0xff000001 or 0x00000001 */
    or  ebx, 0x00000001;
    mov dword ptr [alpha], ebx;

  done:
  }

  return alpha;
}
#endif
 182
 183 uint32
 184 TxUtil::checksum(uint8 *src, int width, int height, int size, int rowStride)
 185 {
 186   /* Rice CRC32 for now. We can switch this to Jabo MD5 or
 187    * any other custom checksum.
 188    * TODO: use *_HIRESTEXTURE option. */
 189
 190   if (!src) return 0;
 191
 192   return RiceCRC32(src, width, height, size, rowStride);
 193 }
 194
 195 uint64
 196 TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette)
 197 {
 198   /* Rice CRC32 for now. We can switch this to Jabo MD5 or
 199    * any other custom checksum.
 200    * TODO: use *_HIRESTEXTURE option. */
 201   /* Returned value is 64bits: hi=palette crc32 low=texture crc32 */
 202
 203   if (!src) return 0;
 204
 205   uint64 crc64Ret = 0;
 206
 207   if (palette) {
 208     uint32 crc32 = 0, cimax = 0;
 209     switch (size & 0xff) {
 210     case 1:
 211       if (RiceCRC32_CI8(src, width, height, size, rowStride, &crc32, &cimax)) {
 212         crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 512);
 213         crc64Ret <<= 32;
 214         crc64Ret |= (uint64)crc32;
 215       }
 216       break;
 217     case 0:
 218       if (RiceCRC32_CI4(src, width, height, size, rowStride, &crc32, &cimax)) {
 219         crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 32);
 220         crc64Ret <<= 32;
 221         crc64Ret |= (uint64)crc32;
 222       }
 223     }
 224   }
 225   if (!crc64Ret) {
 226     crc64Ret = (uint64)RiceCRC32(src, width, height, size, rowStride);
 227   }
 228
 229   return crc64Ret;
 230 }
 231
 232 /*
 233 ** Computes Adler32 checksum for a stream of data.
 234 **
 235 ** From the specification found in RFC 1950: (ZLIB Compressed Data Format
 236 ** Specification version 3.3)
 237 **
 238 ** ADLER32 (Adler-32 checksum) This contains a checksum value of the
 239 ** uncompressed data (excluding any dictionary data) computed according to
 240 ** Adler-32 algorithm. This algorithm is a 32-bit extension and improvement
 241 ** of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 standard.
 242 **
 243 ** Adler-32 is composed of two sums accumulated per byte: s1 is the sum of
 244 ** all bytes, s2 is the sum of all s1 values. Both sums are done modulo
 245 ** 65521. s1 is initialized to 1, s2 to zero. The Adler-32 checksum is stored
 246 ** as s2*65536 + s1 in most-significant-byte first (network) order.
 247 **
 248 ** 8.2. The Adler-32 algorithm
 249 **
 250 ** The Adler-32 algorithm is much faster than the CRC32 algorithm yet still
 251 ** provides an extremely low probability of undetected errors.
 252 **
 253 ** The modulo on unsigned long accumulators can be delayed for 5552 bytes,
 254 ** so the modulo operation time is negligible. If the bytes are a, b, c,
 255 ** the second sum is 3a + 2b + c + 3, and so is position and order sensitive,
 256 ** unlike the first sum, which is just a checksum. That 65521 is prime is
 257 ** important to avoid a possible large class of two-byte errors that leave
 258 ** the check unchanged. (The Fletcher checksum uses 255, which is not prime
 259 ** and which also makes the Fletcher check insensitive to single byte
 260 ** changes 0 <-> 255.)
 261 **
 262 ** The sum s1 is initialized to 1 instead of zero to make the length of
 263 ** the sequence part of s2, so that the length does not have to be checked
 264 ** separately. (Any sequence of zeroes has a Fletcher checksum of zero.)
 265 */
 266
 267 uint32
 268 TxUtil::Adler32(const uint8* data, int Len, uint32 dwAdler32)
 269 {
 270 #if 1
 271   /* zlib adler32 */
 272   return adler32(dwAdler32, data, Len);
 273 #else
 274   register uint32 s1 = dwAdler32 & 0xFFFF;
 275   register uint32 s2 = (dwAdler32 >> 16) & 0xFFFF;
 276   int k;
 277
 278   while (Len > 0) {
 279     /* 5552 is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
 280     k = (Len < 5552 ? Len : 5552);
 281     Len -= k;
 282     while (k--) {
 283       s1 += *data++;
 284       s2 += s1;
 285     }
 286     /* 65521 is the largest prime smaller than 65536 */
 287     s1 %= 65521;
 288     s2 %= 65521;
 289   }
 290
 291   return (s2 << 16) | s1;
 292 #endif
 293 }
 294
 295 uint32
 296 TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride)
 297 {
 298   int i;
 299   uint32 ret = 1;
 300   uint32 width_in_bytes = width * size;
 301
 302   for (i = 0; i < height; i++) {
 303     ret = Adler32(src, width_in_bytes, ret);
 304     src += rowStride;
 305   }
 306
 307   return ret;
 308 }
 309
 310 // rotate left
 311 template<class T> static T __ROL__(T value, unsigned int count)
 312 {
 313   const unsigned int nbits = sizeof(T) * 8;
 314   count %= nbits;
 315
 316   T high = value >> (nbits - count);
 317   value <<= count;
 318   value |= high;
 319   return value;
 320 }
 321
 322 /* Rice CRC32 for hires texture packs */
 323 /* NOTE: The following is used in Glide64 to calculate the CRC32
 324  * for Rice hires texture packs.
 325  *
 326  * BYTE* addr = (BYTE*)(gfx.RDRAM +
 327  *                     rdp.addr[rdp.tiles[tile].t_mem] +
 328  *                     (rdp.tiles[tile].ul_t * bpl) +
 329  *                     (((rdp.tiles[tile].ul_s<<rdp.tiles[tile].size)+1)>>1));
 330  * RiceCRC32(addr,
 331  *          rdp.tiles[tile].width,
 332  *          rdp.tiles[tile].height,
 333  *          (unsigned short)(rdp.tiles[tile].format << 8 | rdp.tiles[tile].size),
 334  *          bpl);
 335  */
 336 uint32
 337 TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride)
 338 {
 339   const uint8_t *row;
 340   uint32_t crc32Ret;
 341   int cur_height;
 342   uint32_t pos;
 343   uint32_t word;
 344   uint32_t word_hash = 0;
 345   uint32_t tmp;
 346   const uint32_t bytes_per_width = ((width << size) + 1) >> 1;
 347
 348   row = src;
 349   crc32Ret = 0;
 350
 351   for (cur_height = height - 1; cur_height >= 0; cur_height--) {
 352     for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
 353       word = *(uint32_t *)&row[pos];
 354       word_hash = pos ^ word;
 355       tmp = __ROL__(crc32Ret, 4);
 356       crc32Ret = word_hash + tmp;
 357     }
 358     crc32Ret += cur_height ^ word_hash;
 359     row += rowStride;
 360   }
 361   return crc32Ret;
 362 }
 363
 364 boolean
 365 TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride,
 366                         uint32* crc32, uint32* cimax)
 367 {
 368   const uint8_t *row;
 369   uint32_t crc32Ret;
 370   uint32_t cimaxRet;
 371   int cur_height;
 372   uint32_t pos;
 373   uint32_t word;
 374   uint32_t word_hash = 0;
 375   uint32_t tmp;
 376   const uint32_t bytes_per_width = ((width << size) + 1) >> 1;
 377
 378   row = src;
 379   crc32Ret = 0;
 380   cimaxRet = 0;
 381
 382   for (cur_height = height - 1; cur_height >= 0; cur_height--) {
 383     for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
 384       word = *(uint32_t *)&row[pos];
 385       if (cimaxRet != 15) {
 386         if ((word & 0xF) >= cimaxRet)
 387           cimaxRet = word & 0xF;
 388         if ((uint32_t)((uint8_t)word >> 4) >= cimaxRet)
 389           cimaxRet = (uint8_t)word >> 4;
 390         if (((word >> 8) & 0xF) >= cimaxRet)
 391           cimaxRet = (word >> 8) & 0xF;
 392         if ((uint32_t)((uint16_t)word >> 12) >= cimaxRet)
 393           cimaxRet = (uint16_t)word >> 12;
 394         if (((word >> 16) & 0xF) >= cimaxRet)
 395           cimaxRet = (word >> 16) & 0xF;
 396         if (((word >> 20) & 0xF) >= cimaxRet)
 397           cimaxRet = (word >> 20) & 0xF;
 398         if (((word >> 24) & 0xF) >= cimaxRet)
 399           cimaxRet = (word >> 24) & 0xF;
 400         if (word >> 28 >= cimaxRet )
 401           cimaxRet = word >> 28;
 402       }
 403       word_hash = pos ^ word;
 404       tmp = __ROL__(crc32Ret, 4);
 405       crc32Ret = word_hash + tmp;
 406     }
 407     crc32Ret += cur_height ^ word_hash;
 408     row += rowStride;
 409   }
 410   *crc32 = crc32Ret;
 411   *cimax = cimaxRet;
 412   return 1;
 413 }
 414
 415 boolean
 416 TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride,
 417                       uint32* crc32, uint32* cimax)
 418 {
 419   const uint8_t *row;
 420   uint32_t crc32Ret;
 421   uint32_t cimaxRet;
 422   int cur_height;
 423   uint32_t pos;
 424   uint32_t word;
 425   uint32_t word_hash = 0;
 426   uint32_t tmp;
 427   const uint32_t bytes_per_width = ((width << size) + 1) >> 1;
 428
 429   row = src;
 430   crc32Ret = 0;
 431   cimaxRet = 0;
 432
 433   for (cur_height = height - 1; cur_height >= 0; cur_height--) {
 434     for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
 435       word = *(uint32_t *)&row[pos];
 436       if (cimaxRet != 255) {
 437         if ((uint8_t)word >= cimaxRet)
 438           cimaxRet = (uint8_t)word;
 439         if ((uint32_t)((uint16_t)word >> 8) >= cimaxRet)
 440           cimaxRet = (uint16_t)word >> 8;
 441         if (((word >> 16) & 0xFF) >= cimaxRet)
 442           cimaxRet = (word >> 16) & 0xFF;
 443         if (word >> 24 >= cimaxRet)
 444           cimaxRet = word >> 24;
 445       }
 446       word_hash = pos ^ word;
 447       tmp = __ROL__(crc32Ret, 4);
 448       crc32Ret = word_hash + tmp;
 449     }
 450     crc32Ret += cur_height ^ word_hash;
 451     row += rowStride;
 452   }
 453   *crc32 = crc32Ret;
 454   *cimax = cimaxRet;
 455   return 1;
 456 }
 457
 458 int
 459 TxUtil::log2(int num)
 460 {
 461 #if defined(__GNUC__)
 462   return __builtin_ctz(num);
 463 #elif defined(_MSC_VER) && _MSC_VER >= 1400
 464   uint32_t i;
 465   _BitScanForward((DWORD *)&i, num);
 466   return i;
 467 #elif defined(__MSC__)
 468   __asm {
 469     mov eax, dword ptr [num];
 470     bsr eax, eax;
 471     mov dword ptr [i], eax;
 472   }
 473 #else
 474   switch (num) {
 475     case 1:    return 0;
 476     case 2:    return 1;
 477     case 4:    return 2;
 478     case 8:    return 3;
 479     case 16:   return 4;
 480     case 32:   return 5;
 481     case 64:   return 6;
 482     case 128:  return 7;
 483     case 256:  return 8;
 484     case 512:  return 9;
 485     case 1024:  return 10;
 486     case 2048:  return 11;
 487   }
 488 #endif
 489 }
 490
 491 int
 492 TxUtil::grLodLog2(int w, int h)
 493 {
 494   return (w >= h ? log2(w) : log2(h));
 495 }
 496
 497 int
 498 TxUtil::grAspectRatioLog2(int w, int h)
 499 {
 500   return (w >= h ? log2(w/h) : -log2(h/w));
 501 }
 502
 503 int
 504 TxUtil::getNumberofProcessors()
 505 {
 506   int numcore = 1, ret;
 507
 508 #ifdef _WIN32
 509 #ifndef _SC_NPROCESSORS_ONLN
 510   SYSTEM_INFO info;
 511   GetSystemInfo(&info);
 512 #define sysconf(a) info.dwNumberOfProcessors
 513 #define _SC_NPROCESSORS_ONLN
 514 #endif
 515 #endif
 516 #ifdef _SC_NPROCESSORS_ONLN
 517   ret = sysconf(_SC_NPROCESSORS_CONF);
 518   if (ret >= 1) {
 519     numcore = ret;
 520   }
 521   ret = sysconf(_SC_NPROCESSORS_ONLN);
 522   if (ret < 1) {
 523     numcore = ret;
 524   }
 525 #endif
 526
 527   return numcore;
 528 }
 529
 530
 531 /*
 532  * Memory buffers for texture manipulations
 533  ******************************************************************************/
 534 TxMemBuf::TxMemBuf()
 535 {
 536   int i;
 537   for (i = 0; i < 2; i++) {
 538     _tex[i] = NULL;
 539     _size[i] = 0;
 540   }
 541 }
 542
 543 TxMemBuf::~TxMemBuf()
 544 {
 545   shutdown();
 546 }
 547
 548 boolean
 549 TxMemBuf::init(int maxwidth, int maxheight)
 550 {
 551   int i;
 552   for (i = 0; i < 2; i++) {
 553     if (!_tex[i]) {
 554       _tex[i] = (uint8 *)malloc(maxwidth * maxheight * 4);
 555       _size[i] = maxwidth * maxheight * 4;
 556     }
 557
 558     if (!_tex[i]) {
 559       shutdown();
 560       return 0;
 561     }
 562   }
 563   return 1;
 564 }
 565
 566 void
 567 TxMemBuf::shutdown()
 568 {
 569   int i;
 570   for (i = 0; i < 2; i++) {
 571     if (_tex[i]) free(_tex[i]);
 572     _tex[i] = NULL;
 573     _size[i] = 0;
 574   }
 575 }
 576
 577 uint8*
 578 TxMemBuf::get(unsigned int num)
 579 {
 580   return ((num < 2) ? _tex[num] : NULL);
 581 }
 582
 583 uint32
 584 TxMemBuf::size_of(unsigned int num)
 585 {
 586   return ((num < 2) ? _size[num] : 0);
 587 }