[mupen64plus-pandora.git] / source / gles2glide64 / src / GlideHQ / TxUtil.cpp

/*
 * Texture Filtering
 * Version:  1.0
 *
 * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
 * Email koolsmoky(at)users.sourceforge.net
 * Web   http://www.3dfxzone.it/koolsmoky
 *
 * this is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * this is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Make; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "TxUtil.h"
#include "TxDbg.h"
#include <zlib.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#else
#include <unistd.h>
#endif

/*
 * External libraries
 ******************************************************************************/
/* Binds the two texture-encoder function pointers.
 *
 * With DXTN_DLL defined the "dxtn" library is opened (unless a handle is
 * already present) and each entry point that is still unset is looked up
 * by name. Without DXTN_DLL the pointers are assigned straight to the
 * encoders linked into the binary.
 */
TxLoadLib::TxLoadLib()
{
#ifdef DXTN_DLL
  if (!_dxtnlib) {
    _dxtnlib = LoadLibrary("dxtn");
  }

  /* no library handle: leave the encoder pointers as they are */
  if (!_dxtnlib) {
    return;
  }

  if (!_tx_compress_dxtn_rgba) {
    _tx_compress_dxtn_rgba =
      (dxtCompressTexFuncExt)DLSYM(_dxtnlib, "tx_compress_dxtn_rgba");
  }

  if (!_tx_compress_fxt1) {
    _tx_compress_fxt1 =
      (fxtCompressTexFuncExt)DLSYM(_dxtnlib, "fxt1_encode");
  }
#else
  _tx_compress_fxt1 = fxt1_encode;
  _tx_compress_dxtn_rgba = tx_compress_dxtn_rgba;
#endif
}

/* Releases the "dxtn" library handle in DXTN_DLL builds; otherwise does
 * nothing. */
TxLoadLib::~TxLoadLib()
{
#ifdef DXTN_DLL
  if (_dxtnlib) {
    FreeLibrary(_dxtnlib);
  }
#endif
}

/* Returns the FXT1 encoder pointer as stored by the constructor. */
fxtCompressTexFuncExt
TxLoadLib::getfxtCompressTexFuncExt()
{
  fxtCompressTexFuncExt encoder = _tx_compress_fxt1;
  return encoder;
}

/* Returns the DXTn encoder pointer as stored by the constructor. */
dxtCompressTexFuncExt
TxLoadLib::getdxtCompressTexFuncExt()
{
  dxtCompressTexFuncExt encoder = _tx_compress_dxtn_rgba;
  return encoder;
}


/*
 * Utilities
 ******************************************************************************/
/* Checksums a texture stored in one of the Glide texture formats.
 *
 * The byte count comes from sizeofTx(); when that is 0 (which includes an
 * unsupported format) the checksum is 0. Otherwise the result is zlib's
 * crc32 over the first byteCount bytes of src.
 *
 * An Adler-32 based variant, Adler32(src, dataSize, 1), was used here
 * previously and can be swapped back in if it proves to be the better
 * choice.
 */
uint32
TxUtil::checksumTx(uint8 *src, int width, int height, uint16 format)
{
  const int byteCount = sizeofTx(width, height, format);

  if (!byteCount)
    return 0;

  /* zlib crc32, seeded with zlib's own initial value */
  return crc32(crc32(0L, Z_NULL, 0), src, byteCount);
}

/* Returns the number of bytes occupied by a width x height texture in the
 * given Glide texture format, or 0 when the format is not handled.
 *
 * Compressed formats round the dimensions up to their block size first:
 * FXT1 to a multiple of 8 x 4, DXTn to a multiple of 4 x 4. FXT1 and DXT1
 * then take half a byte per texel, DXT3/DXT5 one byte per texel.
 */
int
TxUtil::sizeofTx(int width, int height, uint16 format)
{
  switch (format) {
  /* block-compressed, 4 bits per texel */
  case GR_TEXFMT_ARGB_CMP_FXT1:
    return (((width + 0x7) & ~0x7) * ((height + 0x3) & ~0x3)) >> 1;
  case GR_TEXFMT_ARGB_CMP_DXT1:
    return (((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3)) >> 1;

  /* block-compressed, 8 bits per texel */
  case GR_TEXFMT_ARGB_CMP_DXT3:
  case GR_TEXFMT_ARGB_CMP_DXT5:
    return ((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3);

  /* uncompressed, 1 byte per texel */
  case GR_TEXFMT_ALPHA_INTENSITY_44:
  case GR_TEXFMT_ALPHA_8:
  case GR_TEXFMT_INTENSITY_8:
  case GR_TEXFMT_P_8:
    return width * height;

  /* uncompressed, 2 bytes per texel */
  case GR_TEXFMT_ARGB_4444:
  case GR_TEXFMT_ARGB_1555:
  case GR_TEXFMT_RGB_565:
  case GR_TEXFMT_ALPHA_INTENSITY_88:
    return (width * height) << 1;

  /* uncompressed, 4 bytes per texel */
  case GR_TEXFMT_ARGB_8888:
    return (width * height) << 2;

  default:
    /* unsupported format */
    DBG_INFO(80, L"Error: cannot get size. unsupported gfmt:%x\n", format);
    return 0;
  }
}

#if 0 /* unused */
/* Classifies the alpha channel of a width x height block of 32-bit texels.
 * This function is compiled out (#if 0) and is written with MSVC-style
 * x86 inline assembly.
 */
uint32
TxUtil::chkAlpha(uint32* src, int width, int height)
{
  /* NOTE: src must be ARGB8888
   * return values
   * 0x00000000: 8bit alpha
   * 0x00000001: 1bit alpha
   * 0xff000001: no alpha
   */

  int _size = width * height;
  uint32 alpha = 0;

  /* The loop below reads one texel at a time and keeps only its top byte.
   * A top byte other than 0x00 or 0xff jumps straight to "done", leaving
   * alpha at its initial 0. Otherwise the masked value is ANDed into ebx
   * (which starts as 0xff000000), so ebx stays 0xff000000 only while every
   * texel seen so far had a top byte of 0xff. Once all _size texels have
   * been visited, bit 0 is set and the result is stored in alpha.
   */
  __asm {
    mov esi, dword ptr [src];
    mov ecx, dword ptr [_size];
    mov ebx, 0xff000000;

  tc1_loop:
    mov eax, dword ptr [esi];
    add esi, 4;

    and eax, 0xff000000;
    jz  alpha1bit;
    cmp eax, 0xff000000;
    je  alpha1bit;
    jmp done;

  alpha1bit:
    and ebx, eax;
    dec ecx;
    jnz tc1_loop;

    or  ebx, 0x00000001;
    mov dword ptr [alpha], ebx;

  done:
  }

  return alpha;
}
#endif

/* 32-bit checksum of a texture as it sits in emulated memory.
 *
 * Currently this is the Rice CRC32; it could be switched to Jabo MD5 or
 * any other custom checksum.
 * TODO: use *_HIRESTEXTURE option.
 *
 * Returns 0 when src is null.
 */
uint32
TxUtil::checksum(uint8 *src, int width, int height, int size, int rowStride)
{
  return src ? RiceCRC32(src, width, height, size, rowStride) : 0;
}

/* 64-bit checksum of a texture and, for colour-indexed textures, its
 * palette: hi = palette crc32, low = texture crc32.
 *
 * Currently built on the Rice CRC32; it could be switched to Jabo MD5 or
 * any other custom checksum.
 * TODO: use *_HIRESTEXTURE option.
 *
 * When a palette is supplied, the low byte of size selects the indexed
 * variant: 1 uses RiceCRC32_CI8 with a palette stride of 512, 0 uses
 * RiceCRC32_CI4 with a palette stride of 32. The palette is hashed over
 * (highest index used + 1) entries. If no palette is given, the size is
 * neither of those, or the combined value comes out as 0, the plain
 * texture CRC is returned in the low 32 bits instead.
 *
 * Returns 0 when src is null.
 */
uint64
TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette)
{
  if (!src) return 0;

  uint64 combined = 0;

  if (palette) {
    uint32 texCrc = 0, maxIndex = 0;
    int paletteStride = 0;
    bool indexed = false;

    const int texelSize = size & 0xff;
    if (texelSize == 1) {
      paletteStride = 512;
      indexed = RiceCRC32_CI8(src, width, height, size, rowStride, &texCrc, &maxIndex) != 0;
    } else if (texelSize == 0) {
      paletteStride = 32;
      indexed = RiceCRC32_CI4(src, width, height, size, rowStride, &texCrc, &maxIndex) != 0;
    }

    if (indexed) {
      const uint64 palCrc = (uint64)RiceCRC32(palette, maxIndex + 1, 1, 2, paletteStride);
      combined = (palCrc << 32) | (uint64)texCrc;
    }
  }

  if (!combined) {
    combined = (uint64)RiceCRC32(src, width, height, size, rowStride);
  }

  return combined;
}

/*
** Computes Adler32 checksum for a stream of data.
**
** From the specification found in RFC 1950: (ZLIB Compressed Data Format
** Specification version 3.3)
**
** ADLER32 (Adler-32 checksum) This contains a checksum value of the
** uncompressed data (excluding any dictionary data) computed according to
** Adler-32 algorithm. This algorithm is a 32-bit extension and improvement
** of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 standard.
**
** Adler-32 is composed of two sums accumulated per byte: s1 is the sum of
** all bytes, s2 is the sum of all s1 values. Both sums are done modulo
** 65521. s1 is initialized to 1, s2 to zero. The Adler-32 checksum is stored
** as s2*65536 + s1 in most-significant-byte first (network) order.
**
** 8.2. The Adler-32 algorithm 
**
** The Adler-32 algorithm is much faster than the CRC32 algorithm yet still
** provides an extremely low probability of undetected errors.
**
** The modulo on unsigned long accumulators can be delayed for 5552 bytes,
** so the modulo operation time is negligible. If the bytes are a, b, c,
** the second sum is 3a + 2b + c + 3, and so is position and order sensitive,
** unlike the first sum, which is just a checksum. That 65521 is prime is
** important to avoid a possible large class of two-byte errors that leave
** the check unchanged. (The Fletcher checksum uses 255, which is not prime
** and which also makes the Fletcher check insensitive to single byte
** changes 0 <-> 255.)
**
** The sum s1 is initialized to 1 instead of zero to make the length of
** the sequence part of s2, so that the length does not have to be checked
** separately. (Any sequence of zeroes has a Fletcher checksum of zero.)
*/

/* Adler-32 over Len bytes at data, continuing from the running value
 * dwAdler32.
 *
 * The active branch forwards to zlib's adler32(). The #else branch is a
 * hand-written version of the same algorithm and is currently compiled
 * out.
 */
uint32
TxUtil::Adler32(const uint8* data, int Len, uint32 dwAdler32)
{
#if 1
  /* zlib adler32 */
  return adler32(dwAdler32, data, Len);
#else
  uint32 sumLo = dwAdler32 & 0xFFFF;
  uint32 sumHi = (dwAdler32 >> 16) & 0xFFFF;

  for (int remaining = Len; remaining > 0; ) {
    /* 5552 is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
    int chunk = (remaining < 5552 ? remaining : 5552);
    remaining -= chunk;
    for (; chunk > 0; --chunk) {
      sumLo += *data++;
      sumHi += sumLo;
    }
    /* 65521 is the largest prime smaller than 65536 */
    sumLo %= 65521;
    sumHi %= 65521;
  }

  return (sumHi << 16) | sumLo;
#endif
}

/* Adler-32 over a rectangular region: height rows of (width * size) bytes
 * each, with consecutive rows rowStride bytes apart. The running value
 * starts at 1 and is carried from one row into the next.
 */
uint32
TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride)
{
  const uint32 rowBytes = width * size;
  uint32 running = 1;
  const uint8* line = src;

  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft) {
    running = Adler32(line, rowBytes, running);
    line += rowStride;
  }

  return running;
}

// rotate left
//
// Rotates `value` left by `count` bit positions; `count` is first reduced
// modulo the bit width of T. T is expected to be an unsigned integer type.
template<class T> static T __ROL__(T value, unsigned int count)
{
  const unsigned int nbits = sizeof(T) * 8;
  count %= nbits;

  // A rotation by zero must be handled separately: the general path would
  // shift right by the full width of T, which is undefined behaviour.
  if (count == 0)
    return value;

  const T high = value >> (nbits - count);
  value <<= count;
  value |= high;
  return value;
}

/* Rice CRC32 for hires texture packs */
/* NOTE: The following is used in Glide64 to calculate the CRC32
 * for Rice hires texture packs.
 *
 * BYTE* addr = (BYTE*)(gfx.RDRAM +
 *                     rdp.addr[rdp.tiles[tile].t_mem] +
 *                     (rdp.tiles[tile].ul_t * bpl) +
 *                     (((rdp.tiles[tile].ul_s<<rdp.tiles[tile].size)+1)>>1));
 * RiceCRC32(addr,
 *          rdp.tiles[tile].width,
 *          rdp.tiles[tile].height,
 *          (unsigned short)(rdp.tiles[tile].format << 8 | rdp.tiles[tile].size),
 *          bpl);
 */
/* Computes the Rice-style texture hash.
 *
 * src        first byte of the top row
 * width      texture width in texels
 * height     number of rows
 * size       texel size code in the low byte; callers may pass the format
 *            in the upper byte (see the usage note above), so only the
 *            low byte is used as the shift count
 * rowStride  distance in bytes between the starts of consecutive rows
 *
 * Each row is consumed as 32-bit words in host byte order, starting at
 * byte offset (bytes_per_width - 4) and stepping down by 4 until the
 * offset would drop below 0. Consequently a row narrower than 4 bytes
 * contributes no words, and when the row width is not a multiple of 4 the
 * leading remainder bytes are not read. This is part of the hash
 * definition and must not be "corrected", or existing hashes would change.
 */
uint32
TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride)
{
  const uint8_t *row = src;
  uint32_t crc32Ret = 0;
  uint32_t word_hash = 0;
  /* mask the shift count: bits above the low byte carry the format */
  const uint32_t bytes_per_width = ((width << (size & 0xff)) + 1) >> 1;

  for (int cur_height = height - 1; cur_height >= 0; cur_height--) {
    /* pos is unsigned: stepping below 0 wraps past 0x80000000 and ends the loop */
    for (uint32_t pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
      uint32_t word;
      /* memcpy instead of a pointer cast: row + pos need not be 4-byte aligned */
      memcpy(&word, &row[pos], sizeof(word));
      word_hash = pos ^ word;
      crc32Ret = word_hash + __ROL__(crc32Ret, 4);
    }
    /* fold in the row index together with the last word hash seen */
    crc32Ret += cur_height ^ word_hash;
    row += rowStride;
  }
  return crc32Ret;
}

/* Rice-style texture hash for 4-bit colour-indexed data that also reports
 * the highest palette index encountered.
 *
 * src        first byte of the top row
 * width      texture width in texels
 * height     number of rows
 * size       texel size code in the low byte; only the low byte is used as
 *            the shift count because callers may pass the format in the
 *            upper byte
 * rowStride  distance in bytes between the starts of consecutive rows
 * crc32      out: the hash, computed exactly as in RiceCRC32
 * cimax      out: largest 4-bit value found in any word that was hashed
 *
 * Words are read in host byte order from byte offset (bytes_per_width - 4)
 * downwards in steps of 4; bytes that do not fall into such a word are
 * neither hashed nor scanned. Always returns 1.
 */
boolean
TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride,
                        uint32* crc32, uint32* cimax)
{
  const uint8_t *row = src;
  uint32_t crc32Ret = 0;
  uint32_t cimaxRet = 0;
  uint32_t word_hash = 0;
  /* mask the shift count: bits above the low byte carry the format */
  const uint32_t bytes_per_width = ((width << (size & 0xff)) + 1) >> 1;

  for (int cur_height = height - 1; cur_height >= 0; cur_height--) {
    /* pos is unsigned: stepping below 0 wraps past 0x80000000 and ends the loop */
    for (uint32_t pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
      uint32_t word;
      /* memcpy instead of a pointer cast: row + pos need not be 4-byte aligned */
      memcpy(&word, &row[pos], sizeof(word));

      /* 15 is the largest possible index, so stop scanning once it is reached */
      if (cimaxRet != 15) {
        for (unsigned int shift = 0; shift < 32; shift += 4) {
          const uint32_t index = (word >> shift) & 0xF;
          if (index > cimaxRet)
            cimaxRet = index;
        }
      }

      word_hash = pos ^ word;
      crc32Ret = word_hash + __ROL__(crc32Ret, 4);
    }
    /* fold in the row index together with the last word hash seen */
    crc32Ret += cur_height ^ word_hash;
    row += rowStride;
  }
  *crc32 = crc32Ret;
  *cimax = cimaxRet;
  return 1;
}

/* Rice-style texture hash for 8-bit colour-indexed data that also reports
 * the highest palette index encountered.
 *
 * src        first byte of the top row
 * width      texture width in texels
 * height     number of rows
 * size       texel size code in the low byte; only the low byte is used as
 *            the shift count because callers may pass the format in the
 *            upper byte
 * rowStride  distance in bytes between the starts of consecutive rows
 * crc32      out: the hash, computed exactly as in RiceCRC32
 * cimax      out: largest byte value found in any word that was hashed
 *
 * Words are read in host byte order from byte offset (bytes_per_width - 4)
 * downwards in steps of 4; bytes that do not fall into such a word are
 * neither hashed nor scanned. Always returns 1.
 */
boolean
TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride,
                      uint32* crc32, uint32* cimax)
{
  const uint8_t *row = src;
  uint32_t crc32Ret = 0;
  uint32_t cimaxRet = 0;
  uint32_t word_hash = 0;
  /* mask the shift count: bits above the low byte carry the format */
  const uint32_t bytes_per_width = ((width << (size & 0xff)) + 1) >> 1;

  for (int cur_height = height - 1; cur_height >= 0; cur_height--) {
    /* pos is unsigned: stepping below 0 wraps past 0x80000000 and ends the loop */
    for (uint32_t pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
      uint32_t word;
      /* memcpy instead of a pointer cast: row + pos need not be 4-byte aligned */
      memcpy(&word, &row[pos], sizeof(word));

      /* 255 is the largest possible index, so stop scanning once it is reached */
      if (cimaxRet != 255) {
        for (unsigned int shift = 0; shift < 32; shift += 8) {
          const uint32_t index = (word >> shift) & 0xFF;
          if (index > cimaxRet)
            cimaxRet = index;
        }
      }

      word_hash = pos ^ word;
      crc32Ret = word_hash + __ROL__(crc32Ret, 4);
    }
    /* fold in the row index together with the last word hash seen */
    crc32Ret += cur_height ^ word_hash;
    row += rowStride;
  }
  *crc32 = crc32Ret;
  *cimax = cimaxRet;
  return 1;
}

/* Returns the index of the lowest set bit of num, which equals log2(num)
 * whenever num is a power of two.
 *
 * Returns 0 for num == 0: the bit-scan primitives used below have no
 * defined result for a zero argument.
 *
 * Every build configuration now returns a value on every path. The
 * portable fallback scans bit by bit and therefore agrees with the
 * intrinsic branches for all inputs.
 */
int
TxUtil::log2(int num)
{
  if (num == 0)
    return 0;

#if defined(__GNUC__)
  return __builtin_ctz(num);
#elif defined(_MSC_VER) && _MSC_VER >= 1400
  unsigned long bit = 0;
  _BitScanForward(&bit, (unsigned long)num);
  return (int)bit;
#else
  unsigned int bits = (unsigned int)num;
  int index = 0;
  /* bits is non-zero here, so the loop terminates */
  while (!(bits & 1u)) {
    bits >>= 1;
    ++index;
  }
  return index;
#endif
}

/* Returns log2() of the larger of the two dimensions. */
int
TxUtil::grLodLog2(int w, int h)
{
  const int longest = (w >= h) ? w : h;
  return log2(longest);
}

/* Returns log2 of the width:height ratio: positive (or zero) when the
 * texture is at least as wide as it is tall, negative when it is taller
 * than wide.
 *
 * Returns 0 when either dimension is not positive. Such input would
 * otherwise divide by zero or produce a zero quotient.
 */
int
TxUtil::grAspectRatioLog2(int w, int h)
{
  if (w <= 0 || h <= 0)
    return 0;

  return (w >= h ? log2(w/h) : -log2(h/w));
}

/* Returns the number of processors reported by the operating system, or 1
 * when no usable value is available.
 *
 * On Windows without a sysconf() emulation the count comes from
 * GetSystemInfo(). Elsewhere the configured processor count is taken
 * first and then replaced by the online count when that query succeeds.
 * A failed query (result < 1) never overwrites the current value, so the
 * result is always at least 1.
 */
int
TxUtil::getNumberofProcessors()
{
  int numcore = 1;

#if defined(_WIN32) && !defined(_SC_NPROCESSORS_ONLN)
  SYSTEM_INFO info;
  GetSystemInfo(&info);
  const int reported = (int)info.dwNumberOfProcessors;
  if (reported >= 1) {
    numcore = reported;
  }
#elif defined(_SC_NPROCESSORS_ONLN)
  long reported = sysconf(_SC_NPROCESSORS_CONF);
  if (reported >= 1) {
    numcore = (int)reported;
  }
  /* prefer the number of processors that are actually online */
  reported = sysconf(_SC_NPROCESSORS_ONLN);
  if (reported >= 1) {
    numcore = (int)reported;
  }
#endif

  return numcore;
}


/*
 * Memory buffers for texture manipulations
 ******************************************************************************/
/* Starts with both buffer slots empty: null pointer, size 0. */
TxMemBuf::TxMemBuf()
{
  _tex[0] = NULL;
  _size[0] = 0;

  _tex[1] = NULL;
  _size[1] = 0;
}

/* Frees whatever buffers are still held by delegating to shutdown(). */
TxMemBuf::~TxMemBuf()
{
  this->shutdown();
}

/* Allocates the two working buffers, each maxwidth * maxheight * 4 bytes.
 *
 * A slot that already holds a buffer is left untouched, so calling init()
 * again does not resize an existing buffer.
 *
 * Returns 1 when both slots hold a buffer on exit. Returns 0 after
 * releasing everything via shutdown() when a needed allocation fails or
 * when the requested dimensions are not positive or their byte size does
 * not fit in a signed 32-bit int.
 */
boolean
TxMemBuf::init(int maxwidth, int maxheight)
{
  /* validate before multiplying so the size computation cannot overflow */
  const bool dimsOk = maxwidth > 0 && maxheight > 0 &&
                      maxwidth <= (0x7fffffff / 4) / maxheight;

  for (int i = 0; i < 2; i++) {
    if (_tex[i])
      continue; /* keep the buffer from an earlier init() */

    if (dimsOk) {
      const int bytes = maxwidth * maxheight * 4;
      _tex[i] = (uint8 *)malloc(bytes);
      /* record the size only once the allocation is known to have succeeded */
      if (_tex[i])
        _size[i] = bytes;
    }

    if (!_tex[i]) {
      shutdown();
      return 0;
    }
  }
  return 1;
}

/* Frees both buffers and resets their slots to null / size 0. Safe to
 * call when nothing is allocated.
 */
void
TxMemBuf::shutdown()
{
  for (int slot = 0; slot < 2; ++slot) {
    free(_tex[slot]); /* free(NULL) is a no-op */
    _tex[slot] = NULL;
    _size[slot] = 0;
  }
}

/* Returns the buffer in slot num (0 or 1), or NULL for any other slot. */
uint8*
TxMemBuf::get(unsigned int num)
{
  if (num >= 2)
    return NULL;

  return _tex[num];
}

/* Returns the recorded size of slot num (0 or 1), or 0 for any other
 * slot. */
uint32
TxMemBuf::size_of(unsigned int num)
{
  if (num >= 2)
    return 0;

  return _size[num];
}
Commit	Line	Data
98e75f2d	1	/*
	2	* Texture Filtering
	3	* Version: 1.0
	4	*
	5	* Copyright (C) 2007 Hiroshi Morii All Rights Reserved.
	6	* Email koolsmoky(at)users.sourceforge.net
	7	* Web http://www.3dfxzone.it/koolsmoky
	8	*
	9	* this is free software; you can redistribute it and/or modify
	10	* it under the terms of the GNU General Public License as published by
	11	* the Free Software Foundation; either version 2, or (at your option)
	12	* any later version.
	13	*
	14	* this is distributed in the hope that it will be useful,
	15	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	16	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	17	* GNU General Public License for more details.
	18	*
	19	* You should have received a copy of the GNU General Public License
	20	* along with GNU Make; see the file COPYING. If not, write to
	21	* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
	22	*/
	23
	24	#include "TxUtil.h"
	25	#include "TxDbg.h"
	26	#include <zlib.h>
	27	#include <stdlib.h>
	28	#ifdef _WIN32
	29	#define WIN32_LEAN_AND_MEAN
	30	#include <windows.h>
	31	#else
	32	#include <unistd.h>
	33	#endif
	34
	35	/*
	36	* External libraries
	37	******************************************************************************/
	38	TxLoadLib::TxLoadLib()
	39	{
	40	#ifdef DXTN_DLL
	41	if (!_dxtnlib)
	42	_dxtnlib = LoadLibrary("dxtn");
	43
	44	if (_dxtnlib) {
2d262872	45	if (!_tx_compress_dxtn_rgba)
2d262872	46	_tx_compress_dxtn_rgba = (dxtCompressTexFuncExt)DLSYM(_dxtnlib, "tx_compress_dxtn_rgba");
98e75f2d	47
	48	if (!_tx_compress_fxt1)
	49	_tx_compress_fxt1 = (fxtCompressTexFuncExt)DLSYM(_dxtnlib, "fxt1_encode");
	50	}
	51	#else
2d262872	52	_tx_compress_dxtn_rgba = tx_compress_dxtn_rgba;
98e75f2d	53	_tx_compress_fxt1 = fxt1_encode;
	54
	55	#endif
	56	}
	57
	58	TxLoadLib::~TxLoadLib()
	59	{
	60	#ifdef DXTN_DLL
	61	/* free dynamic library */
	62	if (_dxtnlib)
	63	FreeLibrary(_dxtnlib);
	64	#endif
	65
	66	}
	67
	68	fxtCompressTexFuncExt
	69	TxLoadLib::getfxtCompressTexFuncExt()
	70	{
	71	return _tx_compress_fxt1;
	72	}
	73
	74	dxtCompressTexFuncExt
	75	TxLoadLib::getdxtCompressTexFuncExt()
	76	{
2d262872	77	return _tx_compress_dxtn_rgba;
98e75f2d	78	}
	79
	80
	81	/*
	82	* Utilities
	83	******************************************************************************/
	84	uint32
	85	TxUtil::checksumTx(uint8 *src, int width, int height, uint16 format)
	86	{
	87	int dataSize = sizeofTx(width, height, format);
	88
	89	/* for now we use adler32 if something else is better
	90	* we can simply swtich later
	91	*/
	92	/* return (dataSize ? Adler32(src, dataSize, 1) : 0); */
	93
	94	/* zlib crc32 */
	95	return (dataSize ? crc32(crc32(0L, Z_NULL, 0), src, dataSize) : 0);
	96	}
	97
	98	int
	99	TxUtil::sizeofTx(int width, int height, uint16 format)
	100	{
	101	int dataSize = 0;
	102
	103	/* a lookup table for the shifts would be better */
	104	switch (format) {
	105	case GR_TEXFMT_ARGB_CMP_FXT1:
	106	dataSize = (((width + 0x7) & ~0x7) * ((height + 0x3) & ~0x3)) >> 1;
	107	break;
	108	case GR_TEXFMT_ARGB_CMP_DXT1:
	109	dataSize = (((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3)) >> 1;
	110	break;
	111	case GR_TEXFMT_ARGB_CMP_DXT3:
	112	case GR_TEXFMT_ARGB_CMP_DXT5:
	113	dataSize = ((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3);
	114	break;
	115	case GR_TEXFMT_ALPHA_INTENSITY_44:
	116	case GR_TEXFMT_ALPHA_8:
	117	case GR_TEXFMT_INTENSITY_8:
	118	case GR_TEXFMT_P_8:
	119	dataSize = width * height;
	120	break;
	121	case GR_TEXFMT_ARGB_4444:
	122	case GR_TEXFMT_ARGB_1555:
	123	case GR_TEXFMT_RGB_565:
	124	case GR_TEXFMT_ALPHA_INTENSITY_88:
	125	dataSize = (width * height) << 1;
	126	break;
	127	case GR_TEXFMT_ARGB_8888:
	128	dataSize = (width * height) << 2;
	129	break;
	130	default:
	131	/* unsupported format */
	132	DBG_INFO(80, L"Error: cannot get size. unsupported gfmt:%x\n", format);
	133	;
	134	}
	135
	136	return dataSize;
	137	}
	138
	139	#if 0 /* unused */
	140	uint32
	141	TxUtil::chkAlpha(uint32* src, int width, int height)
142	{
143	/* NOTE: _src must be ARGB8888
144	* return values
145	* 0x00000000: 8bit alpha
146	* 0x00000001: 1bit alpha
147	* 0xff000001: no alpha
148	*/
149
150	int _size = width * height;
151	uint32 alpha = 0;
152
153	__asm {
154	mov esi, dword ptr [src];
155	mov ecx, dword ptr [_size];
156	mov ebx, 0xff000000;
157
158	tc1_loop:
159	mov eax, dword ptr [esi];
160	add esi, 4;
161
162	and eax, 0xff000000;
163	jz alpha1bit;
164	cmp eax, 0xff000000;
165	je alpha1bit;
166	jmp done;
167
168	alpha1bit:
169	and ebx, eax;
170	dec ecx;
171	jnz tc1_loop;
172
173	or ebx, 0x00000001;
174	mov dword ptr [alpha], ebx;
175
176	done:
177	}
178
179	return alpha;
180	}
181	#endif
182
183	uint32
184	TxUtil::checksum(uint8 *src, int width, int height, int size, int rowStride)
185	{
186	/* Rice CRC32 for now. We can switch this to Jabo MD5 or
187	* any other custom checksum.
	188	* TODO: use *_HIRESTEXTURE option. */
189
190	if (!src) return 0;
191
192	return RiceCRC32(src, width, height, size, rowStride);
193	}
194
195	uint64
	196	TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette)
197	{
198	/* Rice CRC32 for now. We can switch this to Jabo MD5 or
199	* any other custom checksum.
	200	* TODO: use *_HIRESTEXTURE option. */
201	/* Returned value is 64bits: hi=palette crc32 low=texture crc32 */
202
203	if (!src) return 0;
204
205	uint64 crc64Ret = 0;
206
207	if (palette) {
208	uint32 crc32 = 0, cimax = 0;
209	switch (size & 0xff) {
210	case 1:
211	if (RiceCRC32_CI8(src, width, height, size, rowStride, &crc32, &cimax)) {
212	crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 512);
213	crc64Ret <<= 32;
	214	crc64Ret |= (uint64)crc32;
215	}
216	break;
217	case 0:
218	if (RiceCRC32_CI4(src, width, height, size, rowStride, &crc32, &cimax)) {
219	crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 32);
220	crc64Ret <<= 32;
	221	crc64Ret |= (uint64)crc32;
222	}
223	}
224	}
225	if (!crc64Ret) {
226	crc64Ret = (uint64)RiceCRC32(src, width, height, size, rowStride);
227	}
228
229	return crc64Ret;
230	}
231
232	/*
233	** Computes Adler32 checksum for a stream of data.
234	**
235	** From the specification found in RFC 1950: (ZLIB Compressed Data Format
236	** Specification version 3.3)
237	**
238	** ADLER32 (Adler-32 checksum) This contains a checksum value of the
239	** uncompressed data (excluding any dictionary data) computed according to
240	** Adler-32 algorithm. This algorithm is a 32-bit extension and improvement
241	** of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 standard.
242	**
243	** Adler-32 is composed of two sums accumulated per byte: s1 is the sum of
244	** all bytes, s2 is the sum of all s1 values. Both sums are done modulo
245	** 65521. s1 is initialized to 1, s2 to zero. The Adler-32 checksum is stored
246	** as s2*65536 + s1 in most-significant-byte first (network) order.
247	**
248	** 8.2. The Adler-32 algorithm
249	**
250	** The Adler-32 algorithm is much faster than the CRC32 algorithm yet still
251	** provides an extremely low probability of undetected errors.
252	**
253	** The modulo on unsigned long accumulators can be delayed for 5552 bytes,
254	** so the modulo operation time is negligible. If the bytes are a, b, c,
255	** the second sum is 3a + 2b + c + 3, and so is position and order sensitive,
256	** unlike the first sum, which is just a checksum. That 65521 is prime is
257	** important to avoid a possible large class of two-byte errors that leave
258	** the check unchanged. (The Fletcher checksum uses 255, which is not prime
259	** and which also makes the Fletcher check insensitive to single byte
260	** changes 0 <-> 255.)
261	**
262	** The sum s1 is initialized to 1 instead of zero to make the length of
263	** the sequence part of s2, so that the length does not have to be checked
264	** separately. (Any sequence of zeroes has a Fletcher checksum of zero.)
265	*/
266
267	uint32
268	TxUtil::Adler32(const uint8* data, int Len, uint32 dwAdler32)
269	{
270	#if 1
271	/* zlib adler32 */
272	return adler32(dwAdler32, data, Len);
273	#else
274	register uint32 s1 = dwAdler32 & 0xFFFF;
275	register uint32 s2 = (dwAdler32 >> 16) & 0xFFFF;
276	int k;
277
278	while (Len > 0) {
279	/* 5552 is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
280	k = (Len < 5552 ? Len : 5552);
281	Len -= k;
282	while (k--) {
283	s1 += *data++;
284	s2 += s1;
285	}
286	/* 65521 is the largest prime smaller than 65536 */
287	s1 %= 65521;
288	s2 %= 65521;
289	}
290
	291	return (s2 << 16) | s1;
292	#endif
293	}
294
295	uint32
296	TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride)
297	{
298	int i;
299	uint32 ret = 1;
300	uint32 width_in_bytes = width * size;
301
302	for (i = 0; i < height; i++) {
303	ret = Adler32(src, width_in_bytes, ret);
304	src += rowStride;
305	}
306
307	return ret;
308	}
309
310	// rotate left
311	template<class T> static T __ROL__(T value, unsigned int count)
312	{
313	const unsigned int nbits = sizeof(T) * 8;
314	count %= nbits;
315
316	T high = value >> (nbits - count);
317	value <<= count;
	318	value |= high;
319	return value;
320	}
321
322	/* Rice CRC32 for hires texture packs */
323	/* NOTE: The following is used in Glide64 to calculate the CRC32
324	* for Rice hires texture packs.
325	*
326	* BYTE* addr = (BYTE*)(gfx.RDRAM +
327	* rdp.addr[rdp.tiles[tile].t_mem] +
328	* (rdp.tiles[tile].ul_t * bpl) +
329	* (((rdp.tiles[tile].ul_s<<rdp.tiles[tile].size)+1)>>1));
330	* RiceCRC32(addr,
331	* rdp.tiles[tile].width,
332	* rdp.tiles[tile].height,
	333	* (unsigned short)(rdp.tiles[tile].format << 8 | rdp.tiles[tile].size),
334	* bpl);
335	*/
336	uint32
337	TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride)
338	{
339	const uint8_t *row;
340	uint32_t crc32Ret;
341	int cur_height;
342	uint32_t pos;
343	uint32_t word;
344	uint32_t word_hash = 0;
345	uint32_t tmp;
346	const uint32_t bytes_per_width = ((width << size) + 1) >> 1;
347
348	row = src;
349	crc32Ret = 0;
350
351	for (cur_height = height - 1; cur_height >= 0; cur_height--) {
352	for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
	353	word = *(uint32_t *)&row[pos];
354	word_hash = pos ^ word;
355	tmp = __ROL__(crc32Ret, 4);
356	crc32Ret = word_hash + tmp;
357	}
358	crc32Ret += cur_height ^ word_hash;
359	row += rowStride;
360	}
361	return crc32Ret;
362	}
363
364	boolean
365	TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride,
366	uint32* crc32, uint32* cimax)
367	{
368	const uint8_t *row;
369	uint32_t crc32Ret;
370	uint32_t cimaxRet;
371	int cur_height;
372	uint32_t pos;
373	uint32_t word;
374	uint32_t word_hash = 0;
375	uint32_t tmp;
376	const uint32_t bytes_per_width = ((width << size) + 1) >> 1;
377
378	row = src;
379	crc32Ret = 0;
380	cimaxRet = 0;
381
382	for (cur_height = height - 1; cur_height >= 0; cur_height--) {
383	for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
	384	word = *(uint32_t *)&row[pos];
385	if (cimaxRet != 15) {
386	if ((word & 0xF) >= cimaxRet)
387	cimaxRet = word & 0xF;
388	if ((uint32_t)((uint8_t)word >> 4) >= cimaxRet)
389	cimaxRet = (uint8_t)word >> 4;
390	if (((word >> 8) & 0xF) >= cimaxRet)
391	cimaxRet = (word >> 8) & 0xF;
392	if ((uint32_t)((uint16_t)word >> 12) >= cimaxRet)
393	cimaxRet = (uint16_t)word >> 12;
394	if (((word >> 16) & 0xF) >= cimaxRet)
395	cimaxRet = (word >> 16) & 0xF;
396	if (((word >> 20) & 0xF) >= cimaxRet)
397	cimaxRet = (word >> 20) & 0xF;
398	if (((word >> 24) & 0xF) >= cimaxRet)
399	cimaxRet = (word >> 24) & 0xF;
400	if (word >> 28 >= cimaxRet )
401	cimaxRet = word >> 28;
402	}
403	word_hash = pos ^ word;
404	tmp = __ROL__(crc32Ret, 4);
405	crc32Ret = word_hash + tmp;
406	}
407	crc32Ret += cur_height ^ word_hash;
408	row += rowStride;
409	}
410	*crc32 = crc32Ret;
411	*cimax = cimaxRet;
412	return 1;
413	}
414
415	boolean
416	TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride,
417	uint32* crc32, uint32* cimax)
418	{
419	const uint8_t *row;
420	uint32_t crc32Ret;
421	uint32_t cimaxRet;
422	int cur_height;
423	uint32_t pos;
424	uint32_t word;
425	uint32_t word_hash = 0;
426	uint32_t tmp;
427	const uint32_t bytes_per_width = ((width << size) + 1) >> 1;
428
429	row = src;
430	crc32Ret = 0;
431	cimaxRet = 0;
432
433	for (cur_height = height - 1; cur_height >= 0; cur_height--) {
434	for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
	435	word = *(uint32_t *)&row[pos];
436	if (cimaxRet != 255) {
437	if ((uint8_t)word >= cimaxRet)
438	cimaxRet = (uint8_t)word;
439	if ((uint32_t)((uint16_t)word >> 8) >= cimaxRet)
440	cimaxRet = (uint16_t)word >> 8;
441	if (((word >> 16) & 0xFF) >= cimaxRet)
442	cimaxRet = (word >> 16) & 0xFF;
443	if (word >> 24 >= cimaxRet)
444	cimaxRet = word >> 24;
445	}
446	word_hash = pos ^ word;
447	tmp = __ROL__(crc32Ret, 4);
448	crc32Ret = word_hash + tmp;
449	}
450	crc32Ret += cur_height ^ word_hash;
451	row += rowStride;
452	}
453	*crc32 = crc32Ret;
454	*cimax = cimaxRet;
455	return 1;
456	}
457
458	int
459	TxUtil::log2(int num)
460	{
461	#if defined(__GNUC__)
462	return __builtin_ctz(num);
463	#elif defined(_MSC_VER) && _MSC_VER >= 1400
464	uint32_t i;
465	_BitScanForward((DWORD *)&i, num);
466	return i;
467	#elif defined(__MSC__)
468	__asm {
469	mov eax, dword ptr [num];
470	bsr eax, eax;
471	mov dword ptr [i], eax;
472	}
473	#else
474	switch (num) {
475	case 1: return 0;
476	case 2: return 1;
477	case 4: return 2;
478	case 8: return 3;
479	case 16: return 4;
480	case 32: return 5;
481	case 64: return 6;
482	case 128: return 7;
483	case 256: return 8;
484	case 512: return 9;
485	case 1024: return 10;
486	case 2048: return 11;
487	}
488	#endif
489	}
490
491	int
492	TxUtil::grLodLog2(int w, int h)
493	{
494	return (w >= h ? log2(w) : log2(h));
495	}
496
497	int
498	TxUtil::grAspectRatioLog2(int w, int h)
499	{
500	return (w >= h ? log2(w/h) : -log2(h/w));
501	}
502
503	int
504	TxUtil::getNumberofProcessors()
505	{
506	int numcore = 1, ret;
507
508	#ifdef _WIN32
509	#ifndef _SC_NPROCESSORS_ONLN
510	SYSTEM_INFO info;
511	GetSystemInfo(&info);
512	#define sysconf(a) info.dwNumberOfProcessors
513	#define _SC_NPROCESSORS_ONLN
514	#endif
515	#endif
516	#ifdef _SC_NPROCESSORS_ONLN
517	ret = sysconf(_SC_NPROCESSORS_CONF);
518	if (ret >= 1) {
519	numcore = ret;
520	}
521	ret = sysconf(_SC_NPROCESSORS_ONLN);
522	if (ret < 1) {
523	numcore = ret;
524	}
525	#endif
526
527	return numcore;
528	}
529
530
531	/*
532	* Memory buffers for texture manipulations
533	******************************************************************************/
534	TxMemBuf::TxMemBuf()
535	{
536	int i;
537	for (i = 0; i < 2; i++) {
538	_tex[i] = NULL;
539	_size[i] = 0;
540	}
541	}
542
543	TxMemBuf::~TxMemBuf()
544	{
545	shutdown();
546	}
547
548	boolean
549	TxMemBuf::init(int maxwidth, int maxheight)
550	{
551	int i;
552	for (i = 0; i < 2; i++) {
553	if (!_tex[i]) {
	554	_tex[i] = (uint8 *)malloc(maxwidth * maxheight * 4);
555	_size[i] = maxwidth * maxheight * 4;
556	}
557
558	if (!_tex[i]) {
559	shutdown();
560	return 0;
561	}
562	}
563	return 1;
564	}
565
566	void
567	TxMemBuf::shutdown()
568	{
569	int i;
570	for (i = 0; i < 2; i++) {
571	if (_tex[i]) free(_tex[i]);
572	_tex[i] = NULL;
573	_size[i] = 0;
574	}
575	}
576
577	uint8*
578	TxMemBuf::get(unsigned int num)
579	{
580	return ((num < 2) ? _tex[num] : NULL);
581	}
582
583	uint32
584	TxMemBuf::size_of(unsigned int num)
585	{
586	return ((num < 2) ? _size[num] : 0);
587	}