[mupen64plus-pandora.git] / source / gles2glide64 / src / GlideHQ / TextureFilters.cpp

/*
Copyright (C) 2003 Rice1964

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/

/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
 * Modified for the Texture Filtering library
 */

#include <string.h>
#include "TextureFilters.h"

/************************************************************************/
/* 2X filters                                                           */
/************************************************************************/

/* Pack four channel values into one 32-bit pixel: a goes to bits 31-24,
 * r to bits 23-16, g to bits 15-8 and b to bits 7-0. The arguments are not
 * masked, so each one must already fit in 8 bits. */
#define DWORD_MAKE(r, g, b, a)   ((uint32) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b)))
/* Pack four channel values into one 16-bit pixel: a goes to bits 15-12,
 * r to bits 11-8, g to bits 7-4 and b to bits 3-0. The arguments are not
 * masked, so each one must already fit in 4 bits. */
#define WORD_MAKE(r, g, b, a)   ((uint16) (((a) << 12) | ((r) << 8) | ((g) << 4) | (b)))

// Basic 2x R8G8B8A8 filter with interpolation

/* Truncating per-channel mean of two packed pixels with four 8-bit channels. */
static uint32 Texture2x_32_Mean2(uint32 p, uint32 q)
{
  uint32 packed = 0;
  uint32 bit;

  for (bit = 0; bit < 32; bit += 8)
    packed |= ((((p >> bit) & 0xFF) + ((q >> bit) & 0xFF)) / 2) << bit;
  return packed;
}

/* Truncating per-channel mean of four packed pixels with four 8-bit channels. */
static uint32 Texture2x_32_Mean4(uint32 p, uint32 q, uint32 r, uint32 s)
{
  uint32 packed = 0;
  uint32 bit;

  for (bit = 0; bit < 32; bit += 8)
    packed |= ((((p >> bit) & 0xFF) + ((q >> bit) & 0xFF) +
                ((r >> bit) & 0xFF) + ((s >> bit) & 0xFF)) / 4) << bit;
  return packed;
}

/*
 * Doubles a 32-bit-per-pixel image in both directions.
 *
 * srcPtr/srcPitch - source pixels and the byte distance between source rows
 * dstPtr/dstPitch - destination pixels and the byte distance between
 *                   destination rows; 2*width x 2*height pixels are written
 * width, height   - source size in pixels
 *
 * Each source pixel produces a 2x2 destination cell:
 *   top-left     the source pixel itself
 *   top-right    mean of the pixel and its right neighbour
 *   bottom-left  mean of the pixel and the one below it
 *   bottom-right mean of all four pixels of the source 2x2 neighbourhood
 * In the last column or last row the missing neighbours are left out of the
 * mean, so the corner cell ends up being plain copies of the source pixel.
 */
void Texture2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
{
  const uint32 cols = width;
  const uint32 rows = height;
  uint32 row;
  uint32 col;

  for (row = 0; row < rows; row++)
  {
    const uint32 *cur = (const uint32*)(srcPtr + row*srcPitch);
    /* only dereferenced while a following row exists */
    const uint32 *next = (const uint32*)(srcPtr + (row+1)*srcPitch);
    uint32 *outTop = (uint32*)(dstPtr + (row*2)*dstPitch);
    uint32 *outBottom = (uint32*)(dstPtr + (row*2+1)*dstPitch);
    const int hasNextRow = (row < rows-1);

    for (col = 0; col < cols; col++)
    {
      const int hasNextCol = (col < cols-1);
      const uint32 here = cur[col];
      /* blend with the right neighbour, or repeat the pixel at the right edge */
      const uint32 horiz = hasNextCol ? Texture2x_32_Mean2(here, cur[col+1]) : here;

      outTop[col*2] = here;
      outTop[col*2+1] = horiz;

      if (hasNextRow)
      {
        const uint32 vert = Texture2x_32_Mean2(here, next[col]);

        outBottom[col*2] = vert;
        outBottom[col*2+1] = hasNextCol
          ? Texture2x_32_Mean4(here, cur[col+1], next[col], next[col+1])
          : vert;
      }
      else
      {
        /* bottom edge: the lower half of the cell repeats the upper half */
        outBottom[col*2] = here;
        outBottom[col*2+1] = horiz;
      }
    }
  }
}

#if !_16BPP_HACK
// Basic 2x R4G4B4A4 filter with interpolation
void Texture2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
{
  uint16 *pDst1, *pDst2;
  uint16 *pSrc, *pSrc2;
  uint32 nWidth = width;
  uint32 nHeight = height;

  uint16 b1;
  uint16 g1;
  uint16 r1;
  uint16 a1;
  uint16 b2;
  uint16 g2;
  uint16 r2;
  uint16 a2;
  uint16 b3;
  uint16 g3;
  uint16 r3;
  uint16 a3;
  uint16 b4;
  uint16 g4;
  uint16 r4;
  uint16 a4;

  uint16 xSrc;
  uint16 ySrc;

  for (ySrc = 0; ySrc < nHeight; ySrc++)
  {
    pSrc = (uint16*)(((uint8*)srcPtr)+ySrc*srcPitch);
    pSrc2 = (uint16*)(((uint8*)srcPtr)+(ySrc+1)*srcPitch);
    pDst1 = (uint16*)(((uint8*)dstPtr)+(ySrc*2)*dstPitch);
    pDst2 = (uint16*)(((uint8*)dstPtr)+(ySrc*2+1)*dstPitch);

    for (xSrc = 0; xSrc < nWidth; xSrc++)
    {
      b1 = (pSrc[xSrc]>> 0)&0xF;
      g1 = (pSrc[xSrc]>> 4)&0xF;
      r1 = (pSrc[xSrc]>> 8)&0xF;
      a1 = (pSrc[xSrc]>>12)&0xF;

      if( xSrc<nWidth-1 )
      {
        b2 = (pSrc[xSrc+1]>> 0)&0xF;
        g2 = (pSrc[xSrc+1]>> 4)&0xF;
        r2 = (pSrc[xSrc+1]>> 8)&0xF;
        a2 = (pSrc[xSrc+1]>>12)&0xF;
      }

      if( ySrc<nHeight-1 )
      {
        b3 = (pSrc2[xSrc]>> 0)&0xF;
        g3 = (pSrc2[xSrc]>> 4)&0xF;
        r3 = (pSrc2[xSrc]>> 8)&0xF;
        a3 = (pSrc2[xSrc]>>12)&0xF;
        if( xSrc<nWidth-1 )
        {
          b4 = (pSrc2[xSrc+1]>> 0)&0xF;
          g4 = (pSrc2[xSrc+1]>> 4)&0xF;
          r4 = (pSrc2[xSrc+1]>> 8)&0xF;
          a4 = (pSrc2[xSrc+1]>>12)&0xF;
        }
      }

      // Pixel 1
      pDst1[xSrc*2] = pSrc[xSrc];

      // Pixel 2
      if( xSrc<nWidth-1 )
      {
        pDst1[xSrc*2+1] = WORD_MAKE((r1+r2)/2, (g1+g2)/2, (b1+b2)/2, (a1+a2)/2);
      }
      else
        pDst1[xSrc*2+1] = pSrc[xSrc];


      // Pixel 3
      if( ySrc<nHeight-1 )
      {
        pDst2[xSrc*2] = WORD_MAKE((r1+r3)/2, (g1+g3)/2, (b1+b3)/2, (a1+a3)/2);
      }
      else
        pDst2[xSrc*2] = pSrc[xSrc];

      // Pixel 4
      if( xSrc<nWidth-1 )
      {
        if( ySrc<nHeight-1 )
        {
          pDst2[xSrc*2+1] = WORD_MAKE((r1+r2+r3+r4)/4, (g1+g2+g3+g4)/4, (b1+b2+b3+b4)/4, (a1+a2+a3+a4)/4);
        }
        else
        {
          pDst2[xSrc*2+1] = WORD_MAKE((r1+r2)/2, (g1+g2)/2, (b1+b2)/2, (a1+a2)/2);
        }
      }
      else
      {
        if( ySrc<nHeight-1 )
        {
          pDst2[xSrc*2+1] = WORD_MAKE((r1+r3)/2, (g1+g3)/2, (b1+b3)/2, (a1+a3)/2);
        }
        else
          pDst2[xSrc*2+1] = pSrc[xSrc];
      }
    }
  }
}
#endif /* !_16BPP_HACK */

/*
 * Sharp filters
 * Hiroshi Morii <koolsmoky@users.sourceforge.net>
 */
/*
 * Sharpens an image whose pixels hold four 8-bit channels.
 *
 * src       - source pixels, srcheight rows of srcwidth pixels stored back
 *             to back
 * srcwidth  - row length in pixels
 * srcheight - number of rows
 * dest      - output pixels, same layout and size as src
 * filter    - SHARP_FILTER_2 or SHARP_FILTER_1; any other value is treated
 *             as SHARP_FILTER_1
 *
 * For every interior pixel and every channel, with c the centre value and n
 * the sum of its eight neighbours:
 *   if 8*c > n   SHARP_FILTER_1 writes (16*c - n) >> 3,
 *                SHARP_FILTER_2 writes (12*c - n) >> 2, both clamped to 0xFF
 *   otherwise    c is written unchanged.
 * The first/last row and the first/last pixel of each row are copied as is.
 * An empty image is ignored and a one-row image is copied unchanged.
 */
void SharpFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter)
{
  uint32 mul1, mul2, mul3, shift4;

  uint32 x, y, z;
  uint32 *above, *center, *below, *out;
  uint32 val[4];
  uint32 t1,t2,t3,t4,t5,t6,t7,t8,t9;
  uint32 around;

  /* srcwidth-1 and srcheight-1 below are unsigned and would wrap for 0 */
  if (srcwidth == 0 || srcheight == 0)
    return;
  /* with one row there is no separate last row to copy */
  if (srcheight == 1) {
    memcpy(dest, src, (srcwidth << 2));
    return;
  }

  switch( filter )
  {
  case SHARP_FILTER_2:
    mul1=1;
    mul2=8;
    mul3=12;
    shift4=2;
    break;
  case SHARP_FILTER_1:
  default:
    mul1=1;
    mul2=8;
    mul3=16;
    shift4=3;
    break;
  }

  // three-row window sliding down the image
  above  = src;
  center = above + srcwidth;
  below  = center + srcwidth;
  out    = dest;

  // first row is copied
  memcpy(out, above, (srcwidth << 2));
  out += srcwidth;

  // rows 1 .. srcheight-2 are filtered
  for (y = 1; y < srcheight-1; y++) {
    // first pixel of the row is copied
    out[0] = center[0];

    // pixels 1 .. srcwidth-2 are filtered
    for (x = 1; x < srcwidth-1; x++) {
      for (z = 0; z < 4; z++) {
        // byte z of each pixel in the 3x3 neighbourhood
        t1 = *((uint8*)(above +x-1)+z);
        t2 = *((uint8*)(above +x  )+z);
        t3 = *((uint8*)(above +x+1)+z);
        t4 = *((uint8*)(center+x-1)+z);
        t5 = *((uint8*)(center+x  )+z);
        t6 = *((uint8*)(center+x+1)+z);
        t7 = *((uint8*)(below +x-1)+z);
        t8 = *((uint8*)(below +x  )+z);
        t9 = *((uint8*)(below +x+1)+z);

        around = (t1+t3+t7+t9+t2+t4+t6+t8)*mul1;

        if( (t5*mul2) > around ) {
          // the test above guarantees the unsigned subtraction cannot wrap
          val[z] = ((t5*mul3) - around)>>shift4;
          if (val[z] > 0xFF) val[z] = 0xFF;
        } else {
          val[z] = t5;
        }
      }
      out[x] = val[0]|(val[1]<<8)|(val[2]<<16)|(val[3]<<24);
    }

    // last pixel of the row is copied
    out[srcwidth-1] = center[srcwidth-1];

    // slide the window down one row
    above  += srcwidth;
    center += srcwidth;
    below  += srcwidth;
    out    += srcwidth;
  }

  // last row is copied
  memcpy(out, center, (srcwidth << 2));
}

#if !_16BPP_HACK
/*
 * Sharpens an image whose 16-bit pixels hold four 4-bit channels.
 *
 * src       - source pixels, srcheight rows of srcwidth pixels stored back
 *             to back
 * srcwidth  - row length in pixels
 * srcheight - number of rows
 * dest      - output pixels, same layout and size as src
 * filter    - SHARP_FILTER_2 or SHARP_FILTER_1; any other value is treated
 *             as SHARP_FILTER_1
 *
 * For every interior pixel and every channel, with c the centre value and n
 * the sum of its eight neighbours:
 *   if 8*c > n   SHARP_FILTER_1 writes (16*c - n) >> 3,
 *                SHARP_FILTER_2 writes (12*c - n) >> 2, both clamped to 0xF
 *   otherwise    c is written unchanged.
 * The first/last row and the first/last pixel of each row are copied as is.
 * An empty image is ignored and a one-row image is copied unchanged.
 */
void SharpFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter)
{
  uint16 mul1, mul2, mul3, shift4;

  uint32 x, y, z;
  uint16 *above, *center, *below, *out;
  uint16 val[4];
  uint16 t1,t2,t3,t4,t5,t6,t7,t8,t9;
  int around;

  /* srcwidth-1 and srcheight-1 below are unsigned and would wrap for 0 */
  if (srcwidth == 0 || srcheight == 0)
    return;
  /* with one row there is no separate last row to copy */
  if (srcheight == 1) {
    memcpy(dest, src, (srcwidth << 1));
    return;
  }

  switch( filter ) {
  case SHARP_FILTER_2:
    mul1=1;
    mul2=8;
    mul3=12;
    shift4=2;
    break;
  case SHARP_FILTER_1:
  default:
    mul1=1;
    mul2=8;
    mul3=16;
    shift4=3;
    break;
  }

  // three-row window sliding down the image
  above  = src;
  center = above + srcwidth;
  below  = center + srcwidth;
  out    = dest;

  // first row is copied
  memcpy(out, above, (srcwidth << 1));
  out += srcwidth;

  // rows 1 .. srcheight-2 are filtered
  for( y = 1; y < srcheight - 1; y++) {
    // first pixel of the row is copied
    out[0] = center[0];

    // pixels 1 .. srcwidth-2 are filtered
    for( x = 1; x < srcwidth - 1; x++) {
      for( z = 0; z < 4; z++ ) {
        // read whole 16-bit pixels and pick out the z-th 4-bit channel
        uint32 shift = z << 2;
        t1 = (above [x-1] >> shift) & 0xF;
        t2 = (above [x  ] >> shift) & 0xF;
        t3 = (above [x+1] >> shift) & 0xF;
        t4 = (center[x-1] >> shift) & 0xF;
        t5 = (center[x  ] >> shift) & 0xF;
        t6 = (center[x+1] >> shift) & 0xF;
        t7 = (below [x-1] >> shift) & 0xF;
        t8 = (below [x  ] >> shift) & 0xF;
        t9 = (below [x+1] >> shift) & 0xF;

        around = (t1+t3+t7+t9+t2+t4+t6+t8)*mul1;

        if( (t5*mul2) > around ) {
          val[z] = ((t5*mul3) - around)>>shift4;
          if (val[z] > 0xF) val[z] = 0xF;
        } else {
          val[z] = t5;
        }
      }
      out[x] = (uint16)(val[0]|(val[1]<<4)|(val[2]<<8)|(val[3]<<12));
    }

    // last pixel of the row is copied
    out[srcwidth-1] = center[srcwidth-1];

    // slide the window down one row
    above  += srcwidth;
    center += srcwidth;
    below  += srcwidth;
    out    += srcwidth;
  }

  // last row is copied
  memcpy(out, center, (srcwidth << 1));
}
#endif /* !_16BPP_HACK */

/*
 * Smooth filters
 * Hiroshi Morii <koolsmoky@users.sourceforge.net>
 */
/*
 * Smooths an image whose pixels hold four 8-bit channels.
 *
 * src       - source pixels, srcheight rows of srcwidth pixels stored back
 *             to back
 * srcwidth  - row length in pixels
 * srcheight - number of rows
 * dest      - output pixels, same layout and size as src
 * filter    - one of SMOOTH_FILTER_1..4; any other value is treated as
 *             SMOOTH_FILTER_1
 *
 * SMOOTH_FILTER_3 and SMOOTH_FILTER_4 apply a 3x3 kernel to every interior
 * pixel (the first/last pixel of each row is copied):
 *   SMOOTH_FILTER_3: (corners + edges + 8*centre) >> 4
 *   SMOOTH_FILTER_4: (corners + 2*edges + 4*centre) >> 4
 * SMOOTH_FILTER_1 and SMOOTH_FILTER_2 blend only vertically and only on
 * odd-numbered rows, across the full row width; even rows are copied:
 *   SMOOTH_FILTER_1: (above + below + 6*centre) >> 3
 *   SMOOTH_FILTER_2: (above + below + 2*centre) >> 2
 * The first and last rows are always copied. An empty image is ignored and
 * a one-row image is copied unchanged.
 */
void SmoothFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter)
{
  uint32 mul1, mul2, mul3, shift4;
  int fullKernel; /* non-zero: 3x3 kernel, zero: vertical blend on odd rows */

  uint32 x, y, z;
  uint32 *above, *center, *below, *out;
  uint32 val[4];
  uint32 t1,t2,t3,t4,t5,t6,t7,t8,t9;

  /* srcwidth-1 and srcheight-1 below are unsigned and would wrap for 0 */
  if (srcwidth == 0 || srcheight == 0)
    return;
  /* with one row there is no separate last row to copy */
  if (srcheight == 1) {
    memcpy(dest, src, (srcwidth << 2));
    return;
  }

  switch( filter ) {
  case SMOOTH_FILTER_4:
    mul1=1;
    mul2=2;
    mul3=4;
    shift4=4;
    fullKernel=1;
    break;
  case SMOOTH_FILTER_3:
    mul1=1;
    mul2=1;
    mul3=8;
    shift4=4;
    fullKernel=1;
    break;
  case SMOOTH_FILTER_2:
    mul1=1;
    mul2=1;
    mul3=2;
    shift4=2;
    fullKernel=0;
    break;
  case SMOOTH_FILTER_1:
  default:
    mul1=1;
    mul2=1;
    mul3=6;
    shift4=3;
    fullKernel=0;
    break;
  }

  // three-row window sliding down the image
  above  = src;
  center = above + srcwidth;
  below  = center + srcwidth;
  out    = dest;

  // first row is copied
  memcpy(out, above, (srcwidth << 2));
  out += srcwidth;

  // rows 1 .. srcheight-2
  for (y = 1; y < srcheight - 1; y++) {
    if (fullKernel) {
      // first pixel of the row is copied
      out[0] = center[0];
      // pixels 1 .. srcwidth-2 are filtered
      for (x = 1; x < srcwidth - 1; x++) {
        for (z = 0; z < 4; z++ ) {
          // byte z of each pixel in the 3x3 neighbourhood
          t1 = *((uint8*)(above +x-1)+z);
          t2 = *((uint8*)(above +x  )+z);
          t3 = *((uint8*)(above +x+1)+z);
          t4 = *((uint8*)(center+x-1)+z);
          t5 = *((uint8*)(center+x  )+z);
          t6 = *((uint8*)(center+x+1)+z);
          t7 = *((uint8*)(below +x-1)+z);
          t8 = *((uint8*)(below +x  )+z);
          t9 = *((uint8*)(below +x+1)+z);
          /* the component value must not overflow 0xFF */
          val[z] = ((t1+t3+t7+t9)*mul1+((t2+t4+t6+t8)*mul2)+(t5*mul3))>>shift4;
          if (val[z] > 0xFF) val[z] = 0xFF;
        }
        out[x] = val[0]|(val[1]<<8)|(val[2]<<16)|(val[3]<<24);
      }
      // last pixel of the row is copied
      out[srcwidth-1] = center[srcwidth-1];
    } else if (y & 1) {
      // odd row: vertical blend across the whole row
      for (x = 0; x < srcwidth; x++) {
        for (z = 0; z < 4; z++ ) {
          t2 = *((uint8*)(above +x)+z);
          t5 = *((uint8*)(center+x)+z);
          t8 = *((uint8*)(below +x)+z);
          /* the component value must not overflow 0xFF */
          val[z] = ((t2+t8)*mul2+(t5*mul3))>>shift4;
          if (val[z] > 0xFF) val[z] = 0xFF;
        }
        out[x] = val[0]|(val[1]<<8)|(val[2]<<16)|(val[3]<<24);
      }
    } else {
      // even row: copied
      memcpy(out, center, (srcwidth << 2));
    }

    // slide the window down one row
    above  += srcwidth;
    center += srcwidth;
    below  += srcwidth;
    out    += srcwidth;
  }

  // last row is copied
  memcpy(out, center, (srcwidth << 2));
}

#if !_16BPP_HACK
/*
 * Smooths an image whose 16-bit pixels hold four 4-bit channels.
 *
 * src       - source pixels, srcheight rows of srcwidth pixels stored back
 *             to back
 * srcwidth  - row length in pixels
 * srcheight - number of rows
 * dest      - output pixels, same layout and size as src
 * filter    - one of SMOOTH_FILTER_1..4; any other value is treated as
 *             SMOOTH_FILTER_1
 *
 * SMOOTH_FILTER_3 and SMOOTH_FILTER_4 apply a 3x3 kernel to every interior
 * pixel (the first/last pixel of each row is copied):
 *   SMOOTH_FILTER_3: (corners + edges + 8*centre) >> 4
 *   SMOOTH_FILTER_4: (corners + 2*edges + 4*centre) >> 4
 * SMOOTH_FILTER_1 and SMOOTH_FILTER_2 blend only vertically and only on
 * odd-numbered rows, across the full row width; even rows are copied:
 *   SMOOTH_FILTER_1: (above + below + 6*centre) >> 3
 *   SMOOTH_FILTER_2: (above + below + 2*centre) >> 2
 * The first and last rows are always copied. An empty image is ignored and
 * a one-row image is copied unchanged.
 */
void SmoothFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter)
{
  uint16 mul1, mul2, mul3, shift4;
  int fullKernel; /* non-zero: 3x3 kernel, zero: vertical blend on odd rows */

  uint32 x, y, z;
  uint16 *above, *center, *below, *out;
  uint16 val[4];
  uint16 t1,t2,t3,t4,t5,t6,t7,t8,t9;

  /* srcwidth-1 and srcheight-1 below are unsigned and would wrap for 0 */
  if (srcwidth == 0 || srcheight == 0)
    return;
  /* with one row there is no separate last row to copy */
  if (srcheight == 1) {
    memcpy(dest, src, (srcwidth << 1));
    return;
  }

  switch( filter ) {
  case SMOOTH_FILTER_4:
    mul1=1;
    mul2=2;
    mul3=4;
    shift4=4;
    fullKernel=1;
    break;
  case SMOOTH_FILTER_3:
    mul1=1;
    mul2=1;
    mul3=8;
    shift4=4;
    fullKernel=1;
    break;
  case SMOOTH_FILTER_2:
    mul1=1;
    mul2=1;
    mul3=2;
    shift4=2;
    fullKernel=0;
    break;
  case SMOOTH_FILTER_1:
  default:
    mul1=1;
    mul2=1;
    mul3=6;
    shift4=3;
    fullKernel=0;
    break;
  }

  // three-row window sliding down the image
  above  = src;
  center = above + srcwidth;
  below  = center + srcwidth;
  out    = dest;

  // first row is copied
  memcpy(out, above, (srcwidth << 1));
  out += srcwidth;

  // rows 1 .. srcheight-2
  for (y = 1; y < srcheight - 1; y++) {
    if (fullKernel) {
      // first pixel of the row is copied
      out[0] = center[0];
      // pixels 1 .. srcwidth-2 are filtered
      for (x = 1; x < srcwidth - 1; x++) {
        for (z = 0; z < 4; z++ ) {
          // read whole 16-bit pixels and pick out the z-th 4-bit channel
          uint32 shift = z << 2;
          t1 = (above [x-1] >> shift) & 0xF;
          t2 = (above [x  ] >> shift) & 0xF;
          t3 = (above [x+1] >> shift) & 0xF;
          t4 = (center[x-1] >> shift) & 0xF;
          t5 = (center[x  ] >> shift) & 0xF;
          t6 = (center[x+1] >> shift) & 0xF;
          t7 = (below [x-1] >> shift) & 0xF;
          t8 = (below [x  ] >> shift) & 0xF;
          t9 = (below [x+1] >> shift) & 0xF;
          /* the component value must not overflow 0xF */
          val[z] = ((t1+t3+t7+t9)*mul1+((t2+t4+t6+t8)*mul2)+(t5*mul3))>>shift4;
          if (val[z] > 0xF) val[z] = 0xF;
        }
        out[x] = (uint16)(val[0]|(val[1]<<4)|(val[2]<<8)|(val[3]<<12));
      }
      // last pixel of the row is copied
      out[srcwidth-1] = center[srcwidth-1];
    } else if (y & 1) {
      // odd row: vertical blend across the whole row
      for (x = 0; x < srcwidth; x++) {
        for (z = 0; z < 4; z++ ) {
          // read whole 16-bit pixels and pick out the z-th 4-bit channel
          uint32 shift = z << 2;
          t2 = (above [x] >> shift) & 0xF;
          t5 = (center[x] >> shift) & 0xF;
          t8 = (below [x] >> shift) & 0xF;
          /* the component value must not overflow 0xF */
          val[z] = ((t2+t8)*mul2+(t5*mul3))>>shift4;
          if (val[z] > 0xF) val[z] = 0xF;
        }
        out[x] = (uint16)(val[0]|(val[1]<<4)|(val[2]<<8)|(val[3]<<12));
      }
    } else {
      // even row: copied
      memcpy(out, center, (srcwidth << 1));
    }

    // slide the window down one row
    above  += srcwidth;
    center += srcwidth;
    below  += srcwidth;
    out    += srcwidth;
  }

  // last row is copied
  memcpy(out, center, (srcwidth << 1));
}
#endif /* !_16BPP_HACK */

/*
 * Runs the 32-bit texture filter selected by the bits of `filter`.
 *
 * The bits under ENHANCEMENT_MASK are examined first; when they name one of
 * the enhancement routines below, that routine is run and the function
 * returns. Otherwise the bits under SMOOTH_FILTER_MASK|SHARP_FILTER_MASK
 * pick a smooth or sharp filter. When neither selection matches, the
 * function returns without touching dest.
 *
 * src, dest          - source and destination pixel buffers
 * srcwidth/srcheight - source size in pixels
 */
void filter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter) {
  uint8 *srcBytes = (uint8*)src;
  uint8 *destBytes = (uint8*)dest;
  const uint32 srcPitch = srcwidth << 2;    /* passed as the source pitch to the 2x routines */
  const uint32 destPitch2x = srcwidth << 3; /* passed as the destination pitch to the 2x routines */

  switch (filter & ENHANCEMENT_MASK) {
  case HQ4X_ENHANCEMENT:
    hq4x_8888(srcBytes, destBytes, srcwidth, srcheight, srcwidth, (srcwidth << 4));
    return;
  case HQ2X_ENHANCEMENT:
    hq2x_32(srcBytes, srcPitch, destBytes, destPitch2x, srcwidth, srcheight);
    return;
  case HQ2XS_ENHANCEMENT:
    hq2xS_32(srcBytes, srcPitch, destBytes, destPitch2x, srcwidth, srcheight);
    return;
  case LQ2X_ENHANCEMENT:
    lq2x_32(srcBytes, srcPitch, destBytes, destPitch2x, srcwidth, srcheight);
    return;
  case LQ2XS_ENHANCEMENT:
    lq2xS_32(srcBytes, srcPitch, destBytes, destPitch2x, srcwidth, srcheight);
    return;
  case X2SAI_ENHANCEMENT:
    Super2xSaI_8888(src, dest, srcwidth, srcheight, srcwidth);
    return;
  case X2_ENHANCEMENT:
    Texture2x_32(srcBytes, srcPitch, destBytes, destPitch2x, srcwidth, srcheight);
    return;
  }

  /* no enhancement selected: fall back to a same-size smooth/sharp pass */
  switch (filter & (SMOOTH_FILTER_MASK|SHARP_FILTER_MASK)) {
  case SMOOTH_FILTER_1:
  case SMOOTH_FILTER_2:
  case SMOOTH_FILTER_3:
  case SMOOTH_FILTER_4:
    SmoothFilter_8888(src, srcwidth, srcheight, dest, (filter & SMOOTH_FILTER_MASK));
    return;
  case SHARP_FILTER_1:
  case SHARP_FILTER_2:
    SharpFilter_8888(src, srcwidth, srcheight, dest, (filter & SHARP_FILTER_MASK));
    return;
  }
}
Commit	Line	Data
	1	/*
	2	Copyright (C) 2003 Rice1964
	3
	4	This program is free software; you can redistribute it and/or
	5	modify it under the terms of the GNU General Public License
	6	as published by the Free Software Foundation; either version 2
	7	of the License, or (at your option) any later version.
	8
	9	This program is distributed in the hope that it will be useful,
	10	but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	GNU General Public License for more details.
	13
	14	You should have received a copy of the GNU General Public License
	15	along with this program; if not, write to the Free Software
	16	Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	17
	18	*/
	19
	20	/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
	21	* Modified for the Texture Filtering library
	22	*/
	23
	24	#include <string.h>
	25	#include "TextureFilters.h"
	26
	27	/************************************************************************/
	28	/* 2X filters */
	29	/************************************************************************/
	30
	31	#define DWORD_MAKE(r, g, b, a) ((uint32) (((a) << 24) \| ((r) << 16) \| ((g) << 8) \| (b)))
	32	#define WORD_MAKE(r, g, b, a) ((uint16) (((a) << 12) \| ((r) << 8) \| ((g) << 4) \| (b)))
	33
	34	// Basic 2x R8G8B8A8 filter with interpolation
	35
	36	void Texture2x_32(uint8 srcPtr, uint32 srcPitch, uint8 dstPtr, uint32 dstPitch, int width, int height)
	37	{
	38	uint32 pDst1, pDst2;
	39	uint32 pSrc, pSrc2;
	40	uint32 nWidth = width;
	41	uint32 nHeight = height;
	42
	43	uint32 b1;
	44	uint32 g1;
	45	uint32 r1;
	46	uint32 a1;
	47	uint32 b2;
	48	uint32 g2;
	49	uint32 r2;
	50	uint32 a2;
	51	uint32 b3;
	52	uint32 g3;
	53	uint32 r3;
	54	uint32 a3;
	55	uint32 b4;
	56	uint32 g4;
	57	uint32 r4;
	58	uint32 a4;
	59
	60	uint32 xSrc;
	61	uint32 ySrc;
	62
	63	for (ySrc = 0; ySrc < nHeight; ySrc++)
	64	{
	65	pSrc = (uint32)(((uint8)srcPtr)+ySrc*srcPitch);
	66	pSrc2 = (uint32)(((uint8)srcPtr)+(ySrc+1)*srcPitch);
	67	pDst1 = (uint32)(((uint8)dstPtr)+(ySrc2)dstPitch);
	68	pDst2 = (uint32)(((uint8)dstPtr)+(ySrc2+1)dstPitch);
	69
	70	for (xSrc = 0; xSrc < nWidth; xSrc++)
	71	{
	72	b1 = (pSrc[xSrc]>>0)&0xFF;
	73	g1 = (pSrc[xSrc]>>8)&0xFF;
	74	r1 = (pSrc[xSrc]>>16)&0xFF;
	75	a1 = (pSrc[xSrc]>>24)&0xFF;
	76
	77	// Pixel 1
	78	pDst1[xSrc*2] = pSrc[xSrc];
	79
	80	// Pixel 2
	81	if( xSrc<nWidth-1 )
	82	{
	83	b2 = (pSrc[xSrc+1]>>0)&0xFF;
	84	g2 = (pSrc[xSrc+1]>>8)&0xFF;
	85	r2 = (pSrc[xSrc+1]>>16)&0xFF;
	86	a2 = (pSrc[xSrc+1]>>24)&0xFF;
	87	pDst1[xSrc*2+1] = DWORD_MAKE((r1+r2)/2, (g1+g2)/2, (b1+b2)/2, (a1+a2)/2);
	88	}
	89	else
	90	pDst1[xSrc*2+1] = pSrc[xSrc];
	91
	92	// Pixel 3
	93	if( ySrc<nHeight-1 )
	94	{
	95	b3 = (pSrc2[xSrc]>>0)&0xFF;
	96	g3 = (pSrc2[xSrc]>>8)&0xFF;
	97	r3 = (pSrc2[xSrc]>>16)&0xFF;
	98	a3 = (pSrc2[xSrc]>>24)&0xFF;
	99	pDst2[xSrc*2] = DWORD_MAKE((r1+r3)/2, (g1+g3)/2, (b1+b3)/2, (a1+a3)/2);
	100	if( xSrc<nWidth-1 )
	101	{
	102	b4 = (pSrc2[xSrc+1]>>0)&0xFF;
	103	g4 = (pSrc2[xSrc+1]>>8)&0xFF;
	104	r4 = (pSrc2[xSrc+1]>>16)&0xFF;
	105	a4 = (pSrc2[xSrc+1]>>24)&0xFF;
	106	// Pixel 4
	107	pDst2[xSrc*2+1] = DWORD_MAKE((r1+r2+r3+r4)/4, (g1+g2+g3+g4)/4, (b1+b2+b3+b4)/4, (a1+a2+a3+a4)/4);
	108	}
	109	else
	110	{
	111	// Pixel 4
	112	pDst2[xSrc*2+1] = DWORD_MAKE((r1+r3)/2, (g1+g3)/2, (b1+b3)/2, (a1+a3)/2);
	113	}
	114	}
	115	else
	116	{
	117	// Pixel 3
	118	pDst2[xSrc*2] = pSrc[xSrc];
	119	// Pixel 4
	120	if( xSrc<nWidth-1 )
	121	{
	122	pDst2[xSrc*2+1] = DWORD_MAKE((r1+r2)/2, (g1+g2)/2, (b1+b2)/2, (a1+a2)/2);
	123	}
	124	else
	125	{
	126	pDst2[xSrc*2+1] = pSrc[xSrc];
	127	}
	128	}
	129	}
	130	}
	131	}
	132
	133	#if !_16BPP_HACK
	134	// Basic 2x R4G4B4A4 filter with interpolation
	135	void Texture2x_16(uint8 srcPtr, uint32 srcPitch, uint8 dstPtr, uint32 dstPitch, int width, int height)
	136	{
	137	uint16 pDst1, pDst2;
	138	uint16 pSrc, pSrc2;
	139	uint32 nWidth = width;
	140	uint32 nHeight = height;
	141
	142	uint16 b1;
	143	uint16 g1;
	144	uint16 r1;
	145	uint16 a1;
	146	uint16 b2;
	147	uint16 g2;
	148	uint16 r2;
	149	uint16 a2;
	150	uint16 b3;
	151	uint16 g3;
	152	uint16 r3;
	153	uint16 a3;
	154	uint16 b4;
	155	uint16 g4;
	156	uint16 r4;
	157	uint16 a4;
	158
	159	uint16 xSrc;
	160	uint16 ySrc;
	161
	162	for (ySrc = 0; ySrc < nHeight; ySrc++)
	163	{
	164	pSrc = (uint16)(((uint8)srcPtr)+ySrc*srcPitch);
	165	pSrc2 = (uint16)(((uint8)srcPtr)+(ySrc+1)*srcPitch);
	166	pDst1 = (uint16)(((uint8)dstPtr)+(ySrc2)dstPitch);
	167	pDst2 = (uint16)(((uint8)dstPtr)+(ySrc2+1)dstPitch);
	168
	169	for (xSrc = 0; xSrc < nWidth; xSrc++)
	170	{
	171	b1 = (pSrc[xSrc]>> 0)&0xF;
	172	g1 = (pSrc[xSrc]>> 4)&0xF;
	173	r1 = (pSrc[xSrc]>> 8)&0xF;
	174	a1 = (pSrc[xSrc]>>12)&0xF;
	175
	176	if( xSrc<nWidth-1 )
	177	{
	178	b2 = (pSrc[xSrc+1]>> 0)&0xF;
	179	g2 = (pSrc[xSrc+1]>> 4)&0xF;
	180	r2 = (pSrc[xSrc+1]>> 8)&0xF;
	181	a2 = (pSrc[xSrc+1]>>12)&0xF;
	182	}
	183
	184	if( ySrc<nHeight-1 )
	185	{
	186	b3 = (pSrc2[xSrc]>> 0)&0xF;
	187	g3 = (pSrc2[xSrc]>> 4)&0xF;
	188	r3 = (pSrc2[xSrc]>> 8)&0xF;
	189	a3 = (pSrc2[xSrc]>>12)&0xF;
	190	if( xSrc<nWidth-1 )
	191	{
	192	b4 = (pSrc2[xSrc+1]>> 0)&0xF;
	193	g4 = (pSrc2[xSrc+1]>> 4)&0xF;
	194	r4 = (pSrc2[xSrc+1]>> 8)&0xF;
	195	a4 = (pSrc2[xSrc+1]>>12)&0xF;
	196	}
	197	}
	198
	199	// Pixel 1
	200	pDst1[xSrc*2] = pSrc[xSrc];
	201
	202	// Pixel 2
	203	if( xSrc<nWidth-1 )
	204	{
	205	pDst1[xSrc*2+1] = WORD_MAKE((r1+r2)/2, (g1+g2)/2, (b1+b2)/2, (a1+a2)/2);
	206	}
	207	else
	208	pDst1[xSrc*2+1] = pSrc[xSrc];
	209
	210
	211	// Pixel 3
	212	if( ySrc<nHeight-1 )
	213	{
	214	pDst2[xSrc*2] = WORD_MAKE((r1+r3)/2, (g1+g3)/2, (b1+b3)/2, (a1+a3)/2);
	215	}
	216	else
	217	pDst2[xSrc*2] = pSrc[xSrc];
	218
	219	// Pixel 4
	220	if( xSrc<nWidth-1 )
	221	{
	222	if( ySrc<nHeight-1 )
	223	{
	224	pDst2[xSrc*2+1] = WORD_MAKE((r1+r2+r3+r4)/4, (g1+g2+g3+g4)/4, (b1+b2+b3+b4)/4, (a1+a2+a3+a4)/4);
	225	}
	226	else
	227	{
	228	pDst2[xSrc*2+1] = WORD_MAKE((r1+r2)/2, (g1+g2)/2, (b1+b2)/2, (a1+a2)/2);
	229	}
	230	}
	231	else
	232	{
	233	if( ySrc<nHeight-1 )
	234	{
	235	pDst2[xSrc*2+1] = WORD_MAKE((r1+r3)/2, (g1+g3)/2, (b1+b3)/2, (a1+a3)/2);
	236	}
	237	else
	238	pDst2[xSrc*2+1] = pSrc[xSrc];
	239	}
	240	}
	241	}
	242	}
	243	#endif /* !_16BPP_HACK */
	244
	245	/*
	246	* Sharp filters
	247	* Hiroshi Morii <koolsmoky@users.sourceforge.net>
	248	*/
	249	void SharpFilter_8888(uint32 src, uint32 srcwidth, uint32 srcheight, uint32 dest, uint32 filter)
	250	{
	251	// NOTE: for now we get away with copying the boundaries
	252	// filter the boundaries if we face problems
	253
	254	uint32 mul1, mul2, mul3, shift4;
	255
	256	uint32 x,y,z;
	257	uint32 _src1, _src2, _src3, _dest;
	258	uint32 val[4];
	259	uint32 t1,t2,t3,t4,t5,t6,t7,t8,t9;
	260
	261	switch( filter )
	262	{
	263	case SHARP_FILTER_2:
	264	mul1=1;
	265	mul2=8;
	266	mul3=12;
	267	shift4=2;
	268	break;
	269	case SHARP_FILTER_1:
	270	default:
	271	mul1=1;
	272	mul2=8;
	273	mul3=16;
	274	shift4=3;
	275	break;
	276	}
	277
	278	// setup rows
	279	_src1 = src;
	280	_src2 = _src1 + srcwidth;
	281	_src3 = _src2 + srcwidth;
	282	_dest = dest;
	283
	284	// copy the first row
	285	memcpy(_dest, _src1, (srcwidth << 2));
	286	_dest += srcwidth;
	287	// filter 2nd row to 1 row before the last
	288	for (y = 1; y < srcheight-1; y++) {
	289	// copy the first pixel
	290	_dest[0] = *_src2;
	291	// filter 2nd pixel to 1 pixel before last
	292	for (x = 1; x < srcwidth-1; x++) {
	293	for (z=0; z<4; z++) {
	294	t1 = ((uint8)(_src1+x-1)+z);
	295	t2 = ((uint8)(_src1+x )+z);
	296	t3 = ((uint8)(_src1+x+1)+z);
	297	t4 = ((uint8)(_src2+x-1)+z);
	298	t5 = ((uint8)(_src2+x )+z);
	299	t6 = ((uint8)(_src2+x+1)+z);
	300	t7 = ((uint8)(_src3+x-1)+z);
	301	t8 = ((uint8)(_src3+x )+z);
	302	t9 = ((uint8)(_src3+x+1)+z);
	303
	304	if( (t5mul2) > (t1+t3+t7+t9+t2+t4+t6+t8)mul1 ) {
	305	val[z]= ((t5mul3) - (t1+t3+t7+t9+t2+t4+t6+t8)mul1)>>shift4;
	306	if (val[z] > 0xFF) val[z] = 0xFF;
	307	} else {
	308	val[z] = t5;
	309	}
	310	}
	311	_dest[x] = val[0]\|(val[1]<<8)\|(val[2]<<16)\|(val[3]<<24);
	312	}
	313	// copy the ending pixel
	314	_dest[srcwidth-1] = *(_src3 - 1);
	315	// next row
	316	_src1 += srcwidth;
	317	_src2 += srcwidth;
	318	_src3 += srcwidth;
	319	_dest += srcwidth;
	320	}
	321	// copy the last row
	322	memcpy(_dest, _src2, (srcwidth << 2));
	323	}
	324
	325	#if !_16BPP_HACK
	326	void SharpFilter_4444(uint16 src, uint32 srcwidth, uint32 srcheight, uint16 dest, uint32 filter)
	327	{
	328	// NOTE: for now we get away with copying the boundaries
	329	// filter the boundaries if we face problems
	330
	331	uint16 mul1, mul2, mul3, shift4;
	332
	333	uint32 x,y,z;
	334	uint16 _src1, _src2, _src3, _dest;
	335	uint16 val[4];
	336	uint16 t1,t2,t3,t4,t5,t6,t7,t8,t9;
	337
	338	switch( filter ) {
	339	case SHARP_FILTER_2:
	340	mul1=1;
	341	mul2=8;
	342	mul3=12;
	343	shift4=2;
	344	break;
	345	case SHARP_FILTER_1:
	346	default:
	347	mul1=1;
	348	mul2=8;
	349	mul3=16;
	350	shift4=3;
	351	break;
	352	}
	353
	354	// setup rows
	355	_src1 = src;
	356	_src2 = _src1 + srcwidth;
	357	_src3 = _src2 + srcwidth;
	358	_dest = dest;
	359
	360	// copy the first row
	361	memcpy(_dest, _src1, (srcwidth << 1));
	362	_dest += srcwidth;
	363	// filter 2nd row to 1 row before the last
	364	for( y = 1; y < srcheight - 1; y++) {
	365	// copy the first pixel
	366	_dest[0] = *_src2;
	367	// filter 2nd pixel to 1 pixel before last
	368	for( x = 1; x < srcwidth - 1; x++) {
	369	for( z = 0; z < 4; z++ ) {
	370	/* Hiroshi Morii <koolsmoky@users.sourceforge.net>
	371	* Read the entire 16bit pixel and then extract the A,R,G,B components.
	372	*/
	373	uint32 shift = z << 2;
	374	t1 = ((((uint16)(_src1+x-1))) >> shift) & 0xF;
	375	t2 = ((((uint16)(_src1+x ))) >> shift) & 0xF;
	376	t3 = ((((uint16)(_src1+x+1))) >> shift) & 0xF;
	377	t4 = ((((uint16)(_src2+x-1))) >> shift) & 0xF;
	378	t5 = ((((uint16)(_src2+x ))) >> shift) & 0xF;
	379	t6 = ((((uint16)(_src2+x+1))) >> shift) & 0xF;
	380	t7 = ((((uint16)(_src3+x-1))) >> shift) & 0xF;
	381	t8 = ((((uint16)(_src3+x ))) >> shift) & 0xF;
	382	t9 = ((((uint16)(_src3+x+1))) >> shift) & 0xF;
	383
	384	if( (t5mul2) > (t1+t3+t7+t9+t2+t4+t6+t8)mul1 ) {
	385	val[z] = ((t5mul3) - (t1+t3+t7+t9+t2+t4+t6+t8)mul1)>>shift4;
	386	if (val[z] > 0xF) val[z] = 0xF;
	387	} else {
	388	val[z] = t5;
	389	}
	390	}
	391	_dest[x] = val[0]\|(val[1]<<4)\|(val[2]<<8)\|(val[3]<<12);
	392	}
	393	// copy the ending pixel
	394	_dest[srcwidth-1] = *(_src3 - 1);
	395	// next row
	396	_src1 += srcwidth;
	397	_src2 += srcwidth;
	398	_src3 += srcwidth;
	399	_dest += srcwidth;
	400	}
	401	// copy the last row
	402	memcpy(_dest, _src2, (srcwidth << 1));
	403	}
	404	#endif /* !_16BPP_HACK */
	405
	406	/*
	407	* Smooth filters
	408	* Hiroshi Morii <koolsmoky@users.sourceforge.net>
	409	*/
	410	void SmoothFilter_8888(uint32 src, uint32 srcwidth, uint32 srcheight, uint32 dest, uint32 filter)
	411	{
	412	// NOTE: for now we get away with copying the boundaries
	413	// filter the boundaries if we face problems
	414
	415	uint32 mul1, mul2, mul3, shift4;
	416
	417	uint32 x,y,z;
	418	uint32 _src1, _src2, _src3, _dest;
	419	uint32 val[4];
	420	uint32 t1,t2,t3,t4,t5,t6,t7,t8,t9;
	421
	422	switch( filter ) {
	423	case SMOOTH_FILTER_4:
	424	mul1=1;
	425	mul2=2;
	426	mul3=4;
	427	shift4=4;
	428	break;
	429	case SMOOTH_FILTER_3:
	430	mul1=1;
	431	mul2=1;
	432	mul3=8;
	433	shift4=4;
	434	break;
	435	case SMOOTH_FILTER_2:
	436	mul1=1;
	437	mul2=1;
	438	mul3=2;
	439	shift4=2;
	440	break;
	441	case SMOOTH_FILTER_1:
	442	default:
	443	mul1=1;
	444	mul2=1;
	445	mul3=6;
	446	shift4=3;
	447	break;
	448	}
	449
	450	switch (filter) {
	451	case SMOOTH_FILTER_3:
	452	case SMOOTH_FILTER_4:
	453	// setup rows
	454	_src1 = src;
	455	_src2 = _src1 + srcwidth;
	456	_src3 = _src2 + srcwidth;
	457	_dest = dest;
	458	// copy the first row
	459	memcpy(_dest, _src1, (srcwidth << 2));
	460	_dest += srcwidth;
	461	// filter 2nd row to 1 row before the last
	462	for (y = 1; y < srcheight - 1; y++){
	463	// copy the first pixel
	464	_dest[0] = _src2[0];
	465	// filter 2nd pixel to 1 pixel before last
	466	for (x = 1; x < srcwidth - 1; x++) {
	467	for (z = 0; z < 4; z++ ) {
	468	t1 = ((uint8)(_src1+x-1)+z);
	469	t2 = ((uint8)(_src1+x )+z);
	470	t3 = ((uint8)(_src1+x+1)+z);
	471	t4 = ((uint8)(_src2+x-1)+z);
	472	t5 = ((uint8)(_src2+x )+z);
	473	t6 = ((uint8)(_src2+x+1)+z);
	474	t7 = ((uint8)(_src3+x-1)+z);
	475	t8 = ((uint8)(_src3+x )+z);
	476	t9 = ((uint8)(_src3+x+1)+z);
	477	/* the component value must not overflow 0xFF */
	478	val[z] = ((t1+t3+t7+t9)mul1+((t2+t4+t6+t8)mul2)+(t5*mul3))>>shift4;
	479	if (val[z] > 0xFF) val[z] = 0xFF;
	480	}
	481	_dest[x] = val[0]\|(val[1]<<8)\|(val[2]<<16)\|(val[3]<<24);
	482	}
	483	// copy the ending pixel
	484	_dest[srcwidth-1] = *(_src3 - 1);
	485	// next row
	486	_src1 += srcwidth;
	487	_src2 += srcwidth;
	488	_src3 += srcwidth;
	489	_dest += srcwidth;
	490	}
	491	// copy the last row
	492	memcpy(_dest, _src2, (srcwidth << 2));
	493	break;
	494	case SMOOTH_FILTER_1:
	495	case SMOOTH_FILTER_2:
	496	default:
	497	// setup rows
	498	_src1 = src;
	499	_src2 = _src1 + srcwidth;
	500	_src3 = _src2 + srcwidth;
	501	_dest = dest;
	502	// copy the first row
	503	memcpy(_dest, _src1, (srcwidth << 2));
	504	_dest += srcwidth;
	505	// filter 2nd row to 1 row before the last
	506	for (y = 1; y < srcheight - 1; y++) {
	507	// filter 1st pixel to the last
	508	if (y & 1) {
	509	for( x = 0; x < srcwidth; x++) {
	510	for( z = 0; z < 4; z++ ) {
	511	t2 = ((uint8)(_src1+x )+z);
	512	t5 = ((uint8)(_src2+x )+z);
	513	t8 = ((uint8)(_src3+x )+z);
	514	/* the component value must not overflow 0xFF */
	515	val[z] = ((t2+t8)mul2+(t5mul3))>>shift4;
	516	if (val[z] > 0xFF) val[z] = 0xFF;
	517	}
	518	_dest[x] = val[0]\|(val[1]<<8)\|(val[2]<<16)\|(val[3]<<24);
	519	}
	520	} else {
	521	memcpy(_dest, _src2, (srcwidth << 2));
	522	}
	523	// next row
	524	_src1 += srcwidth;
	525	_src2 += srcwidth;
	526	_src3 += srcwidth;
	527	_dest += srcwidth;
	528	}
	529	// copy the last row
	530	memcpy(_dest, _src2, (srcwidth << 2));
	531	break;
	532	}
	533	}
	534
	535	#if !_16BPP_HACK
	536	void SmoothFilter_4444(uint16 src, uint32 srcwidth, uint32 srcheight, uint16 dest, uint32 filter)
	537	{
	538	// NOTE: for now we get away with copying the boundaries
	539	// filter the boundaries if we face problems
	540
	541	uint16 mul1, mul2, mul3, shift4;
	542
	543	uint32 x,y,z;
	544	uint16 _src1, _src2, _src3, _dest;
	545	uint16 val[4];
	546	uint16 t1,t2,t3,t4,t5,t6,t7,t8,t9;
	547
	548	switch( filter ) {
	549	case SMOOTH_FILTER_4:
	550	mul1=1;
	551	mul2=2;
	552	mul3=4;
	553	shift4=4;
	554	break;
	555	case SMOOTH_FILTER_3:
	556	mul1=1;
	557	mul2=1;
	558	mul3=8;
	559	shift4=4;
	560	break;
	561	case SMOOTH_FILTER_2:
	562	mul1=1;
	563	mul2=1;
	564	mul3=2;
	565	shift4=2;
	566	break;
	567	case SMOOTH_FILTER_1:
	568	default:
	569	mul1=1;
	570	mul2=1;
	571	mul3=6;
	572	shift4=3;
	573	break;
	574	}
	575
	576	switch (filter) {
	577	case SMOOTH_FILTER_3:
	578	case SMOOTH_FILTER_4:
	579	// setup rows
	580	_src1 = src;
	581	_src2 = _src1 + srcwidth;
	582	_src3 = _src2 + srcwidth;
	583	_dest = dest;
	584	// copy the first row
	585	memcpy(_dest, _src1, (srcwidth << 1));
	586	_dest += srcwidth;
	587	// filter 2nd row to 1 row before the last
	588	for (y = 1; y < srcheight - 1; y++) {
	589	// copy the first pixel
	590	_dest[0] = *_src2;
	591	// filter 2nd pixel to 1 pixel before last
	592	for (x = 1; x < srcwidth - 1; x++) {
	593	for (z = 0; z < 4; z++ ) {
	594	/* Read the entire 16bit pixel and then extract the A,R,G,B components. */
	595	uint32 shift = z << 2;
	596	t1 = (((uint16)(_src1+x-1)) >> shift) & 0xF;
	597	t2 = (((uint16)(_src1+x )) >> shift) & 0xF;
	598	t3 = (((uint16)(_src1+x+1)) >> shift) & 0xF;
	599	t4 = (((uint16)(_src2+x-1)) >> shift) & 0xF;
	600	t5 = (((uint16)(_src2+x )) >> shift) & 0xF;
	601	t6 = (((uint16)(_src2+x+1)) >> shift) & 0xF;
	602	t7 = (((uint16)(_src3+x-1)) >> shift) & 0xF;
	603	t8 = (((uint16)(_src3+x )) >> shift) & 0xF;
	604	t9 = (((uint16)(_src3+x+1)) >> shift) & 0xF;
	605	/* the component value must not overflow 0xF */
	606	val[z] = ((t1+t3+t7+t9)mul1+((t2+t4+t6+t8)mul2)+(t5*mul3))>>shift4;
	607	if (val[z] > 0xF) val[z] = 0xF;
	608	}
	609	_dest[x] = val[0]\|(val[1]<<4)\|(val[2]<<8)\|(val[3]<<12);
	610	}
	611	// copy the ending pixel
	612	_dest[srcwidth-1] = *(_src3 - 1);
	613	// next row
	614	_src1 += srcwidth;
	615	_src2 += srcwidth;
	616	_src3 += srcwidth;
	617	_dest += srcwidth;
	618	}
	619	// copy the last row
	620	memcpy(_dest, _src2, (srcwidth << 1));
	621	break;
	622	case SMOOTH_FILTER_1:
	623	case SMOOTH_FILTER_2:
	624	default:
	625	// setup rows
	626	_src1 = src;
	627	_src2 = _src1 + srcwidth;
	628	_src3 = _src2 + srcwidth;
	629	_dest = dest;
	630	// copy the first row
	631	memcpy(_dest, _src1, (srcwidth << 1));
	632	_dest += srcwidth;
	633	// filter 2nd row to 1 row before the last
	634	for( y = 1; y < srcheight - 1; y++) {
	635	if (y & 1) {
	636	for( x = 0; x < srcwidth; x++) {
	637	for( z = 0; z < 4; z++ ) {
	638	/* Read the entire 16bit pixel and then extract the A,R,G,B components. */
	639	uint32 shift = z << 2;
	640	t2 = (((uint16)(_src1+x)) >> shift) & 0xF;
	641	t5 = (((uint16)(_src2+x)) >> shift) & 0xF;
	642	t8 = (((uint16)(_src3+x)) >> shift) & 0xF;
	643	/* the component value must not overflow 0xF */
	644	val[z] = ((t2+t8)mul2+(t5mul3))>>shift4;
	645	if (val[z] > 0xF) val[z] = 0xF;
	646	}
	647	_dest[x] = val[0]\|(val[1]<<4)\|(val[2]<<8)\|(val[3]<<12);
	648	}
	649	} else {
	650	memcpy(_dest, _src2, (srcwidth << 1));
	651	}
	652	// next row
	653	_src1 += srcwidth;
	654	_src2 += srcwidth;
	655	_src3 += srcwidth;
	656	_dest += srcwidth;
	657	}
	658	// copy the last row
	659	memcpy(_dest, _src2, (srcwidth << 1));
	660	break;
	661	}
	662	}
	663	#endif /* !_16BPP_HACK */
	664
	665	void filter_8888(uint32 src, uint32 srcwidth, uint32 srcheight, uint32 dest, uint32 filter) {
	666	switch (filter & ENHANCEMENT_MASK) {
	667	case HQ4X_ENHANCEMENT:
	668	hq4x_8888((uint8)src, (uint8)dest, srcwidth, srcheight, srcwidth, (srcwidth << 4));
	669	return;
	670	case HQ2X_ENHANCEMENT:
	671	hq2x_32((uint8)src, (srcwidth << 2), (uint8)dest, (srcwidth << 3), srcwidth, srcheight);
	672	return;
	673	case HQ2XS_ENHANCEMENT:
	674	hq2xS_32((uint8)src, (srcwidth << 2), (uint8)dest, (srcwidth << 3), srcwidth, srcheight);
	675	return;
	676	case LQ2X_ENHANCEMENT:
	677	lq2x_32((uint8)src, (srcwidth << 2), (uint8)dest, (srcwidth << 3), srcwidth, srcheight);
	678	return;
	679	case LQ2XS_ENHANCEMENT:
	680	lq2xS_32((uint8)src, (srcwidth << 2), (uint8)dest, (srcwidth << 3), srcwidth, srcheight);
	681	return;
	682	case X2SAI_ENHANCEMENT:
	683	Super2xSaI_8888((uint32)src, (uint32)dest, srcwidth, srcheight, srcwidth);
	684	return;
	685	case X2_ENHANCEMENT:
	686	Texture2x_32((uint8)src, (srcwidth << 2), (uint8)dest, (srcwidth << 3), srcwidth, srcheight);
	687	return;
	688	}
	689
	690	switch (filter & (SMOOTH_FILTER_MASK\|SHARP_FILTER_MASK)) {
	691	case SMOOTH_FILTER_1:
	692	case SMOOTH_FILTER_2:
	693	case SMOOTH_FILTER_3:
	694	case SMOOTH_FILTER_4:
	695	SmoothFilter_8888((uint32)src, srcwidth, srcheight, (uint32)dest, (filter & SMOOTH_FILTER_MASK));
	696	return;
	697	case SHARP_FILTER_1:
	698	case SHARP_FILTER_2:
	699	SharpFilter_8888((uint32)src, srcwidth, srcheight, (uint32)dest, (filter & SHARP_FILTER_MASK));
	700	return;
	701	}
	702	}