source/gles2glide64/src/GlideHQ/TextureFilters.cpp

   1 /*
   2 Copyright (C) 2003 Rice1964
   3
   4 This program is free software; you can redistribute it and/or
   5 modify it under the terms of the GNU General Public License
   6 as published by the Free Software Foundation; either version 2
   7 of the License, or (at your option) any later version.
   8
   9 This program is distributed in the hope that it will be useful,
  10 but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 GNU General Public License for more details.
  13
  14 You should have received a copy of the GNU General Public License
  15 along with this program; if not, write to the Free Software
  16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  17
  18 */
  19
  20 /* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
  21  * Modified for the Texture Filtering library
  22  */
  23
  24 #include <string.h>
  25 #include "TextureFilters.h"
  26
  27 /************************************************************************/
  28 /* 2X filters                                                           */
  29 /************************************************************************/
  30
  31 #define DWORD_MAKE(r, g, b, a)   ((uint32) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b)))
  32 #define WORD_MAKE(r, g, b, a)   ((uint16) (((a) << 12) | ((r) << 8) | ((g) << 4) | (b)))
  33
  34 // Basic 2x R8G8B8A8 filter with interpolation
  35
  36 void Texture2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
  37 {
  38   uint32 *pDst1, *pDst2;
  39   uint32 *pSrc, *pSrc2;
  40   uint32 nWidth = width;
  41   uint32 nHeight = height;
  42
  43   uint32 b1;
  44   uint32 g1;
  45   uint32 r1;
  46   uint32 a1;
  47   uint32 b2;
  48   uint32 g2;
  49   uint32 r2;
  50   uint32 a2;
  51   uint32 b3;
  52   uint32 g3;
  53   uint32 r3;
  54   uint32 a3;
  55   uint32 b4;
  56   uint32 g4;
  57   uint32 r4;
  58   uint32 a4;
  59
  60   uint32 xSrc;
  61   uint32 ySrc;
  62
  63   for (ySrc = 0; ySrc < nHeight; ySrc++)
  64   {
  65     pSrc = (uint32*)(((uint8*)srcPtr)+ySrc*srcPitch);
  66     pSrc2 = (uint32*)(((uint8*)srcPtr)+(ySrc+1)*srcPitch);
  67     pDst1 = (uint32*)(((uint8*)dstPtr)+(ySrc*2)*dstPitch);
  68     pDst2 = (uint32*)(((uint8*)dstPtr)+(ySrc*2+1)*dstPitch);
  69
  70     for (xSrc = 0; xSrc < nWidth; xSrc++)
  71     {
  72       b1 = (pSrc[xSrc]>>0)&0xFF;
  73       g1 = (pSrc[xSrc]>>8)&0xFF;
  74       r1 = (pSrc[xSrc]>>16)&0xFF;
  75       a1 = (pSrc[xSrc]>>24)&0xFF;
  76
  77       // Pixel 1
  78       pDst1[xSrc*2] = pSrc[xSrc];
  79
  80       // Pixel 2
  81       if( xSrc<nWidth-1 )
  82       {
  83         b2 = (pSrc[xSrc+1]>>0)&0xFF;
  84         g2 = (pSrc[xSrc+1]>>8)&0xFF;
  85         r2 = (pSrc[xSrc+1]>>16)&0xFF;
  86         a2 = (pSrc[xSrc+1]>>24)&0xFF;
  87         pDst1[xSrc*2+1] = DWORD_MAKE((r1+r2)/2, (g1+g2)/2, (b1+b2)/2, (a1+a2)/2);
  88       }
  89       else
  90         pDst1[xSrc*2+1] = pSrc[xSrc];
  91
  92       // Pixel 3
  93       if( ySrc<nHeight-1 )
  94       {
  95         b3 = (pSrc2[xSrc]>>0)&0xFF;
  96         g3 = (pSrc2[xSrc]>>8)&0xFF;
  97         r3 = (pSrc2[xSrc]>>16)&0xFF;
  98         a3 = (pSrc2[xSrc]>>24)&0xFF;
  99         pDst2[xSrc*2] = DWORD_MAKE((r1+r3)/2, (g1+g3)/2, (b1+b3)/2, (a1+a3)/2);
 100         if( xSrc<nWidth-1 )
 101         {
 102           b4 = (pSrc2[xSrc+1]>>0)&0xFF;
 103           g4 = (pSrc2[xSrc+1]>>8)&0xFF;
 104           r4 = (pSrc2[xSrc+1]>>16)&0xFF;
 105           a4 = (pSrc2[xSrc+1]>>24)&0xFF;
 106           // Pixel 4
 107           pDst2[xSrc*2+1] = DWORD_MAKE((r1+r2+r3+r4)/4, (g1+g2+g3+g4)/4, (b1+b2+b3+b4)/4, (a1+a2+a3+a4)/4);
 108         }
 109         else
 110         {
 111           // Pixel 4
 112           pDst2[xSrc*2+1] = DWORD_MAKE((r1+r3)/2, (g1+g3)/2, (b1+b3)/2, (a1+a3)/2);
 113         }
 114       }
 115       else
 116       {
 117         // Pixel 3
 118         pDst2[xSrc*2] = pSrc[xSrc];
 119         // Pixel 4
 120         if( xSrc<nWidth-1 )
 121         {
 122           pDst2[xSrc*2+1] = DWORD_MAKE((r1+r2)/2, (g1+g2)/2, (b1+b2)/2, (a1+a2)/2);
 123         }
 124         else
 125         {
 126           pDst2[xSrc*2+1] = pSrc[xSrc];
 127         }
 128       }
 129     }
 130   }
 131 }
 132
 133 #if !_16BPP_HACK
 134 // Basic 2x R4G4B4A4 filter with interpolation
 135 void Texture2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
 136 {
 137   uint16 *pDst1, *pDst2;
 138   uint16 *pSrc, *pSrc2;
 139   uint32 nWidth = width;
 140   uint32 nHeight = height;
 141
 142   uint16 b1;
 143   uint16 g1;
 144   uint16 r1;
 145   uint16 a1;
 146   uint16 b2;
 147   uint16 g2;
 148   uint16 r2;
 149   uint16 a2;
 150   uint16 b3;
 151   uint16 g3;
 152   uint16 r3;
 153   uint16 a3;
 154   uint16 b4;
 155   uint16 g4;
 156   uint16 r4;
 157   uint16 a4;
 158
 159   uint16 xSrc;
 160   uint16 ySrc;
 161
 162   for (ySrc = 0; ySrc < nHeight; ySrc++)
 163   {
 164     pSrc = (uint16*)(((uint8*)srcPtr)+ySrc*srcPitch);
 165     pSrc2 = (uint16*)(((uint8*)srcPtr)+(ySrc+1)*srcPitch);
 166     pDst1 = (uint16*)(((uint8*)dstPtr)+(ySrc*2)*dstPitch);
 167     pDst2 = (uint16*)(((uint8*)dstPtr)+(ySrc*2+1)*dstPitch);
 168
 169     for (xSrc = 0; xSrc < nWidth; xSrc++)
 170     {
 171       b1 = (pSrc[xSrc]>> 0)&0xF;
 172       g1 = (pSrc[xSrc]>> 4)&0xF;
 173       r1 = (pSrc[xSrc]>> 8)&0xF;
 174       a1 = (pSrc[xSrc]>>12)&0xF;
 175
 176       if( xSrc<nWidth-1 )
 177       {
 178         b2 = (pSrc[xSrc+1]>> 0)&0xF;
 179         g2 = (pSrc[xSrc+1]>> 4)&0xF;
 180         r2 = (pSrc[xSrc+1]>> 8)&0xF;
 181         a2 = (pSrc[xSrc+1]>>12)&0xF;
 182       }
 183
 184       if( ySrc<nHeight-1 )
 185       {
 186         b3 = (pSrc2[xSrc]>> 0)&0xF;
 187         g3 = (pSrc2[xSrc]>> 4)&0xF;
 188         r3 = (pSrc2[xSrc]>> 8)&0xF;
 189         a3 = (pSrc2[xSrc]>>12)&0xF;
 190         if( xSrc<nWidth-1 )
 191         {
 192           b4 = (pSrc2[xSrc+1]>> 0)&0xF;
 193           g4 = (pSrc2[xSrc+1]>> 4)&0xF;
 194           r4 = (pSrc2[xSrc+1]>> 8)&0xF;
 195           a4 = (pSrc2[xSrc+1]>>12)&0xF;
 196         }
 197       }
 198
 199       // Pixel 1
 200       pDst1[xSrc*2] = pSrc[xSrc];
 201
 202       // Pixel 2
 203       if( xSrc<nWidth-1 )
 204       {
 205         pDst1[xSrc*2+1] = WORD_MAKE((r1+r2)/2, (g1+g2)/2, (b1+b2)/2, (a1+a2)/2);
 206       }
 207       else
 208         pDst1[xSrc*2+1] = pSrc[xSrc];
 209
 210
 211       // Pixel 3
 212       if( ySrc<nHeight-1 )
 213       {
 214         pDst2[xSrc*2] = WORD_MAKE((r1+r3)/2, (g1+g3)/2, (b1+b3)/2, (a1+a3)/2);
 215       }
 216       else
 217         pDst2[xSrc*2] = pSrc[xSrc];
 218
 219       // Pixel 4
 220       if( xSrc<nWidth-1 )
 221       {
 222         if( ySrc<nHeight-1 )
 223         {
 224           pDst2[xSrc*2+1] = WORD_MAKE((r1+r2+r3+r4)/4, (g1+g2+g3+g4)/4, (b1+b2+b3+b4)/4, (a1+a2+a3+a4)/4);
 225         }
 226         else
 227         {
 228           pDst2[xSrc*2+1] = WORD_MAKE((r1+r2)/2, (g1+g2)/2, (b1+b2)/2, (a1+a2)/2);
 229         }
 230       }
 231       else
 232       {
 233         if( ySrc<nHeight-1 )
 234         {
 235           pDst2[xSrc*2+1] = WORD_MAKE((r1+r3)/2, (g1+g3)/2, (b1+b3)/2, (a1+a3)/2);
 236         }
 237         else
 238           pDst2[xSrc*2+1] = pSrc[xSrc];
 239       }
 240     }
 241   }
 242 }
 243 #endif /* !_16BPP_HACK */
 244
 245 /*
 246  * Sharp filters
 247  * Hiroshi Morii <koolsmoky@users.sourceforge.net>
 248  */
 249 void SharpFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter)
 250 {
 251   // NOTE: for now we get away with copying the boundaries
 252   //       filter the boundaries if we face problems
 253
 254   uint32 mul1, mul2, mul3, shift4;
 255
 256   uint32 x,y,z;
 257   uint32 *_src1, *_src2, *_src3, *_dest;
 258   uint32 val[4];
 259   uint32 t1,t2,t3,t4,t5,t6,t7,t8,t9;
 260
 261   switch( filter )
 262   {
 263   case SHARP_FILTER_2:
 264     mul1=1;
 265     mul2=8;
 266     mul3=12;
 267     shift4=2;
 268     break;
 269   case SHARP_FILTER_1:
 270   default:
 271     mul1=1;
 272     mul2=8;
 273     mul3=16;
 274     shift4=3;
 275     break;
 276   }
 277
 278   // setup rows
 279   _src1 = src;
 280   _src2 = _src1 + srcwidth;
 281   _src3 = _src2 + srcwidth;
 282   _dest = dest;
 283
 284   // copy the first row
 285   memcpy(_dest, _src1, (srcwidth << 2));
 286   _dest += srcwidth;
 287   // filter 2nd row to 1 row before the last
 288   for (y = 1; y < srcheight-1; y++) {
 289     // copy the first pixel
 290     _dest[0] = *_src2;
 291     // filter 2nd pixel to 1 pixel before last
 292     for (x = 1; x < srcwidth-1; x++) {
 293       for (z=0; z<4; z++) {
 294         t1 = *((uint8*)(_src1+x-1)+z);
 295         t2 = *((uint8*)(_src1+x  )+z);
 296         t3 = *((uint8*)(_src1+x+1)+z);
 297         t4 = *((uint8*)(_src2+x-1)+z);
 298         t5 = *((uint8*)(_src2+x  )+z);
 299         t6 = *((uint8*)(_src2+x+1)+z);
 300         t7 = *((uint8*)(_src3+x-1)+z);
 301         t8 = *((uint8*)(_src3+x  )+z);
 302         t9 = *((uint8*)(_src3+x+1)+z);
 303
 304         if( (t5*mul2) > (t1+t3+t7+t9+t2+t4+t6+t8)*mul1 ) {
 305           val[z]= ((t5*mul3) - (t1+t3+t7+t9+t2+t4+t6+t8)*mul1)>>shift4;
 306           if (val[z] > 0xFF) val[z] = 0xFF;
 307         } else {
 308           val[z] = t5;
 309         }
 310       }
 311       _dest[x] = val[0]|(val[1]<<8)|(val[2]<<16)|(val[3]<<24);
 312     }
 313     // copy the ending pixel
 314     _dest[srcwidth-1] = *(_src3 - 1);
 315     // next row
 316     _src1 += srcwidth;
 317     _src2 += srcwidth;
 318     _src3 += srcwidth;
 319     _dest += srcwidth;
 320   }
 321   // copy the last row
 322   memcpy(_dest, _src2, (srcwidth << 2));
 323 }
 324
 325 #if !_16BPP_HACK
 326 void SharpFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter)
 327 {
 328   // NOTE: for now we get away with copying the boundaries
 329   //       filter the boundaries if we face problems
 330
 331   uint16 mul1, mul2, mul3, shift4;
 332
 333   uint32 x,y,z;
 334   uint16 *_src1, *_src2, *_src3, *_dest;
 335   uint16 val[4];
 336   uint16 t1,t2,t3,t4,t5,t6,t7,t8,t9;
 337
 338   switch( filter ) {
 339   case SHARP_FILTER_2:
 340     mul1=1;
 341     mul2=8;
 342     mul3=12;
 343     shift4=2;
 344     break;
 345   case SHARP_FILTER_1:
 346   default:
 347     mul1=1;
 348     mul2=8;
 349     mul3=16;
 350     shift4=3;
 351     break;
 352   }
 353
 354   // setup rows
 355   _src1 = src;
 356   _src2 = _src1 + srcwidth;
 357   _src3 = _src2 + srcwidth;
 358   _dest = dest;
 359
 360   // copy the first row
 361   memcpy(_dest, _src1, (srcwidth << 1));
 362   _dest += srcwidth;
 363   // filter 2nd row to 1 row before the last
 364   for( y = 1; y < srcheight - 1; y++) {
 365     // copy the first pixel
 366     _dest[0] = *_src2;
 367     // filter 2nd pixel to 1 pixel before last
 368     for( x = 1; x < srcwidth - 1; x++) {
 369       for( z = 0; z < 4; z++ ) {
 370         /* Hiroshi Morii <koolsmoky@users.sourceforge.net>
 371          * Read the entire 16bit pixel and then extract the A,R,G,B components.
 372          */
 373         uint32 shift = z << 2;
 374         t1 = ((*((uint16*)(_src1+x-1))) >> shift) & 0xF;
 375         t2 = ((*((uint16*)(_src1+x  ))) >> shift) & 0xF;
 376         t3 = ((*((uint16*)(_src1+x+1))) >> shift) & 0xF;
 377         t4 = ((*((uint16*)(_src2+x-1))) >> shift) & 0xF;
 378         t5 = ((*((uint16*)(_src2+x  ))) >> shift) & 0xF;
 379         t6 = ((*((uint16*)(_src2+x+1))) >> shift) & 0xF;
 380         t7 = ((*((uint16*)(_src3+x-1))) >> shift) & 0xF;
 381         t8 = ((*((uint16*)(_src3+x  ))) >> shift) & 0xF;
 382         t9 = ((*((uint16*)(_src3+x+1))) >> shift) & 0xF;
 383
 384         if( (t5*mul2) > (t1+t3+t7+t9+t2+t4+t6+t8)*mul1 ) {
 385           val[z] = ((t5*mul3) - (t1+t3+t7+t9+t2+t4+t6+t8)*mul1)>>shift4;
 386           if (val[z] > 0xF) val[z] = 0xF;
 387         } else {
 388           val[z] = t5;
 389         }
 390       }
 391       _dest[x] = val[0]|(val[1]<<4)|(val[2]<<8)|(val[3]<<12);
 392     }
 393     // copy the ending pixel
 394     _dest[srcwidth-1] = *(_src3 - 1);
 395     // next row
 396     _src1 += srcwidth;
 397     _src2 += srcwidth;
 398     _src3 += srcwidth;
 399     _dest += srcwidth;
 400   }
 401   // copy the last row
 402   memcpy(_dest, _src2, (srcwidth << 1));
 403 }
 404 #endif /* !_16BPP_HACK */
 405
 406 /*
 407  * Smooth filters
 408  * Hiroshi Morii <koolsmoky@users.sourceforge.net>
 409  */
 410 void SmoothFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter)
 411 {
 412   // NOTE: for now we get away with copying the boundaries
 413   //       filter the boundaries if we face problems
 414
 415   uint32 mul1, mul2, mul3, shift4;
 416
 417   uint32 x,y,z;
 418   uint32 *_src1, *_src2, *_src3, *_dest;
 419   uint32 val[4];
 420   uint32 t1,t2,t3,t4,t5,t6,t7,t8,t9;
 421
 422   switch( filter ) {
 423   case SMOOTH_FILTER_4:
 424     mul1=1;
 425     mul2=2;
 426     mul3=4;
 427     shift4=4;
 428     break;
 429   case SMOOTH_FILTER_3:
 430     mul1=1;
 431     mul2=1;
 432     mul3=8;
 433     shift4=4;
 434     break;
 435   case SMOOTH_FILTER_2:
 436     mul1=1;
 437     mul2=1;
 438     mul3=2;
 439     shift4=2;
 440     break;
 441   case SMOOTH_FILTER_1:
 442   default:
 443     mul1=1;
 444     mul2=1;
 445     mul3=6;
 446     shift4=3;
 447     break;
 448   }
 449
 450   switch (filter) {
 451   case SMOOTH_FILTER_3:
 452   case SMOOTH_FILTER_4:
 453     // setup rows
 454     _src1 = src;
 455     _src2 = _src1 + srcwidth;
 456     _src3 = _src2 + srcwidth;
 457     _dest = dest;
 458     // copy the first row
 459     memcpy(_dest, _src1, (srcwidth << 2));
 460     _dest += srcwidth;
 461     // filter 2nd row to 1 row before the last
 462     for (y = 1; y < srcheight - 1; y++){
 463       // copy the first pixel
 464       _dest[0] = _src2[0];
 465       // filter 2nd pixel to 1 pixel before last
 466       for (x = 1; x < srcwidth - 1; x++) {
 467         for (z = 0; z < 4; z++ ) {
 468           t1 = *((uint8*)(_src1+x-1)+z);
 469           t2 = *((uint8*)(_src1+x  )+z);
 470           t3 = *((uint8*)(_src1+x+1)+z);
 471           t4 = *((uint8*)(_src2+x-1)+z);
 472           t5 = *((uint8*)(_src2+x  )+z);
 473           t6 = *((uint8*)(_src2+x+1)+z);
 474           t7 = *((uint8*)(_src3+x-1)+z);
 475           t8 = *((uint8*)(_src3+x  )+z);
 476           t9 = *((uint8*)(_src3+x+1)+z);
 477           /* the component value must not overflow 0xFF */
 478           val[z] = ((t1+t3+t7+t9)*mul1+((t2+t4+t6+t8)*mul2)+(t5*mul3))>>shift4;
 479           if (val[z] > 0xFF) val[z] = 0xFF;
 480         }
 481         _dest[x] = val[0]|(val[1]<<8)|(val[2]<<16)|(val[3]<<24);
 482       }
 483       // copy the ending pixel
 484       _dest[srcwidth-1] = *(_src3 - 1);
 485       // next row
 486       _src1 += srcwidth;
 487       _src2 += srcwidth;
 488       _src3 += srcwidth;
 489       _dest += srcwidth;
 490     }
 491     // copy the last row
 492     memcpy(_dest, _src2, (srcwidth << 2));
 493     break;
 494   case SMOOTH_FILTER_1:
 495   case SMOOTH_FILTER_2:
 496   default:
 497     // setup rows
 498     _src1 = src;
 499     _src2 = _src1 + srcwidth;
 500     _src3 = _src2 + srcwidth;
 501     _dest = dest;
 502     // copy the first row
 503     memcpy(_dest, _src1, (srcwidth << 2));
 504     _dest += srcwidth;
 505     // filter 2nd row to 1 row before the last
 506     for (y = 1; y < srcheight - 1; y++) {
 507       // filter 1st pixel to the last
 508       if (y & 1) {
 509         for( x = 0; x < srcwidth; x++) {
 510           for( z = 0; z < 4; z++ ) {
 511             t2 = *((uint8*)(_src1+x  )+z);
 512             t5 = *((uint8*)(_src2+x  )+z);
 513             t8 = *((uint8*)(_src3+x  )+z);
 514             /* the component value must not overflow 0xFF */
 515             val[z] = ((t2+t8)*mul2+(t5*mul3))>>shift4;
 516             if (val[z] > 0xFF) val[z] = 0xFF;
 517           }
 518           _dest[x] = val[0]|(val[1]<<8)|(val[2]<<16)|(val[3]<<24);
 519         }
 520       } else {
 521          memcpy(_dest, _src2, (srcwidth << 2));
 522       }
 523       // next row
 524       _src1 += srcwidth;
 525       _src2 += srcwidth;
 526       _src3 += srcwidth;
 527       _dest += srcwidth;
 528     }
 529     // copy the last row
 530     memcpy(_dest, _src2, (srcwidth << 2));
 531     break;
 532   }
 533 }
 534
 535 #if !_16BPP_HACK
 536 void SmoothFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter)
 537 {
 538   // NOTE: for now we get away with copying the boundaries
 539   //       filter the boundaries if we face problems
 540
 541   uint16 mul1, mul2, mul3, shift4;
 542
 543   uint32 x,y,z;
 544   uint16 *_src1, *_src2, *_src3, *_dest;
 545   uint16 val[4];
 546   uint16 t1,t2,t3,t4,t5,t6,t7,t8,t9;
 547
 548   switch( filter ) {
 549   case SMOOTH_FILTER_4:
 550     mul1=1;
 551     mul2=2;
 552     mul3=4;
 553     shift4=4;
 554     break;
 555   case SMOOTH_FILTER_3:
 556     mul1=1;
 557     mul2=1;
 558     mul3=8;
 559     shift4=4;
 560     break;
 561   case SMOOTH_FILTER_2:
 562     mul1=1;
 563     mul2=1;
 564     mul3=2;
 565     shift4=2;
 566     break;
 567   case SMOOTH_FILTER_1:
 568   default:
 569     mul1=1;
 570     mul2=1;
 571     mul3=6;
 572     shift4=3;
 573     break;
 574   }
 575
 576   switch (filter) {
 577   case SMOOTH_FILTER_3:
 578   case SMOOTH_FILTER_4:
 579     // setup rows
 580     _src1 = src;
 581     _src2 = _src1 + srcwidth;
 582     _src3 = _src2 + srcwidth;
 583     _dest = dest;
 584     // copy the first row
 585     memcpy(_dest, _src1, (srcwidth << 1));
 586     _dest += srcwidth;
 587     // filter 2nd row to 1 row before the last
 588     for (y = 1; y < srcheight - 1; y++) {
 589       // copy the first pixel
 590       _dest[0] = *_src2;
 591       // filter 2nd pixel to 1 pixel before last
 592       for (x = 1; x < srcwidth - 1; x++) {
 593         for (z = 0; z < 4; z++ ) {
 594           /* Read the entire 16bit pixel and then extract the A,R,G,B components. */
 595           uint32 shift = z << 2;
 596           t1 = ((*(uint16*)(_src1+x-1)) >> shift) & 0xF;
 597           t2 = ((*(uint16*)(_src1+x  )) >> shift) & 0xF;
 598           t3 = ((*(uint16*)(_src1+x+1)) >> shift) & 0xF;
 599           t4 = ((*(uint16*)(_src2+x-1)) >> shift) & 0xF;
 600           t5 = ((*(uint16*)(_src2+x  )) >> shift) & 0xF;
 601           t6 = ((*(uint16*)(_src2+x+1)) >> shift) & 0xF;
 602           t7 = ((*(uint16*)(_src3+x-1)) >> shift) & 0xF;
 603           t8 = ((*(uint16*)(_src3+x  )) >> shift) & 0xF;
 604           t9 = ((*(uint16*)(_src3+x+1)) >> shift) & 0xF;
 605           /* the component value must not overflow 0xF */
 606           val[z] = ((t1+t3+t7+t9)*mul1+((t2+t4+t6+t8)*mul2)+(t5*mul3))>>shift4;
 607           if (val[z] > 0xF) val[z] = 0xF;
 608         }
 609         _dest[x] = val[0]|(val[1]<<4)|(val[2]<<8)|(val[3]<<12);
 610       }
 611       // copy the ending pixel
 612       _dest[srcwidth-1] = *(_src3 - 1);
 613       // next row
 614       _src1 += srcwidth;
 615       _src2 += srcwidth;
 616       _src3 += srcwidth;
 617       _dest += srcwidth;
 618     }
 619     // copy the last row
 620     memcpy(_dest, _src2, (srcwidth << 1));
 621     break;
 622   case SMOOTH_FILTER_1:
 623   case SMOOTH_FILTER_2:
 624   default:
 625     // setup rows
 626     _src1 = src;
 627     _src2 = _src1 + srcwidth;
 628     _src3 = _src2 + srcwidth;
 629     _dest = dest;
 630     // copy the first row
 631     memcpy(_dest, _src1, (srcwidth << 1));
 632     _dest += srcwidth;
 633     // filter 2nd row to 1 row before the last
 634     for( y = 1; y < srcheight - 1; y++) {
 635       if (y & 1) {
 636         for( x = 0; x < srcwidth; x++) {
 637           for( z = 0; z < 4; z++ ) {
 638             /* Read the entire 16bit pixel and then extract the A,R,G,B components. */
 639             uint32 shift = z << 2;
 640             t2 = ((*(uint16*)(_src1+x)) >> shift) & 0xF;
 641             t5 = ((*(uint16*)(_src2+x)) >> shift) & 0xF;
 642             t8 = ((*(uint16*)(_src3+x)) >> shift) & 0xF;
 643             /* the component value must not overflow 0xF */
 644             val[z] = ((t2+t8)*mul2+(t5*mul3))>>shift4;
 645             if (val[z] > 0xF) val[z] = 0xF;
 646           }
 647           _dest[x] = val[0]|(val[1]<<4)|(val[2]<<8)|(val[3]<<12);
 648         }
 649       } else {
 650          memcpy(_dest, _src2, (srcwidth << 1));
 651       }
 652       // next row
 653       _src1 += srcwidth;
 654       _src2 += srcwidth;
 655       _src3 += srcwidth;
 656       _dest += srcwidth;
 657     }
 658     // copy the last row
 659     memcpy(_dest, _src2, (srcwidth << 1));
 660     break;
 661   }
 662 }
 663 #endif /* !_16BPP_HACK */
 664
 665 void filter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter) {
 666   switch (filter & ENHANCEMENT_MASK) {
 667   case HQ4X_ENHANCEMENT:
 668     hq4x_8888((uint8*)src, (uint8*)dest, srcwidth, srcheight, srcwidth, (srcwidth << 4));
 669     return;
 670   case HQ2X_ENHANCEMENT:
 671     hq2x_32((uint8*)src, (srcwidth << 2), (uint8*)dest, (srcwidth << 3), srcwidth, srcheight);
 672     return;
 673   case HQ2XS_ENHANCEMENT:
 674     hq2xS_32((uint8*)src, (srcwidth << 2), (uint8*)dest, (srcwidth << 3), srcwidth, srcheight);
 675     return;
 676   case LQ2X_ENHANCEMENT:
 677     lq2x_32((uint8*)src, (srcwidth << 2), (uint8*)dest, (srcwidth << 3), srcwidth, srcheight);
 678     return;
 679   case LQ2XS_ENHANCEMENT:
 680     lq2xS_32((uint8*)src, (srcwidth << 2), (uint8*)dest, (srcwidth << 3), srcwidth, srcheight);
 681     return;
 682   case X2SAI_ENHANCEMENT:
 683     Super2xSaI_8888((uint32*)src, (uint32*)dest, srcwidth, srcheight, srcwidth);
 684     return;
 685   case X2_ENHANCEMENT:
 686     Texture2x_32((uint8*)src, (srcwidth << 2), (uint8*)dest, (srcwidth << 3), srcwidth, srcheight);
 687     return;
 688   }
 689
 690   switch (filter & (SMOOTH_FILTER_MASK|SHARP_FILTER_MASK)) {
 691   case SMOOTH_FILTER_1:
 692   case SMOOTH_FILTER_2:
 693   case SMOOTH_FILTER_3:
 694   case SMOOTH_FILTER_4:
 695     SmoothFilter_8888((uint32*)src, srcwidth, srcheight, (uint32*)dest, (filter & SMOOTH_FILTER_MASK));
 696     return;
 697   case SHARP_FILTER_1:
 698   case SHARP_FILTER_2:
 699     SharpFilter_8888((uint32*)src, srcwidth, srcheight, (uint32*)dest, (filter & SHARP_FILTER_MASK));
 700     return;
 701   }
 702 }