gpu-gles: schtruck/fpse merge: don't delay gl init
[pcsx_rearmed.git] / plugins / dfxvideo / soft.c
index d50b8e0..c1c3bef 100644 (file)
@@ -62,8 +62,8 @@ short DrawSemiTrans=FALSE;
 short Ymin;
 short Ymax;
 short          ly0,lx0,ly1,lx1,ly2,lx2,ly3,lx3;        // global psx vertex coords
-int32_t           GlobalTextAddrX,GlobalTextAddrY,GlobalTextTP,GlobalTextIL;
-int32_t           GlobalTextREST,GlobalTextABR,GlobalTextPAGE;
+int32_t           GlobalTextAddrX,GlobalTextAddrY,GlobalTextTP;
+int32_t           GlobalTextABR,GlobalTextPAGE;
 
 ////////////////////////////////////////////////////////////////////////
 // POLYGON OFFSET FUNCS
@@ -1077,9 +1077,15 @@ static int left_B, delta_left_B, right_B, delta_right_B;
 // USE_NASM
 static inline int shl10idiv(int x, int y)
 {
+#ifdef __ARM_ARCH_7A__
+ // rearmed: on ARMv7-A builds compute (x*1024)/y in single-precision float (VFP divider) instead of the __int64 integer divide below; the (int) cast truncates toward zero, results are subject to float rounding, and y==0 is not guarded on either path
+ float r = 1024.0f * (float)x / (float)y;
+ return (int)r;
+#else
  __int64 bi=x;
  bi<<=10;
  return bi/y;
+#endif
 }
 
 ////////////////////////////////////////////////////////////////////////
@@ -2645,162 +2651,6 @@ static void drawPoly3TEx4(short x1, short y1, short x2, short y2, short x3, shor
 
 ////////////////////////////////////////////////////////////////////////
 
-static void drawPoly3TEx4_IL(short x1, short y1, short x2, short y2, short x3, short y3, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3,short clX, short clY)
-{
- int i,j,xmin,xmax,ymin,ymax,n_xi,n_yi,TXV;
- int32_t difX, difY,difX2, difY2;
- int32_t posX,posY,YAdjust,XAdjust;
- int32_t clutP;
- short tC1,tC2;
- if(x1>drawW && x2>drawW && x3>drawW) return;
- if(y1>drawH && y2>drawH && y3>drawH) return;
- if(x1<drawX && x2<drawX && x3<drawX) return;
- if(y1<drawY && y2<drawY && y3<drawY) return;
- if(drawY>=drawH) return;
- if(drawX>=drawW) return; 
-
- if(!SetupSections_FT(x1,y1,x2,y2,x3,y3,tx1,ty1,tx2,ty2,tx3,ty3)) return;
-
- ymax=Ymax;
-
- for(ymin=Ymin;ymin<drawY;ymin++)
-  if(NextRow_FT()) return;
-
- clutP=(clY<<10)+clX;
-
- YAdjust=(GlobalTextAddrY<<10)+GlobalTextAddrX;
-
- difX=delta_right_u;difX2=difX<<1;
- difY=delta_right_v;difY2=difY<<1;
-
-#ifdef FASTSOLID
-
- if(!bCheckMask && !DrawSemiTrans)
-  {
-   for (i=ymin;i<=ymax;i++)
-    {
-     xmin=(left_x >> 16);
-     xmax=(right_x >> 16)-1;
-     if(drawW<xmax) xmax=drawW;
-
-     if(xmax>=xmin)
-      {
-       posX=left_u;
-       posY=left_v;
-
-       if(xmin<drawX)
-        {j=drawX-xmin;xmin=drawX;posX+=j*difX;posY+=j*difY;}
-
-       for(j=xmin;j<xmax;j+=2)
-        {
-         XAdjust=(posX>>16);
-
-         TXV=posY>>16;
-         n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-         n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-
-         tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
-
-         XAdjust=((posX+difX)>>16);
-
-         TXV=(posY+difY)>>16;
-         n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-         n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-
-         tC2= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
-
-         GetTextureTransColG32_S((uint32_t *)&psxVuw[(i<<10)+j],
-             GETLE16(&psxVuw[clutP+tC1])|
-             ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
-
-         posX+=difX2;
-         posY+=difY2;
-        }
-       if(j==xmax)
-        {
-         XAdjust=(posX>>16);
-
-         TXV=posY>>16;
-         n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-         n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-
-         tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
-
-         GetTextureTransColG_S(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
-        }
-      }
-     if(NextRow_FT()) 
-      {
-       return;
-      }
-    }
-   return;
-  }
-
-#endif
-
- for (i=ymin;i<=ymax;i++)
-  {
-   xmin=(left_x >> 16);
-   xmax=(right_x >> 16)-1; //!!!!!!!!!!!!!!!!!!
-   if(drawW<xmax) xmax=drawW;
-
-   if(xmax>=xmin)
-    {
-     posX=left_u;
-     posY=left_v;
-
-     if(xmin<drawX)
-      {j=drawX-xmin;xmin=drawX;posX+=j*difX;posY+=j*difY;}
-
-     for(j=xmin;j<xmax;j+=2)
-      {
-       XAdjust=(posX>>16);
-
-       TXV=posY>>16;
-       n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-       n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-
-       tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
-
-       XAdjust=((posX+difX)>>16);
-
-       TXV=(posY+difY)>>16;
-       n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-       n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-
-       tC2= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
-
-       GetTextureTransColG32((uint32_t *)&psxVuw[(i<<10)+j],
-           GETLE16(&psxVuw[clutP+tC1])|
-           ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
-
-       posX+=difX2;
-       posY+=difY2;
-      }
-     if(j==xmax)
-      {
-       XAdjust=(posX>>16);
-
-       TXV=posY>>16;
-       n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-       n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-
-       tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
-
-       GetTextureTransColG(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
-      }
-    }
-   if(NextRow_FT()) 
-    {
-     return;
-    }
-  }
-}
-
-////////////////////////////////////////////////////////////////////////
-
 static void drawPoly3TEx4_TW(short x1, short y1, short x2, short y2, short x3, short y3, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3,short clX, short clY)
 {
  int i,j,xmin,xmax,ymin,ymax;
@@ -2853,12 +2703,12 @@ static void drawPoly3TEx4_TW(short x1, short y1, short x2, short y2, short x3, s
 
        for(j=xmin;j<xmax;j+=2)
         {
-         XAdjust=(posX>>16)%TWin.Position.x1;
-         tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
+         XAdjust=(posX>>16)&TWin.xmask;
+         tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
                       YAdjust+(XAdjust>>1)];
          tC1=(tC1>>((XAdjust&1)<<2))&0xf;
-         XAdjust=((posX+difX)>>16)%TWin.Position.x1;
-         tC2 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
+         XAdjust=((posX+difX)>>16)&TWin.xmask;
+         tC2 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
                       YAdjust+(XAdjust>>1)];
          tC2=(tC2>>((XAdjust&1)<<2))&0xf;
 
@@ -2871,8 +2721,8 @@ static void drawPoly3TEx4_TW(short x1, short y1, short x2, short y2, short x3, s
         }
        if(j==xmax)
         {
-         XAdjust=(posX>>16)%TWin.Position.x1;
-         tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
+         XAdjust=(posX>>16)&TWin.xmask;
+         tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
                       YAdjust+(XAdjust>>1)];
          tC1=(tC1>>((XAdjust&1)<<2))&0xf;
          GetTextureTransColG_S(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
@@ -2904,12 +2754,12 @@ static void drawPoly3TEx4_TW(short x1, short y1, short x2, short y2, short x3, s
 
      for(j=xmin;j<xmax;j+=2)
       {
-       XAdjust=(posX>>16)%TWin.Position.x1;
-       tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
+       XAdjust=(posX>>16)&TWin.xmask;
+       tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
                     YAdjust+(XAdjust>>1)];
        tC1=(tC1>>((XAdjust&1)<<2))&0xf;
-       XAdjust=((posX+difX)>>16)%TWin.Position.x1;
-       tC2 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
+       XAdjust=((posX+difX)>>16)&TWin.xmask;
+       tC2 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
                     YAdjust+(XAdjust>>1)];
        tC2=(tC2>>((XAdjust&1)<<2))&0xf;
 
@@ -2922,8 +2772,8 @@ static void drawPoly3TEx4_TW(short x1, short y1, short x2, short y2, short x3, s
       }
      if(j==xmax)
       {
-       XAdjust=(posX>>16)%TWin.Position.x1;
-       tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
+       XAdjust=(posX>>16)&TWin.xmask;
+       tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
                     YAdjust+(XAdjust>>1)];
        tC1=(tC1>>((XAdjust&1)<<2))&0xf;
        GetTextureTransColG(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
@@ -3090,12 +2940,12 @@ static void drawPoly4TEx4(short x1, short y1, short x2, short y2, short x3, shor
 
 ////////////////////////////////////////////////////////////////////////
 
-static void drawPoly4TEx4_IL(short x1, short y1, short x2, short y2, short x3, short y3, short x4, short y4, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3, short tx4, short ty4,short clX, short clY)
+static void drawPoly4TEx4_TW(short x1, short y1, short x2, short y2, short x3, short y3, short x4, short y4, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3, short tx4, short ty4,short clX, short clY)
 {
  int32_t num; 
- int32_t i,j=0,xmin,xmax,ymin,ymax,n_xi,n_yi,TXV;
+ int32_t i,j,xmin,xmax,ymin,ymax;
  int32_t difX, difY, difX2, difY2;
- int32_t posX=0,posY=0,YAdjust,clutP,XAdjust;
+ int32_t posX,posY,YAdjust,clutP,XAdjust;
  short tC1,tC2;
 
  if(x1>drawW && x2>drawW && x3>drawW && x4>drawW) return;
@@ -3114,7 +2964,8 @@ static void drawPoly4TEx4_IL(short x1, short y1, short x2, short y2, short x3, s
 
  clutP=(clY<<10)+clX;
 
- YAdjust=((GlobalTextAddrY)<<10)+GlobalTextAddrX;
+ YAdjust=((GlobalTextAddrY)<<11)+(GlobalTextAddrX<<1);
+ YAdjust+=(TWin.Position.y0<<11)+(TWin.Position.x0>>1);
 
 #ifdef FASTSOLID
 
@@ -3143,21 +2994,14 @@ static void drawPoly4TEx4_IL(short x1, short y1, short x2, short y2, short x3, s
 
        for(j=xmin;j<xmax;j+=2)
         {
-         XAdjust=(posX>>16);
-
-         TXV=posY>>16;
-         n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-         n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-
-         tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
-
-         XAdjust=((posX+difX)>>16);
-
-         TXV=(posY+difY)>>16;
-         n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-         n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-
-         tC2= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
+         XAdjust=(posX>>16)&TWin.xmask;
+         tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
+                      YAdjust+(XAdjust>>1)];
+         tC1=(tC1>>((XAdjust&1)<<2))&0xf;
+         XAdjust=((posX+difX)>>16)&TWin.xmask;
+         tC2 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
+                      YAdjust+(XAdjust>>1)];
+         tC2=(tC2>>((XAdjust&1)<<2))&0xf;
 
          GetTextureTransColG32_S((uint32_t *)&psxVuw[(i<<10)+j],
               GETLE16(&psxVuw[clutP+tC1])|
@@ -3165,25 +3009,20 @@ static void drawPoly4TEx4_IL(short x1, short y1, short x2, short y2, short x3, s
          posX+=difX2;
          posY+=difY2;
         }
-         posX+=difX2;
-         posY+=difY2;
-        }
-
        if(j==xmax)
         {
-         XAdjust=(posX>>16);
-         TXV=posY>>16;
-         n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-         n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-
-         tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
-
+         XAdjust=(posX>>16)&TWin.xmask;
+         tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
+                      YAdjust+(XAdjust>>1)];
+         tC1=(tC1>>((XAdjust&1)<<2))&0xf;
          GetTextureTransColG_S(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
         }
-
       }
      if(NextRow_FT4()) return;
     }
+   return;
+  }
+
 #endif
 
  for (i=ymin;i<=ymax;i++)
@@ -3209,21 +3048,14 @@ static void drawPoly4TEx4_IL(short x1, short y1, short x2, short y2, short x3, s
 
      for(j=xmin;j<xmax;j+=2)
       {
-       XAdjust=(posX>>16);
-
-       TXV=posY>>16;
-       n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-       n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-
-       tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
-
-       XAdjust=((posX+difX)>>16);
-
-       TXV=(posY+difY)>>16;
-       n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-       n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-
-       tC2= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
+       XAdjust=(posX>>16)&TWin.xmask;
+       tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
+                    YAdjust+(XAdjust>>1)];
+       tC1=(tC1>>((XAdjust&1)<<2))&0xf;
+       XAdjust=((posX+difX)>>16)&TWin.xmask;
+       tC2 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
+                    YAdjust+(XAdjust>>1)];
+       tC2=(tC2>>((XAdjust&1)<<2))&0xf;
 
        GetTextureTransColG32((uint32_t *)&psxVuw[(i<<10)+j],
             GETLE16(&psxVuw[clutP+tC1])|
@@ -3233,13 +3065,10 @@ static void drawPoly4TEx4_IL(short x1, short y1, short x2, short y2, short x3, s
       }
      if(j==xmax)
       {
-       XAdjust=(posX>>16);
-       TXV=posY>>16;
-       n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-       n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-
-       tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
-
+       XAdjust=(posX>>16)&TWin.xmask;
+       tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
+                    YAdjust+(XAdjust>>1)];
+       tC1=(tC1>>((XAdjust&1)<<2))&0xf;
        GetTextureTransColG(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
       }
     }
@@ -3249,7 +3078,7 @@ static void drawPoly4TEx4_IL(short x1, short y1, short x2, short y2, short x3, s
 
 ////////////////////////////////////////////////////////////////////////
 
-static void drawPoly4TEx4_TW(short x1, short y1, short x2, short y2, short x3, short y3, short x4, short y4, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3, short tx4, short ty4,short clX, short clY)
+static void drawPoly4TEx4_TW_S(short x1, short y1, short x2, short y2, short x3, short y3, short x4, short y4, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3, short tx4, short ty4,short clX, short clY)
 {
  int32_t num; 
  int32_t i,j,xmin,xmax,ymin,ymax;
@@ -3303,12 +3132,12 @@ static void drawPoly4TEx4_TW(short x1, short y1, short x2, short y2, short x3, s
 
        for(j=xmin;j<xmax;j+=2)
         {
-         XAdjust=(posX>>16)%TWin.Position.x1;
-         tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
+         XAdjust=(posX>>16)&TWin.xmask;
+         tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
                       YAdjust+(XAdjust>>1)];
          tC1=(tC1>>((XAdjust&1)<<2))&0xf;
-         XAdjust=((posX+difX)>>16)%TWin.Position.x1;
-         tC2 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
+         XAdjust=((posX+difX)>>16)&TWin.xmask;
+         tC2 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
                       YAdjust+(XAdjust>>1)];
          tC2=(tC2>>((XAdjust&1)<<2))&0xf;
 
@@ -3320,8 +3149,8 @@ static void drawPoly4TEx4_TW(short x1, short y1, short x2, short y2, short x3, s
         }
        if(j==xmax)
         {
-         XAdjust=(posX>>16)%TWin.Position.x1;
-         tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
+         XAdjust=(posX>>16)&TWin.xmask;
+         tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
                       YAdjust+(XAdjust>>1)];
          tC1=(tC1>>((XAdjust&1)<<2))&0xf;
          GetTextureTransColG_S(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
@@ -3357,16 +3186,16 @@ static void drawPoly4TEx4_TW(short x1, short y1, short x2, short y2, short x3, s
 
      for(j=xmin;j<xmax;j+=2)
       {
-       XAdjust=(posX>>16)%TWin.Position.x1;
-       tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
+       XAdjust=(posX>>16)&TWin.xmask;
+       tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
                     YAdjust+(XAdjust>>1)];
        tC1=(tC1>>((XAdjust&1)<<2))&0xf;
-       XAdjust=((posX+difX)>>16)%TWin.Position.x1;
-       tC2 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
+       XAdjust=((posX+difX)>>16)&TWin.xmask;
+       tC2 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
                     YAdjust+(XAdjust>>1)];
        tC2=(tC2>>((XAdjust&1)<<2))&0xf;
 
-       GetTextureTransColG32((uint32_t *)&psxVuw[(i<<10)+j],
+       GetTextureTransColG32_SPR((uint32_t *)&psxVuw[(i<<10)+j],
             GETLE16(&psxVuw[clutP+tC1])|
             ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
        posX+=difX2;
@@ -3374,45 +3203,47 @@ static void drawPoly4TEx4_TW(short x1, short y1, short x2, short y2, short x3, s
       }
      if(j==xmax)
       {
-       XAdjust=(posX>>16)%TWin.Position.x1;
-       tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
+       XAdjust=(posX>>16)&TWin.xmask;
+       tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
                     YAdjust+(XAdjust>>1)];
        tC1=(tC1>>((XAdjust&1)<<2))&0xf;
-       GetTextureTransColG(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
+       GetTextureTransColG_SPR(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
       }
     }
    if(NextRow_FT4()) return;
   }
 }
-
+////////////////////////////////////////////////////////////////////////
+// POLY 3 F-SHADED TEX PAL 8
 ////////////////////////////////////////////////////////////////////////
 
-static void drawPoly4TEx4_TW_S(short x1, short y1, short x2, short y2, short x3, short y3, short x4, short y4, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3, short tx4, short ty4,short clX, short clY)
+static void drawPoly3TEx8(short x1, short y1, short x2, short y2, short x3, short y3, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3,short clX, short clY)
 {
- int32_t num; 
- int32_t i,j,xmin,xmax,ymin,ymax;
- int32_t difX, difY, difX2, difY2;
- int32_t posX,posY,YAdjust,clutP,XAdjust;
+ int i,j,xmin,xmax,ymin,ymax;
+ int32_t difX, difY,difX2, difY2;
+ int32_t posX,posY,YAdjust,clutP;
  short tC1,tC2;
 
- if(x1>drawW && x2>drawW && x3>drawW && x4>drawW) return;
- if(y1>drawH && y2>drawH && y3>drawH && y4>drawH) return;
- if(x1<drawX && x2<drawX && x3<drawX && x4<drawX) return;
- if(y1<drawY && y2<drawY && y3<drawY && y4<drawY) return;
+ if(x1>drawW && x2>drawW && x3>drawW) return;
+ if(y1>drawH && y2>drawH && y3>drawH) return;
+ if(x1<drawX && x2<drawX && x3<drawX) return;
+ if(y1<drawY && y2<drawY && y3<drawY) return;
  if(drawY>=drawH) return;
  if(drawX>=drawW) return; 
 
- if(!SetupSections_FT4(x1,y1,x2,y2,x3,y3,x4,y4,tx1,ty1,tx2,ty2,tx3,ty3,tx4,ty4)) return;
+ if(!SetupSections_FT(x1,y1,x2,y2,x3,y3,tx1,ty1,tx2,ty2,tx3,ty3)) return;
 
  ymax=Ymax;
 
  for(ymin=Ymin;ymin<drawY;ymin++)
-  if(NextRow_FT4()) return;
+  if(NextRow_FT()) return;
 
  clutP=(clY<<10)+clX;
 
  YAdjust=((GlobalTextAddrY)<<11)+(GlobalTextAddrX<<1);
- YAdjust+=(TWin.Position.y0<<11)+(TWin.Position.x0>>1);
+
+ difX=delta_right_u;difX2=difX<<1;
+ difY=delta_right_v;difY2=difY<<1;
 
 #ifdef FASTSOLID
 
@@ -3421,51 +3252,39 @@ static void drawPoly4TEx4_TW_S(short x1, short y1, short x2, short y2, short x3,
    for (i=ymin;i<=ymax;i++)
     {
      xmin=(left_x >> 16);
-     xmax=(right_x >> 16);
+     xmax=(right_x >> 16)-1; //!!!!!!!!!!!!!!!
+     if(drawW<xmax) xmax=drawW;
 
      if(xmax>=xmin)
       {
        posX=left_u;
        posY=left_v;
 
-       num=(xmax-xmin);
-       if(num==0) num=1;
-       difX=(right_u-posX)/num;
-       difY=(right_v-posY)/num;
-       difX2=difX<<1;
-       difY2=difY<<1;
-
        if(xmin<drawX)
         {j=drawX-xmin;xmin=drawX;posX+=j*difX;posY+=j*difY;}
-       xmax--;if(drawW<xmax) xmax=drawW;
 
        for(j=xmin;j<xmax;j+=2)
         {
-         XAdjust=(posX>>16)%TWin.Position.x1;
-         tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
-                      YAdjust+(XAdjust>>1)];
-         tC1=(tC1>>((XAdjust&1)<<2))&0xf;
-         XAdjust=((posX+difX)>>16)%TWin.Position.x1;
-         tC2 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
-                      YAdjust+(XAdjust>>1)];
-         tC2=(tC2>>((XAdjust&1)<<2))&0xf;
-
+         tC1 = psxVub[((posY>>5)&(int32_t)0xFFFFF800)+YAdjust+(posX>>16)];
+         tC2 = psxVub[(((posY+difY)>>5)&(int32_t)0xFFFFF800)+YAdjust+
+                      ((posX+difX)>>16)];
          GetTextureTransColG32_S((uint32_t *)&psxVuw[(i<<10)+j],
-              GETLE16(&psxVuw[clutP+tC1])|
-              ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
+             GETLE16(&psxVuw[clutP+tC1])|
+             ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
          posX+=difX2;
          posY+=difY2;
         }
+
        if(j==xmax)
         {
-         XAdjust=(posX>>16)%TWin.Position.x1;
-         tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
-                      YAdjust+(XAdjust>>1)];
-         tC1=(tC1>>((XAdjust&1)<<2))&0xf;
+         tC1 = psxVub[((posY>>5)&(int32_t)0xFFFFF800)+YAdjust+(posX>>16)];
          GetTextureTransColG_S(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
         }
       }
-     if(NextRow_FT4()) return;
+     if(NextRow_FT()) 
+      {
+       return;
+      }
     }
    return;
   }
@@ -3475,136 +3294,8 @@ static void drawPoly4TEx4_TW_S(short x1, short y1, short x2, short y2, short x3,
  for (i=ymin;i<=ymax;i++)
   {
    xmin=(left_x >> 16);
-   xmax=(right_x >> 16);
-
-   if(xmax>=xmin)
-    {
-     posX=left_u;
-     posY=left_v;
-
-     num=(xmax-xmin);
-     if(num==0) num=1;
-     difX=(right_u-posX)/num;
-     difY=(right_v-posY)/num;
-     difX2=difX<<1;
-     difY2=difY<<1;
-
-     if(xmin<drawX)
-      {j=drawX-xmin;xmin=drawX;posX+=j*difX;posY+=j*difY;}
-     xmax--;if(drawW<xmax) xmax=drawW;
-
-     for(j=xmin;j<xmax;j+=2)
-      {
-       XAdjust=(posX>>16)%TWin.Position.x1;
-       tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
-                    YAdjust+(XAdjust>>1)];
-       tC1=(tC1>>((XAdjust&1)<<2))&0xf;
-       XAdjust=((posX+difX)>>16)%TWin.Position.x1;
-       tC2 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
-                    YAdjust+(XAdjust>>1)];
-       tC2=(tC2>>((XAdjust&1)<<2))&0xf;
-
-       GetTextureTransColG32_SPR((uint32_t *)&psxVuw[(i<<10)+j],
-            GETLE16(&psxVuw[clutP+tC1])|
-            ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
-       posX+=difX2;
-       posY+=difY2;
-      }
-     if(j==xmax)
-      {
-       XAdjust=(posX>>16)%TWin.Position.x1;
-       tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
-                    YAdjust+(XAdjust>>1)];
-       tC1=(tC1>>((XAdjust&1)<<2))&0xf;
-       GetTextureTransColG_SPR(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
-      }
-    }
-   if(NextRow_FT4()) return;
-  }
-}
-////////////////////////////////////////////////////////////////////////
-// POLY 3 F-SHADED TEX PAL 8
-////////////////////////////////////////////////////////////////////////
-
-static void drawPoly3TEx8(short x1, short y1, short x2, short y2, short x3, short y3, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3,short clX, short clY)
-{
- int i,j,xmin,xmax,ymin,ymax;
- int32_t difX, difY,difX2, difY2;
- int32_t posX,posY,YAdjust,clutP;
- short tC1,tC2;
-
- if(x1>drawW && x2>drawW && x3>drawW) return;
- if(y1>drawH && y2>drawH && y3>drawH) return;
- if(x1<drawX && x2<drawX && x3<drawX) return;
- if(y1<drawY && y2<drawY && y3<drawY) return;
- if(drawY>=drawH) return;
- if(drawX>=drawW) return; 
-
- if(!SetupSections_FT(x1,y1,x2,y2,x3,y3,tx1,ty1,tx2,ty2,tx3,ty3)) return;
-
- ymax=Ymax;
-
- for(ymin=Ymin;ymin<drawY;ymin++)
-  if(NextRow_FT()) return;
-
- clutP=(clY<<10)+clX;
-
- YAdjust=((GlobalTextAddrY)<<11)+(GlobalTextAddrX<<1);
-
- difX=delta_right_u;difX2=difX<<1;
- difY=delta_right_v;difY2=difY<<1;
-
-#ifdef FASTSOLID
-
- if(!bCheckMask && !DrawSemiTrans)
-  {
-   for (i=ymin;i<=ymax;i++)
-    {
-     xmin=(left_x >> 16);
-     xmax=(right_x >> 16)-1; //!!!!!!!!!!!!!!!
-     if(drawW<xmax) xmax=drawW;
-
-     if(xmax>=xmin)
-      {
-       posX=left_u;
-       posY=left_v;
-
-       if(xmin<drawX)
-        {j=drawX-xmin;xmin=drawX;posX+=j*difX;posY+=j*difY;}
-
-       for(j=xmin;j<xmax;j+=2)
-        {
-         tC1 = psxVub[((posY>>5)&(int32_t)0xFFFFF800)+YAdjust+(posX>>16)];
-         tC2 = psxVub[(((posY+difY)>>5)&(int32_t)0xFFFFF800)+YAdjust+
-                      ((posX+difX)>>16)];
-         GetTextureTransColG32_S((uint32_t *)&psxVuw[(i<<10)+j],
-             GETLE16(&psxVuw[clutP+tC1])|
-             ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
-         posX+=difX2;
-         posY+=difY2;
-        }
-
-       if(j==xmax)
-        {
-         tC1 = psxVub[((posY>>5)&(int32_t)0xFFFFF800)+YAdjust+(posX>>16)];
-         GetTextureTransColG_S(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
-        }
-      }
-     if(NextRow_FT()) 
-      {
-       return;
-      }
-    }
-   return;
-  }
-
-#endif
-
- for (i=ymin;i<=ymax;i++)
-  {
-   xmin=(left_x >> 16);
-   xmax=(right_x >> 16)-1; //!!!!!!!!!!!!!!!!!
-   if(drawW<xmax) xmax=drawW;
+   xmax=(right_x >> 16)-1; //!!!!!!!!!!!!!!!!!
+   if(drawW<xmax) xmax=drawW;
 
    if(xmax>=xmin)
     {
@@ -3642,156 +3333,6 @@ static void drawPoly3TEx8(short x1, short y1, short x2, short y2, short x3, shor
 
 ////////////////////////////////////////////////////////////////////////
 
-static void drawPoly3TEx8_IL(short x1, short y1, short x2, short y2, short x3, short y3, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3,short clX, short clY)
-{
- int i,j,xmin,xmax,ymin,ymax,n_xi,n_yi,TXV,TXU;
- int32_t difX, difY,difX2, difY2;
- int32_t posX,posY,YAdjust,clutP;
- short tC1,tC2;
-
- if(x1>drawW && x2>drawW && x3>drawW) return;
- if(y1>drawH && y2>drawH && y3>drawH) return;
- if(x1<drawX && x2<drawX && x3<drawX) return;
- if(y1<drawY && y2<drawY && y3<drawY) return;
- if(drawY>=drawH) return;
- if(drawX>=drawW) return; 
-
- if(!SetupSections_FT(x1,y1,x2,y2,x3,y3,tx1,ty1,tx2,ty2,tx3,ty3)) return;
-
- ymax=Ymax;
-
- for(ymin=Ymin;ymin<drawY;ymin++)
-  if(NextRow_FT()) return;
-
- clutP=(clY<<10)+clX;
-
- YAdjust=(GlobalTextAddrY<<10)+GlobalTextAddrX;
-
- difX=delta_right_u;difX2=difX<<1;
- difY=delta_right_v;difY2=difY<<1;
-
-#ifdef FASTSOLID
-
- if(!bCheckMask && !DrawSemiTrans)
-  {
-   for (i=ymin;i<=ymax;i++)
-    {
-     xmin=(left_x >> 16);
-     xmax=(right_x >> 16)-1; //!!!!!!!!!!!!!!!
-     if(drawW<xmax) xmax=drawW;
-
-     if(xmax>=xmin)
-      {
-       posX=left_u;
-       posY=left_v;
-
-       if(xmin<drawX)
-        {j=drawX-xmin;xmin=drawX;posX+=j*difX;posY+=j*difY;}
-
-       for(j=xmin;j<xmax;j+=2)
-        {
-         TXU=posX>>16;
-         TXV=posY>>16;
-         n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-         n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-
-         tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-
-         TXU=(posX+difX)>>16;
-         TXV=(posY+difY)>>16;
-         n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-         n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-
-         tC2= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-
-         GetTextureTransColG32_S((uint32_t *)&psxVuw[(i<<10)+j],
-             GETLE16(&psxVuw[clutP+tC1])|
-             ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
-         posX+=difX2;
-         posY+=difY2;
-        }
-
-       if(j==xmax)
-        {
-         TXU=posX>>16;
-         TXV=posY>>16;
-         n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-         n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-
-         tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-
-         GetTextureTransColG_S(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
-        }
-      }
-     if(NextRow_FT()) 
-      {
-       return;
-      }
-    }
-   return;
-  }
-
-#endif
-
- for (i=ymin;i<=ymax;i++)
-  {
-   xmin=(left_x >> 16);
-   xmax=(right_x >> 16)-1; //!!!!!!!!!!!!!!!!!
-   if(drawW<xmax) xmax=drawW;
-
-   if(xmax>=xmin)
-    {
-     posX=left_u;
-     posY=left_v;
-
-     if(xmin<drawX)
-      {j=drawX-xmin;xmin=drawX;posX+=j*difX;posY+=j*difY;}
-
-     for(j=xmin;j<xmax;j+=2)
-      {
-       TXU=posX>>16;
-       TXV=posY>>16;
-       n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-       n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-
-       tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-
-       TXU=(posX+difX)>>16;
-       TXV=(posY+difY)>>16;
-       n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-       n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-
-       tC2= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-
-       GetTextureTransColG32((uint32_t *)&psxVuw[(i<<10)+j],
-           GETLE16(&psxVuw[clutP+tC1])|
-           ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
-       posX+=difX2;
-       posY+=difY2;
-      }
-
-     if(j==xmax)
-      {
-       TXU=posX>>16;
-       TXV=posY>>16;
-       n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-       n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-
-       tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-
-       GetTextureTransColG(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
-      }
-
-    }
-   if(NextRow_FT()) 
-    {
-     return;
-    }
-  }
-}
-
-////////////////////////////////////////////////////////////////////////
-
 static void drawPoly3TEx8_TW(short x1, short y1, short x2, short y2, short x3, short y3, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3,short clX, short clY)
 {
  int i,j,xmin,xmax,ymin,ymax;
@@ -3843,10 +3384,10 @@ static void drawPoly3TEx8_TW(short x1, short y1, short x2, short y2, short x3, s
 
        for(j=xmin;j<xmax;j+=2)
         {
-         tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
-                      YAdjust+((posX>>16)%TWin.Position.x1)];
-         tC2 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
-                      YAdjust+(((posX+difX)>>16)%TWin.Position.x1)];
+         tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
+                      YAdjust+((posX>>16)&TWin.xmask)];
+         tC2 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
+                      YAdjust+(((posX+difX)>>16)&TWin.xmask)];
          GetTextureTransColG32_S((uint32_t *)&psxVuw[(i<<10)+j],
              GETLE16(&psxVuw[clutP+tC1])|
              ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
@@ -3856,8 +3397,8 @@ static void drawPoly3TEx8_TW(short x1, short y1, short x2, short y2, short x3, s
 
        if(j==xmax)
         {
-         tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
-                      YAdjust+((posX>>16)%TWin.Position.x1)];
+         tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
+                      YAdjust+((posX>>16)&TWin.xmask)];
          GetTextureTransColG_S(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
         }
       }
@@ -3887,10 +3428,10 @@ static void drawPoly3TEx8_TW(short x1, short y1, short x2, short y2, short x3, s
 
      for(j=xmin;j<xmax;j+=2)
       {
-       tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
-                    YAdjust+((posX>>16)%TWin.Position.x1)];
-       tC2 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
-                    YAdjust+(((posX+difX)>>16)%TWin.Position.x1)];
+       tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
+                    YAdjust+((posX>>16)&TWin.xmask)];
+       tC2 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
+                    YAdjust+(((posX+difX)>>16)&TWin.xmask)];
        GetTextureTransColG32((uint32_t *)&psxVuw[(i<<10)+j],
            GETLE16(&psxVuw[clutP+tC1])|
            ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
@@ -3900,8 +3441,8 @@ static void drawPoly3TEx8_TW(short x1, short y1, short x2, short y2, short x3, s
 
      if(j==xmax)
       {
-       tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
-                    YAdjust+((posX>>16)%TWin.Position.x1)];
+       tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
+                    YAdjust+((posX>>16)&TWin.xmask)];
        GetTextureTransColG(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
       }
 
@@ -4051,157 +3592,6 @@ static void drawPoly4TEx8(short x1, short y1, short x2, short y2, short x3, shor
 
 ////////////////////////////////////////////////////////////////////////
 
-static void drawPoly4TEx8_IL(short x1, short y1, short x2, short y2, short x3, short y3, short x4, short y4, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3, short tx4, short ty4,short clX, short clY)
-{
- int32_t num; 
- int32_t i,j,xmin,xmax,ymin,ymax,n_xi,n_yi,TXV,TXU;
- int32_t difX, difY, difX2, difY2;
- int32_t posX,posY,YAdjust,clutP;
- short tC1,tC2;
-
- if(x1>drawW && x2>drawW && x3>drawW && x4>drawW) return;
- if(y1>drawH && y2>drawH && y3>drawH && y4>drawH) return;
- if(x1<drawX && x2<drawX && x3<drawX && x4<drawX) return;
- if(y1<drawY && y2<drawY && y3<drawY && y4<drawY) return;
- if(drawY>=drawH) return;
- if(drawX>=drawW) return; 
-
- if(!SetupSections_FT4(x1,y1,x2,y2,x3,y3,x4,y4,tx1,ty1,tx2,ty2,tx3,ty3,tx4,ty4)) return;
-
- ymax=Ymax;
-
- for(ymin=Ymin;ymin<drawY;ymin++)
-  if(NextRow_FT4()) return;
-
- clutP=(clY<<10)+clX;
-
- YAdjust=(GlobalTextAddrY<<10)+GlobalTextAddrX;
-
-#ifdef FASTSOLID
-
- if(!bCheckMask && !DrawSemiTrans)
-  {
-   for (i=ymin;i<=ymax;i++)
-    {
-     xmin=(left_x >> 16);
-     xmax=(right_x >> 16);
-
-     if(xmax>=xmin)
-      {
-       posX=left_u;
-       posY=left_v;
-
-       num=(xmax-xmin);
-       if(num==0) num=1;
-       difX=(right_u-posX)/num;
-       difY=(right_v-posY)/num;
-       difX2=difX<<1;
-       difY2=difY<<1;
-
-       if(xmin<drawX)
-        {j=drawX-xmin;xmin=drawX;posX+=j*difX;posY+=j*difY;}
-       xmax--;if(drawW<xmax) xmax=drawW;
-
-       for(j=xmin;j<xmax;j+=2)
-        {
-         TXU=posX>>16;
-         TXV=posY>>16;
-         n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-         n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-
-         tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-
-         TXU=(posX+difX)>>16;
-         TXV=(posY+difY)>>16;
-         n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-         n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-
-         tC2= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-
-         GetTextureTransColG32_S((uint32_t *)&psxVuw[(i<<10)+j],
-              GETLE16(&psxVuw[clutP+tC1])|
-              ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
-         posX+=difX2;
-         posY+=difY2;
-        }
-       if(j==xmax)
-        {
-         TXU=posX>>16;
-         TXV=posY>>16;
-         n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-         n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-
-         tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-
-         GetTextureTransColG_S(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
-        }
-      }
-     if(NextRow_FT4()) return;
-    }
-   return;
-  }
-
-#endif
-
- for (i=ymin;i<=ymax;i++)
-  {
-   xmin=(left_x >> 16);
-   xmax=(right_x >> 16);
-
-   if(xmax>=xmin)
-    {
-     posX=left_u;
-     posY=left_v;
-
-     num=(xmax-xmin);
-     if(num==0) num=1;
-     difX=(right_u-posX)/num;
-     difY=(right_v-posY)/num;
-     difX2=difX<<1;
-     difY2=difY<<1;
-
-     if(xmin<drawX)
-      {j=drawX-xmin;xmin=drawX;posX+=j*difX;posY+=j*difY;}
-     xmax--;if(drawW<xmax) xmax=drawW;
-
-     for(j=xmin;j<xmax;j+=2)
-      {
-       TXU=posX>>16;
-       TXV=posY>>16;
-       n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-       n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-
-       tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-
-       TXU=(posX+difX)>>16;
-       TXV=(posY+difY)>>16;
-       n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-       n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-       
-       tC2= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-
-       GetTextureTransColG32((uint32_t *)&psxVuw[(i<<10)+j],
-            GETLE16(&psxVuw[clutP+tC1])|
-            ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
-       posX+=difX2;
-       posY+=difY2;
-      }
-     if(j==xmax)
-      {
-       TXU=posX>>16;
-       TXV=posY>>16;
-       n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-       n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-       tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-       GetTextureTransColG(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
-      }
-    }
-   if(NextRow_FT4()) return;
-  }
-}
-
-////////////////////////////////////////////////////////////////////////
-
 static void drawPoly4TEx8_TW(short x1, short y1, short x2, short y2, short x3, short y3, short x4, short y4, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3, short tx4, short ty4,short clX, short clY)
 {
  int32_t num; 
@@ -4256,10 +3646,10 @@ static void drawPoly4TEx8_TW(short x1, short y1, short x2, short y2, short x3, s
 
        for(j=xmin;j<xmax;j+=2)
         {
-         tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
-                      YAdjust+((posX>>16)%TWin.Position.x1)];
-         tC2 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
-                      YAdjust+(((posX+difX)>>16)%TWin.Position.x1)];
+         tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
+                      YAdjust+((posX>>16)&TWin.xmask)];
+         tC2 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
+                      YAdjust+(((posX+difX)>>16)&TWin.xmask)];
          GetTextureTransColG32_S((uint32_t *)&psxVuw[(i<<10)+j],
               GETLE16(&psxVuw[clutP+tC1])|
               ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
@@ -4268,8 +3658,8 @@ static void drawPoly4TEx8_TW(short x1, short y1, short x2, short y2, short x3, s
         }
        if(j==xmax)
         {
-         tC1 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
-                      YAdjust+((posX>>16)%TWin.Position.x1)];
+         tC1 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
+                      YAdjust+((posX>>16)&TWin.xmask)];
          GetTextureTransColG_S(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
         }
       }
@@ -4304,10 +3694,10 @@ static void drawPoly4TEx8_TW(short x1, short y1, short x2, short y2, short x3, s
 
      for(j=xmin;j<xmax;j+=2)
       {
-       tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
-                    YAdjust+((posX>>16)%TWin.Position.x1)];
-       tC2 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
-                     YAdjust+(((posX+difX)>>16)%TWin.Position.x1)];
+       tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
+                    YAdjust+((posX>>16)&TWin.xmask)];
+       tC2 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
+                     YAdjust+(((posX+difX)>>16)&TWin.xmask)];
        GetTextureTransColG32((uint32_t *)&psxVuw[(i<<10)+j],
             GETLE16(&psxVuw[clutP+tC1])|
             ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
@@ -4316,8 +3706,8 @@ static void drawPoly4TEx8_TW(short x1, short y1, short x2, short y2, short x3, s
       }
      if(j==xmax)
       {
-       tC1 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
-                    YAdjust+((posX>>16)%TWin.Position.x1)];
+       tC1 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
+                    YAdjust+((posX>>16)&TWin.xmask)];
        GetTextureTransColG(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
       }
     }
@@ -4381,10 +3771,10 @@ static void drawPoly4TEx8_TW_S(short x1, short y1, short x2, short y2, short x3,
 
        for(j=xmin;j<xmax;j+=2)
         {
-         tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
-                      YAdjust+((posX>>16)%TWin.Position.x1)];
-         tC2 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
-                      YAdjust+(((posX+difX)>>16)%TWin.Position.x1)];
+         tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
+                      YAdjust+((posX>>16)&TWin.xmask)];
+         tC2 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
+                      YAdjust+(((posX+difX)>>16)&TWin.xmask)];
          GetTextureTransColG32_S((uint32_t *)&psxVuw[(i<<10)+j],
               GETLE16(&psxVuw[clutP+tC1])|
               ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
@@ -4393,8 +3783,8 @@ static void drawPoly4TEx8_TW_S(short x1, short y1, short x2, short y2, short x3,
         }
        if(j==xmax)
         {
-         tC1 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
-                      YAdjust+((posX>>16)%TWin.Position.x1)];
+         tC1 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
+                      YAdjust+((posX>>16)&TWin.xmask)];
          GetTextureTransColG_S(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
         }
       }
@@ -4429,10 +3819,10 @@ static void drawPoly4TEx8_TW_S(short x1, short y1, short x2, short y2, short x3,
 
      for(j=xmin;j<xmax;j+=2)
       {
-       tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
-                    YAdjust+((posX>>16)%TWin.Position.x1)];
-       tC2 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
-                     YAdjust+(((posX+difX)>>16)%TWin.Position.x1)];
+       tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
+                    YAdjust+((posX>>16)&TWin.xmask)];
+       tC2 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
+                     YAdjust+(((posX+difX)>>16)&TWin.xmask)];
        GetTextureTransColG32_SPR((uint32_t *)&psxVuw[(i<<10)+j],
             GETLE16(&psxVuw[clutP+tC1])|
             ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16);
@@ -4441,8 +3831,8 @@ static void drawPoly4TEx8_TW_S(short x1, short y1, short x2, short y2, short x3,
       }
      if(j==xmax)
       {
-       tC1 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
-                    YAdjust+((posX>>16)%TWin.Position.x1)];
+       tC1 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
+                    YAdjust+((posX>>16)&TWin.xmask)];
        GetTextureTransColG_SPR(&psxVuw[(i<<10)+j],GETLE16(&psxVuw[clutP+tC1]));
       }
     }
@@ -4598,18 +3988,18 @@ static void drawPoly3TD_TW(short x1, short y1, short x2, short y2, short x3, sho
        for(j=xmin;j<xmax;j+=2)
         {
          GetTextureTransColG32_S((uint32_t *)&psxVuw[(i<<10)+j],
-              (((int32_t)GETLE16(&psxVuw[(((((posY+difY)>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-              (((posX+difX)>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]))<<16)|
-              GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                     (((posX)>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]));
+              (((int32_t)GETLE16(&psxVuw[(((((posY+difY)>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+              (((posX+difX)>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]))<<16)|
+              GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                     (((posX)>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]));
 
          posX+=difX2;
          posY+=difY2;
         }
        if(j==xmax)
          GetTextureTransColG_S(&psxVuw[(i<<10)+j],
-             GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                    ((posX>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]));
+             GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                    ((posX>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]));
       }
      if(NextRow_FT()) 
       {
@@ -4638,18 +4028,18 @@ static void drawPoly3TD_TW(short x1, short y1, short x2, short y2, short x3, sho
      for(j=xmin;j<xmax;j+=2)
       {
        GetTextureTransColG32((uint32_t *)&psxVuw[(i<<10)+j],
-            (((int32_t)GETLE16(&psxVuw[(((((posY+difY)>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-            (((posX+difX)>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]))<<16)|
-            GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                   (((posX)>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]));
+            (((int32_t)GETLE16(&psxVuw[(((((posY+difY)>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+            (((posX+difX)>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]))<<16)|
+            GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                   (((posX)>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]));
 
        posX+=difX2;
        posY+=difY2;
       }
      if(j==xmax)
        GetTextureTransColG(&psxVuw[(i<<10)+j],
-           GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                  ((posX>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]));
+           GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                  ((posX>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]));
     }
    if(NextRow_FT()) 
     {
@@ -4830,18 +4220,18 @@ static void drawPoly4TD_TW(short x1, short y1, short x2, short y2, short x3, sho
        for(j=xmin;j<xmax;j+=2)
         {
          GetTextureTransColG32_S((uint32_t *)&psxVuw[(i<<10)+j],
-              (((int32_t)GETLE16(&psxVuw[(((((posY+difY)>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                             (((posX+difX)>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]))<<16)|
-              GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY)<<10)+TWin.Position.y0+
-                     ((posX>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]));
+              (((int32_t)GETLE16(&psxVuw[(((((posY+difY)>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                             (((posX+difX)>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]))<<16)|
+              GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY)<<10)+TWin.Position.y0+
+                    ((posX>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]));
 
          posX+=difX2;
          posY+=difY2;
         }
        if(j==xmax)
         GetTextureTransColG_S(&psxVuw[(i<<10)+j],
-           GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                  ((posX>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]));
+           GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                  ((posX>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]));
       }
      if(NextRow_FT4()) return;
     }
@@ -4874,18 +4264,18 @@ static void drawPoly4TD_TW(short x1, short y1, short x2, short y2, short x3, sho
      for(j=xmin;j<xmax;j+=2)
       {
        GetTextureTransColG32((uint32_t *)&psxVuw[(i<<10)+j],
-            (((int32_t)GETLE16(&psxVuw[(((((posY+difY)>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                           (((posX+difX)>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]))<<16)|
-            GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                   ((posX>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]));
+            (((int32_t)GETLE16(&psxVuw[(((((posY+difY)>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                           (((posX+difX)>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]))<<16)|
+            GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                   ((posX>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]));
 
        posX+=difX2;
        posY+=difY2;
       }
      if(j==xmax)
       GetTextureTransColG(&psxVuw[(i<<10)+j],
-         GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                ((posX>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]));
+         GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                ((posX>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]));
     }
    if(NextRow_FT4()) return;
   }
@@ -4942,18 +4332,18 @@ static void drawPoly4TD_TW_S(short x1, short y1, short x2, short y2, short x3, s
        for(j=xmin;j<xmax;j+=2)
         {
          GetTextureTransColG32_S((uint32_t *)&psxVuw[(i<<10)+j],
-              (((int32_t)GETLE16(&psxVuw[(((((posY+difY)>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                             (((posX+difX)>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]))<<16)|
-              GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY)<<10)+TWin.Position.y0+
-                     ((posX>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]));
+              (((int32_t)GETLE16(&psxVuw[(((((posY+difY)>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                             (((posX+difX)>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]))<<16)|
+              GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY)<<10)+TWin.Position.y0+
+                     ((posX>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]));
 
          posX+=difX2;
          posY+=difY2;
         }
        if(j==xmax)
         GetTextureTransColG_S(&psxVuw[(i<<10)+j],
-           GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                  ((posX>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]));
+           GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                  ((posX>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]));
       }
      if(NextRow_FT4()) return;
     }
@@ -4986,18 +4376,18 @@ static void drawPoly4TD_TW_S(short x1, short y1, short x2, short y2, short x3, s
      for(j=xmin;j<xmax;j+=2)
       {
        GetTextureTransColG32_SPR((uint32_t *)&psxVuw[(i<<10)+j],
-            (((int32_t)GETLE16(&psxVuw[(((((posY+difY)>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                           (((posX+difX)>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]))<<16)|
-            GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                   ((posX>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]));
+            (((int32_t)GETLE16(&psxVuw[(((((posY+difY)>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                           (((posX+difX)>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]))<<16)|
+            GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                   ((posX>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]));
 
        posX+=difX2;
        posY+=difY2;
       }
      if(j==xmax)
       GetTextureTransColG_SPR(&psxVuw[(i<<10)+j],
-         GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                ((posX>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]));
+         GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                ((posX>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]));
     }
    if(NextRow_FT4()) return;
   }
@@ -5124,179 +4514,33 @@ static inline void drawPoly3Gi(short x1,short y1,short x2,short y2,short x3,shor
     }
    if(NextRow_G()) return;
   }
-
-}
-
-////////////////////////////////////////////////////////////////////////
-
-static void drawPoly3G(int32_t rgb1, int32_t rgb2, int32_t rgb3)
-{
- drawPoly3Gi(lx0,ly0,lx1,ly1,lx2,ly2,rgb1,rgb2,rgb3);
-}
-
-// draw two g-shaded tris for right psx shading emulation
-
-static void drawPoly4G(int32_t rgb1, int32_t rgb2, int32_t rgb3, int32_t rgb4)
-{
- drawPoly3Gi(lx1,ly1,lx3,ly3,lx2,ly2,
-             rgb2,rgb4,rgb3);
- drawPoly3Gi(lx0,ly0,lx1,ly1,lx2,ly2,
-             rgb1,rgb2,rgb3);
-}
-
-////////////////////////////////////////////////////////////////////////
-// POLY 3/4 G-SHADED TEX PAL4
-////////////////////////////////////////////////////////////////////////
-
-static void drawPoly3TGEx4(short x1, short y1, short x2, short y2, short x3, short y3, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3, short clX, short clY,int32_t col1, int32_t col2, int32_t col3)
-{
- int i,j,xmin,xmax,ymin,ymax;
- int32_t cR1,cG1,cB1;
- int32_t difR,difB,difG,difR2,difB2,difG2;
- int32_t difX, difY,difX2, difY2;
- int32_t posX,posY,YAdjust,clutP,XAdjust;
- short tC1,tC2;
-
- if(x1>drawW && x2>drawW && x3>drawW) return;
- if(y1>drawH && y2>drawH && y3>drawH) return;
- if(x1<drawX && x2<drawX && x3<drawX) return;
- if(y1<drawY && y2<drawY && y3<drawY) return;
- if(drawY>=drawH) return;
- if(drawX>=drawW) return; 
-
- if(!SetupSections_GT(x1,y1,x2,y2,x3,y3,tx1,ty1,tx2,ty2,tx3,ty3,col1,col2,col3)) return;
-
- ymax=Ymax;
-
- for(ymin=Ymin;ymin<drawY;ymin++)
-  if(NextRow_GT()) return;
-
- clutP=(clY<<10)+clX;
-
- YAdjust=((GlobalTextAddrY)<<11)+(GlobalTextAddrX<<1);
-
- difR=delta_right_R;
- difG=delta_right_G;
- difB=delta_right_B;
- difR2=difR<<1;
- difG2=difG<<1;
- difB2=difB<<1;
-
- difX=delta_right_u;difX2=difX<<1;
- difY=delta_right_v;difY2=difY<<1;
-
-#ifdef FASTSOLID
-
- if(!bCheckMask && !DrawSemiTrans && !iDither)
-  {
-   for (i=ymin;i<=ymax;i++)
-    {
-     xmin=((left_x) >> 16);
-     xmax=((right_x) >> 16)-1; //!!!!!!!!!!!!!
-     if(drawW<xmax) xmax=drawW;
-
-     if(xmax>=xmin)
-      {
-       posX=left_u;
-       posY=left_v;
-       cR1=left_R;
-       cG1=left_G;
-       cB1=left_B;
-
-       if(xmin<drawX)
-        {j=drawX-xmin;xmin=drawX;posX+=j*difX;posY+=j*difY;cR1+=j*difR;cG1+=j*difG;cB1+=j*difB;}
-
-       for(j=xmin;j<xmax;j+=2) 
-        {
-         XAdjust=(posX>>16);
-         tC1 = psxVub[((posY>>5)&0xFFFFF800)+YAdjust+(XAdjust>>1)];
-         tC1=(tC1>>((XAdjust&1)<<2))&0xf;
-         XAdjust=((posX+difX)>>16);
-         tC2 = psxVub[(((posY+difY)>>5)&(int32_t)0xFFFFF800)+YAdjust+
-                      (XAdjust>>1)];
-         tC2=(tC2>>((XAdjust&1)<<2))&0xf;
-
-         GetTextureTransColGX32_S((uint32_t *)&psxVuw[(i<<10)+j],
-               GETLE16(&psxVuw[clutP+tC1])|
-               ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16,
-               (cB1>>16)|((cB1+difB)&0xff0000),
-               (cG1>>16)|((cG1+difG)&0xff0000),
-               (cR1>>16)|((cR1+difR)&0xff0000));
-         posX+=difX2;
-         posY+=difY2;
-         cR1+=difR2;
-         cG1+=difG2;
-         cB1+=difB2;
-        }
-       if(j==xmax)
-        {
-         XAdjust=(posX>>16);
-         tC1 = psxVub[((posY>>5)&(int32_t)0xFFFFF800)+YAdjust+(XAdjust>>1)];
-         tC1=(tC1>>((XAdjust&1)<<2))&0xf;
-         GetTextureTransColGX_S(&psxVuw[(i<<10)+j], 
-              GETLE16(&psxVuw[clutP+tC1]),
-              (cB1>>16),(cG1>>16),(cR1>>16));
-        }
-      }
-     if(NextRow_GT()) 
-      {
-       return;
-      }
-    }
-   return;
-  }
-
-#endif
-
- for (i=ymin;i<=ymax;i++)
-  {
-   xmin=(left_x >> 16);
-   xmax=(right_x >> 16)-1; //!!!!!!!!!!!!!!!!
-   if(drawW<xmax) xmax=drawW;
-
-   if(xmax>=xmin)
-    {
-     posX=left_u;
-     posY=left_v;
-     cR1=left_R;
-     cG1=left_G;
-     cB1=left_B;
-
-     if(xmin<drawX)
-      {j=drawX-xmin;xmin=drawX;posX+=j*difX;posY+=j*difY;cR1+=j*difR;cG1+=j*difG;cB1+=j*difB;}
-
-     for(j=xmin;j<=xmax;j++) 
-      {
-       XAdjust=(posX>>16);
-       tC1 = psxVub[((posY>>5)&(int32_t)0xFFFFF800)+YAdjust+(XAdjust>>1)];
-       tC1=(tC1>>((XAdjust&1)<<2))&0xf;
-       if(iDither)
-        GetTextureTransColGX_Dither(&psxVuw[(i<<10)+j], 
-            GETLE16(&psxVuw[clutP+tC1]),
-            (cB1>>16),(cG1>>16),(cR1>>16));
-       else
-        GetTextureTransColGX(&psxVuw[(i<<10)+j], 
-            GETLE16(&psxVuw[clutP+tC1]),
-            (cB1>>16),(cG1>>16),(cR1>>16));
-       posX+=difX;
-       posY+=difY;
-       cR1+=difR;
-       cG1+=difG;
-       cB1+=difB;
-      }
-    }
-   if(NextRow_GT()) 
-    {
-     return;
-    }
-  }
+
+}
+
+////////////////////////////////////////////////////////////////////////
+
+static void drawPoly3G(int32_t rgb1, int32_t rgb2, int32_t rgb3)
+{
+ drawPoly3Gi(lx0,ly0,lx1,ly1,lx2,ly2,rgb1,rgb2,rgb3);
+}
+
+// draw two g-shaded tris for right psx shading emulation
+
+static void drawPoly4G(int32_t rgb1, int32_t rgb2, int32_t rgb3, int32_t rgb4)
+{
+ drawPoly3Gi(lx1,ly1,lx3,ly3,lx2,ly2,
+             rgb2,rgb4,rgb3);
+ drawPoly3Gi(lx0,ly0,lx1,ly1,lx2,ly2,
+             rgb1,rgb2,rgb3);
 }
 
+////////////////////////////////////////////////////////////////////////
+// POLY 3/4 G-SHADED TEX PAL4
 ////////////////////////////////////////////////////////////////////////
 
-static void drawPoly3TGEx4_IL(short x1, short y1, short x2, short y2, short x3, short y3, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3, short clX, short clY,int32_t col1, int32_t col2, int32_t col3)
+static void drawPoly3TGEx4(short x1, short y1, short x2, short y2, short x3, short y3, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3, short clX, short clY,int32_t col1, int32_t col2, int32_t col3)
 {
- int i,j,xmin,xmax,ymin,ymax,n_xi,n_yi,TXV;
+ int i,j,xmin,xmax,ymin,ymax;
  int32_t cR1,cG1,cB1;
  int32_t difR,difB,difG,difR2,difB2,difG2;
  int32_t difX, difY,difX2, difY2;
@@ -5319,7 +4563,7 @@ static void drawPoly3TGEx4_IL(short x1, short y1, short x2, short y2, short x3,
 
  clutP=(clY<<10)+clX;
 
- YAdjust=(GlobalTextAddrY<<10)+GlobalTextAddrX;
+ YAdjust=((GlobalTextAddrY)<<11)+(GlobalTextAddrX<<1);
 
  difR=delta_right_R;
  difG=delta_right_G;
@@ -5355,20 +4599,12 @@ static void drawPoly3TGEx4_IL(short x1, short y1, short x2, short y2, short x3,
        for(j=xmin;j<xmax;j+=2) 
         {
          XAdjust=(posX>>16);
-
-         TXV=posY>>16;
-         n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-         n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-         tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
-
+         tC1 = psxVub[((posY>>5)&0xFFFFF800)+YAdjust+(XAdjust>>1)];
+         tC1=(tC1>>((XAdjust&1)<<2))&0xf;
          XAdjust=((posX+difX)>>16);
-
-         TXV=(posY+difY)>>16;
-         n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-         n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-
-         tC2= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
+         tC2 = psxVub[(((posY+difY)>>5)&(int32_t)0xFFFFF800)+YAdjust+
+                      (XAdjust>>1)];
+         tC2=(tC2>>((XAdjust&1)<<2))&0xf;
 
          GetTextureTransColGX32_S((uint32_t *)&psxVuw[(i<<10)+j],
                GETLE16(&psxVuw[clutP+tC1])|
@@ -5385,13 +4621,8 @@ static void drawPoly3TGEx4_IL(short x1, short y1, short x2, short y2, short x3,
        if(j==xmax)
         {
          XAdjust=(posX>>16);
-
-         TXV=posY>>16;
-         n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-         n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-
-         tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
-
+         tC1 = psxVub[((posY>>5)&(int32_t)0xFFFFF800)+YAdjust+(XAdjust>>1)];
+         tC1=(tC1>>((XAdjust&1)<<2))&0xf;
          GetTextureTransColGX_S(&psxVuw[(i<<10)+j], 
               GETLE16(&psxVuw[clutP+tC1]),
               (cB1>>16),(cG1>>16),(cR1>>16));
@@ -5427,13 +4658,8 @@ static void drawPoly3TGEx4_IL(short x1, short y1, short x2, short y2, short x3,
      for(j=xmin;j<=xmax;j++) 
       {
        XAdjust=(posX>>16);
-
-       TXV=posY>>16;
-       n_xi = ( ( XAdjust >> 2 ) & ~0x3c ) + ( ( TXV << 2 ) & 0x3c );
-       n_yi = ( TXV & ~0xf ) + ( ( XAdjust >> 4 ) & 0xf );
-
-       tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((XAdjust & 0x03)<<2)) & 0x0f ;
-
+       tC1 = psxVub[((posY>>5)&(int32_t)0xFFFFF800)+YAdjust+(XAdjust>>1)];
+       tC1=(tC1>>((XAdjust&1)<<2))&0xf;
        if(iDither)
         GetTextureTransColGX_Dither(&psxVuw[(i<<10)+j], 
             GETLE16(&psxVuw[clutP+tC1]),
@@ -5519,12 +4745,12 @@ static void drawPoly3TGEx4_TW(short x1, short y1, short x2, short y2, short x3,
 
        for(j=xmin;j<xmax;j+=2) 
         {
-         XAdjust=(posX>>16)%TWin.Position.x1;
-         tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
+         XAdjust=(posX>>16)&TWin.xmask;
+         tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
                       YAdjust+(XAdjust>>1)];
          tC1=(tC1>>((XAdjust&1)<<2))&0xf;
-         XAdjust=((posX+difX)>>16)%TWin.Position.x1;
-         tC2 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
+         XAdjust=((posX+difX)>>16)&TWin.xmask;
+         tC2 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
                       YAdjust+(XAdjust>>1)];
          tC2=(tC2>>((XAdjust&1)<<2))&0xf;
          GetTextureTransColGX32_S((uint32_t *)&psxVuw[(i<<10)+j],
@@ -5541,8 +4767,8 @@ static void drawPoly3TGEx4_TW(short x1, short y1, short x2, short y2, short x3,
         }
        if(j==xmax)
         {
-         XAdjust=(posX>>16)%TWin.Position.x1;
-         tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
+         XAdjust=(posX>>16)&TWin.xmask;
+         tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
                        YAdjust+(XAdjust>>1)];
          tC1=(tC1>>((XAdjust&1)<<2))&0xf;
          GetTextureTransColGX_S(&psxVuw[(i<<10)+j], 
@@ -5579,8 +4805,8 @@ static void drawPoly3TGEx4_TW(short x1, short y1, short x2, short y2, short x3,
 
      for(j=xmin;j<=xmax;j++) 
       {
-       XAdjust=(posX>>16)%TWin.Position.x1;
-       tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
+       XAdjust=(posX>>16)&TWin.xmask;
+       tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
                     YAdjust+(XAdjust>>1)];
        tC1=(tC1>>((XAdjust&1)<<2))&0xf;
        if(iDither)
@@ -5612,21 +4838,6 @@ static void drawPoly3TGEx4_TW(short x1, short y1, short x2, short y2, short x3,
 // correct that way, so small texture distortions can 
 // happen... 
 
-static void drawPoly4TGEx4_TRI_IL(short x1, short y1, short x2, short y2, short x3, short y3, short x4, short y4,
-                    short tx1, short ty1, short tx2, short ty2, short tx3, short ty3, short tx4, short ty4,
-                    short clX, short clY,
-                    int32_t col1, int32_t col2, int32_t col3, int32_t col4)
-{
- drawPoly3TGEx4_IL(x2,y2,x3,y3,x4,y4,
-                   tx2,ty2,tx3,ty3,tx4,ty4,
-                   clX,clY,
-                   col2,col4,col3);
- drawPoly3TGEx4_IL(x1,y1,x2,y2,x4,y4,
-                   tx1,ty1,tx2,ty2,tx4,ty4,
-                   clX,clY,
-                   col1,col2,col3);
-}
-
 #ifdef POLYQUAD3GT
 
 static void drawPoly4TGEx4_TRI(short x1, short y1, short x2, short y2, short x3, short y3, short x4, short y4, 
@@ -5970,165 +5181,6 @@ static void drawPoly3TGEx8(short x1, short y1, short x2, short y2, short x3, sho
 
 ////////////////////////////////////////////////////////////////////////
 
-static void drawPoly3TGEx8_IL(short x1, short y1, short x2, short y2, short x3, short y3, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3, short clX, short clY,int32_t col1, int32_t col2, int32_t col3)
-{
- int i,j,xmin,xmax,ymin,ymax,n_xi,n_yi,TXV,TXU;
- int32_t cR1,cG1,cB1;
- int32_t difR,difB,difG,difR2,difB2,difG2;
- int32_t difX, difY,difX2, difY2;
- int32_t posX,posY,YAdjust,clutP;
- short tC1,tC2;
-
- if(x1>drawW && x2>drawW && x3>drawW) return;
- if(y1>drawH && y2>drawH && y3>drawH) return;
- if(x1<drawX && x2<drawX && x3<drawX) return;
- if(y1<drawY && y2<drawY && y3<drawY) return;
- if(drawY>=drawH) return;
- if(drawX>=drawW) return; 
-
- if(!SetupSections_GT(x1,y1,x2,y2,x3,y3,tx1,ty1,tx2,ty2,tx3,ty3,col1,col2,col3)) return;
-
- ymax=Ymax;
-
- for(ymin=Ymin;ymin<drawY;ymin++)
-  if(NextRow_GT()) return;
-
- clutP=(clY<<10)+clX;
-
- YAdjust=(GlobalTextAddrY<<10)+GlobalTextAddrX;
-
- difR=delta_right_R;
- difG=delta_right_G;
- difB=delta_right_B;
- difR2=difR<<1;
- difG2=difG<<1;
- difB2=difB<<1;
- difX=delta_right_u;difX2=difX<<1;
- difY=delta_right_v;difY2=difY<<1;
-
-#ifdef FASTSOLID
-
- if(!bCheckMask && !DrawSemiTrans && !iDither)
-  {
-   for (i=ymin;i<=ymax;i++)
-    {
-     xmin=(left_x >> 16);
-     xmax=(right_x >> 16)-1; // !!!!!!!!!!!!!
-     if(drawW<xmax) xmax=drawW;
-
-     if(xmax>=xmin)
-      {
-       posX=left_u;
-       posY=left_v;
-       cR1=left_R;
-       cG1=left_G;
-       cB1=left_B;
-
-       if(xmin<drawX)
-        {j=drawX-xmin;xmin=drawX;posX+=j*difX;posY+=j*difY;cR1+=j*difR;cG1+=j*difG;cB1+=j*difB;}
-
-       for(j=xmin;j<xmax;j+=2)
-        {
-         TXU=posX>>16;
-         TXV=posY>>16;
-         n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-         n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-
-         tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-
-         TXU=(posX+difX)>>16;
-         TXV=(posY+difY)>>16;
-         n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-         n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-
-         tC2= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-
-         GetTextureTransColGX32_S((uint32_t *)&psxVuw[(i<<10)+j],
-              GETLE16(&psxVuw[clutP+tC1])|
-              ((int32_t)GETLE16(&psxVuw[clutP+tC2]))<<16,
-              (cB1>>16)|((cB1+difB)&0xff0000),
-              (cG1>>16)|((cG1+difG)&0xff0000),
-              (cR1>>16)|((cR1+difR)&0xff0000));
-         posX+=difX2;
-         posY+=difY2;
-         cR1+=difR2;
-         cG1+=difG2;
-         cB1+=difB2;
-        }
-       if(j==xmax)
-        {
-         TXU=posX>>16;
-         TXV=posY>>16;
-         n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-         n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-
-         tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-
-         GetTextureTransColGX_S(&psxVuw[(i<<10)+j], 
-              GETLE16(&psxVuw[clutP+tC1]),
-              (cB1>>16),(cG1>>16),(cR1>>16));
-        }
-      }
-     if(NextRow_GT()) 
-      {
-       return;
-      }
-    }
-   return;
-  }
-
-#endif
-
- for (i=ymin;i<=ymax;i++)
-  {
-   xmin=(left_x >> 16);
-   xmax=(right_x >> 16)-1; //!!!!!!!!!!!!!!!!!!!!!!!
-   if(drawW<xmax) xmax=drawW;
-
-   if(xmax>=xmin)
-    {
-     posX=left_u;
-     posY=left_v;
-     cR1=left_R;
-     cG1=left_G;
-     cB1=left_B;
-
-     if(xmin<drawX)
-      {j=drawX-xmin;xmin=drawX;posX+=j*difX;posY+=j*difY;cR1+=j*difR;cG1+=j*difG;cB1+=j*difB;}
-
-     for(j=xmin;j<=xmax;j++)
-      {
-       TXU=posX>>16;
-       TXV=posY>>16;
-       n_xi = ( ( TXU >> 1 ) & ~0x78 ) + ( ( TXU << 2 ) & 0x40 ) + ( ( TXV << 3 ) & 0x38 );
-       n_yi = ( TXV & ~0x7 ) + ( ( TXU >> 5 ) & 0x7 );
-
-       tC1= (GETLE16(&psxVuw[(n_yi<<10)+YAdjust+n_xi]) >> ((TXU & 0x01)<<3)) & 0xff;
-
-       if(iDither)
-        GetTextureTransColGX_Dither(&psxVuw[(i<<10)+j], 
-            GETLE16(&psxVuw[clutP+tC1]),
-            (cB1>>16),(cG1>>16),(cR1>>16));
-       else
-        GetTextureTransColGX(&psxVuw[(i<<10)+j], 
-            GETLE16(&psxVuw[clutP+tC1]),
-            (cB1>>16),(cG1>>16),(cR1>>16));
-       posX+=difX;
-       posY+=difY;
-       cR1+=difR;
-       cG1+=difG;
-       cB1+=difB;
-      }
-    }
-   if(NextRow_GT()) 
-    {
-     return;
-    }
-  }
-}
-
-////////////////////////////////////////////////////////////////////////
-
 static void drawPoly3TGEx8_TW(short x1, short y1, short x2, short y2, short x3, short y3, short tx1, short ty1, short tx2, short ty2, short tx3, short ty3, short clX, short clY,int32_t col1, int32_t col2, int32_t col3)
 {
  int i,j,xmin,xmax,ymin,ymax;
@@ -6189,10 +5241,10 @@ static void drawPoly3TGEx8_TW(short x1, short y1, short x2, short y2, short x3,
 
        for(j=xmin;j<xmax;j+=2)
         {
-         tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
-                      YAdjust+((posX>>16)%TWin.Position.x1)];
-         tC2 = psxVub[((((posY+difY)>>16)%TWin.Position.y1)<<11)+
-                      YAdjust+(((posX+difX)>>16)%TWin.Position.x1)];
+         tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
+                      YAdjust+((posX>>16)&TWin.xmask)];
+         tC2 = psxVub[((((posY+difY)>>16)&TWin.ymask)<<11)+
+                      YAdjust+(((posX+difX)>>16)&TWin.xmask)];
                       
          GetTextureTransColGX32_S((uint32_t *)&psxVuw[(i<<10)+j],
               GETLE16(&psxVuw[clutP+tC1])|
@@ -6208,8 +5260,8 @@ static void drawPoly3TGEx8_TW(short x1, short y1, short x2, short y2, short x3,
         }
        if(j==xmax)
         {
-         tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
-                      YAdjust+((posX>>16)%TWin.Position.x1)];
+         tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
+                      YAdjust+((posX>>16)&TWin.xmask)];
          GetTextureTransColGX_S(&psxVuw[(i<<10)+j], 
               GETLE16(&psxVuw[clutP+tC1]),
               (cB1>>16),(cG1>>16),(cR1>>16));
@@ -6244,8 +5296,8 @@ static void drawPoly3TGEx8_TW(short x1, short y1, short x2, short y2, short x3,
 
      for(j=xmin;j<=xmax;j++)
       {
-       tC1 = psxVub[(((posY>>16)%TWin.Position.y1)<<11)+
-                    YAdjust+((posX>>16)%TWin.Position.x1)];
+       tC1 = psxVub[(((posY>>16)&TWin.ymask)<<11)+
+                    YAdjust+((posX>>16)&TWin.xmask)];
        if(iDither)
         GetTextureTransColGX_Dither(&psxVuw[(i<<10)+j], 
             GETLE16(&psxVuw[clutP+tC1]),
@@ -6272,21 +5324,6 @@ static void drawPoly3TGEx8_TW(short x1, short y1, short x2, short y2, short x3,
 
 // note: two g-shaded tris: small texture distortions can happen
 
-static void drawPoly4TGEx8_TRI_IL(short x1, short y1, short x2, short y2, short x3, short y3, short x4, short y4, 
-                           short tx1, short ty1, short tx2, short ty2, short tx3, short ty3, short tx4, short ty4, 
-                           short clX, short clY,
-                           int32_t col1, int32_t col2, int32_t col3, int32_t col4)
-{
- drawPoly3TGEx8_IL(x2,y2,x3,y3,x4,y4,
-                   tx2,ty2,tx3,ty3,tx4,ty4,
-                   clX,clY,
-                   col2,col4,col3);
- drawPoly3TGEx8_IL(x1,y1,x2,y2,x4,y4,
-                   tx1,ty1,tx2,ty2,tx4,ty4,
-                   clX,clY,
-                   col1,col2,col3);
-}
-
 #ifdef POLYQUAD3GT
                       
 static void drawPoly4TGEx8_TRI(short x1, short y1, short x2, short y2, short x3, short y3, short x4, short y4, 
@@ -6658,10 +5695,10 @@ static void drawPoly3TGD_TW(short x1, short y1, short x2, short y2, short x3, sh
        for(j=xmin;j<xmax;j+=2)
         {
          GetTextureTransColGX32_S((uint32_t *)&psxVuw[(i<<10)+j],
-              (((int32_t)GETLE16(&psxVuw[(((((posY+difY)>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                             (((posX+difX)>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]))<<16)|
-              GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                     (((posX)>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]),
+              (((int32_t)GETLE16(&psxVuw[(((((posY+difY)>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                             (((posX+difX)>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]))<<16)|
+              GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                     (((posX)>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]),
               (cB1>>16)|((cB1+difB)&0xff0000),
               (cG1>>16)|((cG1+difG)&0xff0000),
               (cR1>>16)|((cR1+difR)&0xff0000));
@@ -6673,8 +5710,8 @@ static void drawPoly3TGD_TW(short x1, short y1, short x2, short y2, short x3, sh
         }
        if(j==xmax)
         GetTextureTransColGX_S(&psxVuw[(i<<10)+j],
-            GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                   ((posX>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]),
+            GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                   ((posX>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]),
             (cB1>>16),(cG1>>16),(cR1>>16));
       }
      if(NextRow_GT()) 
@@ -6708,13 +5745,13 @@ static void drawPoly3TGD_TW(short x1, short y1, short x2, short y2, short x3, sh
       {
        if(iDither)
         GetTextureTransColGX_Dither(&psxVuw[(i<<10)+j],
-          GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                 ((posX>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]),
+          GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                 ((posX>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]),
           (cB1>>16),(cG1>>16),(cR1>>16));
        else
         GetTextureTransColGX(&psxVuw[(i<<10)+j],
-          GETLE16(&psxVuw[((((posY>>16)%TWin.Position.y1)+GlobalTextAddrY+TWin.Position.y0)<<10)+
-                 ((posX>>16)%TWin.Position.x1)+GlobalTextAddrX+TWin.Position.x0]),
+          GETLE16(&psxVuw[((((posY>>16)&TWin.ymask)+GlobalTextAddrY+TWin.Position.y0)<<10)+
+                 ((posX>>16)&TWin.xmask)+GlobalTextAddrX+TWin.Position.x0]),
           (cB1>>16),(cG1>>16),(cR1>>16));
        posX+=difX;
        posY+=difY;
@@ -6949,19 +5986,6 @@ static void drawPoly3FT(unsigned char * baseAddr)
 {
  uint32_t *gpuData = ((uint32_t *) baseAddr);
 
- if(GlobalTextIL && GlobalTextTP<2)
-  {
-   if(GlobalTextTP==0)
-    drawPoly3TEx4_IL(lx0,ly0,lx1,ly1,lx2,ly2,
-                     (GETLE32(&gpuData[2]) & 0x000000ff), ((GETLE32(&gpuData[2])>>8) & 0x000000ff), (GETLE32(&gpuData[4]) & 0x000000ff), ((GETLE32(&gpuData[4])>>8) & 0x000000ff),(GETLE32(&gpuData[6]) & 0x000000ff), ((GETLE32(&gpuData[6])>>8) & 0x000000ff), 
-                     ((GETLE32(&gpuData[2])>>12) & 0x3f0), ((GETLE32(&gpuData[2])>>22) & 511));
-   else
-    drawPoly3TEx8_IL(lx0,ly0,lx1,ly1,lx2,ly2,
-                     (GETLE32(&gpuData[2]) & 0x000000ff), ((GETLE32(&gpuData[2])>>8) & 0x000000ff), (GETLE32(&gpuData[4]) & 0x000000ff), ((GETLE32(&gpuData[4])>>8) & 0x000000ff),(GETLE32(&gpuData[6]) & 0x000000ff), ((GETLE32(&gpuData[6])>>8) & 0x000000ff), 
-                     ((GETLE32(&gpuData[2])>>12) & 0x3f0), ((GETLE32(&gpuData[2])>>22) & 511));
-   return;
-  }
-
  if(!bUsingTWin && !(dwActFixes&0x100))
   {
    switch(GlobalTextTP)   // depending on texture mode
@@ -7007,17 +6031,6 @@ static void drawPoly4FT(unsigned char * baseAddr)
 {
  uint32_t *gpuData = ((uint32_t *) baseAddr);
 
- if(GlobalTextIL && GlobalTextTP<2)
-  {
-   if(GlobalTextTP==0)
-    drawPoly4TEx4_IL(lx0,ly0,lx1,ly1,lx3,ly3,lx2,ly2,
-                     (GETLE32(&gpuData[2]) & 0x000000ff), ((GETLE32(&gpuData[2])>>8) & 0x000000ff), (GETLE32(&gpuData[4]) & 0x000000ff), ((GETLE32(&gpuData[4])>>8) & 0x000000ff),(GETLE32(&gpuData[8]) & 0x000000ff), ((GETLE32(&gpuData[8])>>8) & 0x000000ff),(GETLE32(&gpuData[6]) & 0x000000ff), ((GETLE32(&gpuData[6])>>8) & 0x000000ff), ((GETLE32(&gpuData[2])>>12) & 0x3f0), ((GETLE32(&gpuData[2])>>22) & 511));
-   else
-    drawPoly4TEx8_IL(lx0,ly0,lx1,ly1,lx3,ly3,lx2,ly2,
-                  (GETLE32(&gpuData[2]) & 0x000000ff), ((GETLE32(&gpuData[2])>>8) & 0x000000ff), (GETLE32(&gpuData[4]) & 0x000000ff), ((GETLE32(&gpuData[4])>>8) & 0x000000ff),(GETLE32(&gpuData[8]) & 0x000000ff), ((GETLE32(&gpuData[8])>>8) & 0x000000ff),(GETLE32(&gpuData[6]) & 0x000000ff), ((GETLE32(&gpuData[6])>>8) & 0x000000ff), ((GETLE32(&gpuData[2])>>12) & 0x3f0), ((GETLE32(&gpuData[2])>>22) & 511));
-   return;
-  }
-
  if(!bUsingTWin)
   {
 #ifdef POLYQUAD3GT
@@ -7080,21 +6093,6 @@ static void drawPoly3GT(unsigned char * baseAddr)
 {
  uint32_t *gpuData = ((uint32_t *) baseAddr);
 
- if(GlobalTextIL && GlobalTextTP<2)
-  {
-   if(GlobalTextTP==0)
-    drawPoly3TGEx4_IL(lx0,ly0,lx1,ly1,lx2,ly2,
-                      (GETLE32(&gpuData[2]) & 0x000000ff), ((GETLE32(&gpuData[2])>>8) & 0x000000ff), (GETLE32(&gpuData[5]) & 0x000000ff), ((GETLE32(&gpuData[5])>>8) & 0x000000ff),(GETLE32(&gpuData[8]) & 0x000000ff), ((GETLE32(&gpuData[8])>>8) & 0x000000ff), 
-                      ((GETLE32(&gpuData[2])>>12) & 0x3f0), ((GETLE32(&gpuData[2])>>22) & 511),
-                      GETLE32(&gpuData[0]),GETLE32(&gpuData[3]),GETLE32(&gpuData[6]));
-   else
-    drawPoly3TGEx8_IL(lx0,ly0,lx1,ly1,lx2,ly2,
-                      (GETLE32(&gpuData[2]) & 0x000000ff), ((GETLE32(&gpuData[2])>>8) & 0x000000ff), (GETLE32(&gpuData[5]) & 0x000000ff), ((GETLE32(&gpuData[5])>>8) & 0x000000ff),(GETLE32(&gpuData[8]) & 0x000000ff), ((GETLE32(&gpuData[8])>>8) & 0x000000ff), 
-                      ((GETLE32(&gpuData[2])>>12) & 0x3f0), ((GETLE32(&gpuData[2])>>22) & 511),
-                      GETLE32(&gpuData[0]),GETLE32(&gpuData[3]),GETLE32(&gpuData[6]));
-   return;
-  }
-
  if(!bUsingTWin)
   {
    switch (GlobalTextTP)
@@ -7144,21 +6142,6 @@ static void drawPoly4GT(unsigned char *baseAddr)
 {
  uint32_t *gpuData = ((uint32_t *) baseAddr);
 
- if(GlobalTextIL && GlobalTextTP<2)
-  {
-   if(GlobalTextTP==0)
-    drawPoly4TGEx4_TRI_IL(lx0,ly0,lx1,ly1,lx3,ly3,lx2,ly2,
-                          (GETLE32(&gpuData[2]) & 0x000000ff), ((GETLE32(&gpuData[2])>>8) & 0x000000ff), (GETLE32(&gpuData[5]) & 0x000000ff), ((GETLE32(&gpuData[5])>>8) & 0x000000ff),(GETLE32(&gpuData[11]) & 0x000000ff), ((GETLE32(&gpuData[11])>>8) & 0x000000ff),(GETLE32(&gpuData[8]) & 0x000000ff), ((GETLE32(&gpuData[8])>>8) & 0x000000ff),
-                          ((GETLE32(&gpuData[2])>>12) & 0x3f0),((GETLE32(&gpuData[2])>>22) & 511),
-                          GETLE32(&gpuData[0]),GETLE32(&gpuData[3]),GETLE32(&gpuData[6]),GETLE32(&gpuData[9]));
-   else
-    drawPoly4TGEx8_TRI_IL(lx0,ly0,lx1,ly1,lx3,ly3,lx2,ly2,
-                          (GETLE32(&gpuData[2]) & 0x000000ff), ((GETLE32(&gpuData[2])>>8) & 0x000000ff), (GETLE32(&gpuData[5]) & 0x000000ff), ((GETLE32(&gpuData[5])>>8) & 0x000000ff),(GETLE32(&gpuData[11]) & 0x000000ff), ((GETLE32(&gpuData[11])>>8) & 0x000000ff),(GETLE32(&gpuData[8]) & 0x000000ff), ((GETLE32(&gpuData[8])>>8) & 0x000000ff),
-                          ((GETLE32(&gpuData[2])>>12) & 0x3f0),((GETLE32(&gpuData[2])>>22) & 511),
-                          GETLE32(&gpuData[0]),GETLE32(&gpuData[3]),GETLE32(&gpuData[6]),GETLE32(&gpuData[9]));
-   return;
-  }
-
  if(!bUsingTWin)
   {
 #ifdef POLYQUAD3GT
@@ -7367,44 +6350,6 @@ static void DrawSoftwareSpriteMirror(unsigned char * baseAddr,int32_t w,int32_t
 
 ////////////////////////////////////////////////////////////////////////
 
-void DrawSoftwareSprite_IL(unsigned char * baseAddr,short w,short h,int32_t tx,int32_t ty)
-{
- int32_t sprtY,sprtX,sprtW,sprtH,tdx,tdy;
- uint32_t *gpuData = (uint32_t *)baseAddr;
-
- sprtY = ly0;
- sprtX = lx0;
- sprtH = h;
- sprtW = w;
-
- sprtX+=PSXDisplay.DrawOffset.x;
- sprtY+=PSXDisplay.DrawOffset.y;
-
- if(sprtX>drawW) return;
- if(sprtY>drawH) return;
-
- tdx=tx+sprtW;
- tdy=ty+sprtH;
-
- sprtW+=sprtX;
- sprtH+=sprtY;
-
- // Pete is too lazy to make a faster version ;)
-
- if(GlobalTextTP==0)
-  drawPoly4TEx4_IL(sprtX,sprtY,sprtX,sprtH,sprtW,sprtH,sprtW,sprtY,
-                   tx,ty,      tx,tdy,     tdx,tdy,    tdx,ty,     
-                   (GETLE32(&gpuData[2])>>12) & 0x3f0, ((GETLE32(&gpuData[2])>>22) & 511));
-
-
- else
-  drawPoly4TEx8_IL(sprtX,sprtY,sprtX,sprtH,sprtW,sprtH,sprtW,sprtY,
-                   tx,ty,      tx,tdy,     tdx,tdy,    tdx,ty,     
-                   (GETLE32(&gpuData[2])>>12) & 0x3f0, ((GETLE32(&gpuData[2])>>22) & 511));
-}
-
-////////////////////////////////////////////////////////////////////////
-
 static void DrawSoftwareSprite(unsigned char * baseAddr,short w,short h,int32_t tx,int32_t ty)
 {
  int32_t sprtY,sprtX,sprtW,sprtH;
@@ -7414,9 +6359,6 @@ static void DrawSoftwareSprite(unsigned char * baseAddr,short w,short h,int32_t
  unsigned char * pV;
  BOOL bWT,bWS;
 
- if(GlobalTextIL && GlobalTextTP<2)
-  {DrawSoftwareSprite_IL(baseAddr,w,h,tx,ty);return;}
-
  sprtY = ly0;
  sprtX = lx0;
  sprtH = h;
@@ -8150,7 +7092,7 @@ static void DrawSoftwareLineShade(int32_t rgb0, int32_t rgb1)
 {
        short x0, y0, x1, y1, xt, yt;
        int32_t rgbt;
-       double m, dy, dx;
+       int dy, dx;
 
        if (lx0 > drawW && lx1 > drawW) return;
        if (ly0 > drawH && ly1 > drawH) return;
@@ -8200,17 +7142,15 @@ static void DrawSoftwareLineShade(int32_t rgb0, int32_t rgb1)
                                dy = y1 - y0;
                        }
 
-                       m = dy / dx;
-
-                       if (m >= 0)
+                       if ((dx >= 0 && dy >= 0) || (dx < 0 && dy < 0))
                        {
-                               if (m > 1)
+                               if (abs(dy) > abs(dx))
                                        Line_S_SE_Shade(x0, y0, x1, y1, rgb0, rgb1);
                                else
                                        Line_E_SE_Shade(x0, y0, x1, y1, rgb0, rgb1);
                        }
                        else
-                               if (m < -1)
+                               if (abs(dy) > abs(dx))
                                        Line_N_NE_Shade(x0, y0, x1, y1, rgb0, rgb1);
                                else
                                        Line_E_NE_Shade(x0, y0, x1, y1, rgb0, rgb1);
@@ -8222,7 +7162,7 @@ static void DrawSoftwareLineShade(int32_t rgb0, int32_t rgb1)
 static void DrawSoftwareLineFlat(int32_t rgb)
 {
        short x0, y0, x1, y1, xt, yt;
-       double m, dy, dx;
+       int dy, dx;
        unsigned short colour = 0;
  
        if (lx0 > drawW && lx1 > drawW) return;
@@ -8274,17 +7214,15 @@ static void DrawSoftwareLineFlat(int32_t rgb)
                                dy = y1 - y0;
                        }
 
-                       m = dy/dx;
-
-                       if (m >= 0)
+                       if ((dx >= 0 && dy >= 0) || (dx < 0 && dy < 0))
                        {
-                               if (m > 1)
+                               if (abs(dy) > abs(dx))
                                        Line_S_SE_Flat(x0, y0, x1, y1, colour);
                                else
                                        Line_E_SE_Flat(x0, y0, x1, y1, colour);
                        }
                        else
-                               if (m < -1)
+                               if (abs(dy) > abs(dx))
                                        Line_N_NE_Flat(x0, y0, x1, y1, colour);
                                else
                                        Line_E_NE_Flat(x0, y0, x1, y1, colour);