plugins/gpu_unai/gpu_raster_polygon.h

   1 /***************************************************************************
   2 *   Copyright (C) 2010 PCSX4ALL Team                                      *
   3 *   Copyright (C) 2010 Unai                                               *
   4 *                                                                         *
   5 *   This program is free software; you can redistribute it and/or modify  *
   6 *   it under the terms of the GNU General Public License as published by  *
   7 *   the Free Software Foundation; either version 2 of the License, or     *
   8 *   (at your option) any later version.                                   *
   9 *                                                                         *
  10 *   This program is distributed in the hope that it will be useful,       *
  11 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
  12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
  13 *   GNU General Public License for more details.                          *
  14 *                                                                         *
  15 *   You should have received a copy of the GNU General Public License     *
  16 *   along with this program; if not, write to the                         *
  17 *   Free Software Foundation, Inc.,                                       *
  18 *   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
  19 ***************************************************************************/
  20
  21 #ifndef __GPU_UNAI_GPU_RASTER_POLYGON_H__
  22 #define __GPU_UNAI_GPU_RASTER_POLYGON_H__
  23
  24 //senquack - NOTE: GPU Unai poly routines have been rewritten/adapted
  25 // from DrHell routines to fix multiple issues. See README_senquack.txt
  26
  27 ///////////////////////////////////////////////////////////////////////////////
  28 // Shared poly vertex buffer, able to handle 3 or 4-pt polys of any type.
  29 ///////////////////////////////////////////////////////////////////////////////
  30
  31 struct PolyVertex {
  32         s32 x, y; // Sign-extended 11-bit X,Y coords
  33         union {
  34 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  35                 struct { u8 pad[2], v, u; } tex; // Texture coords (if used)
  36 #else
  37                 struct { u8 u, v, pad[2]; } tex; // Texture coords (if used)
  38 #endif
  39                 u32 tex_word;
  40         };
  41         union {
  42 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  43                 struct { u8 pad, b, g, r; } col; // 24-bit RGB color (if used)
  44 #else
  45                 struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used)
  46 #endif
  47                 u32 col_word;
  48         };
  49 };
  50
  51 enum PolyAttribute {
  52         POLYATTR_TEXTURE = (1 << 0),
  53         POLYATTR_GOURAUD = (1 << 1)
  54 };
  55
  56 enum PolyType {
  57         POLYTYPE_F  = 0,
  58         POLYTYPE_FT = (POLYATTR_TEXTURE),
  59         POLYTYPE_G  = (POLYATTR_GOURAUD),
  60         POLYTYPE_GT = (POLYATTR_TEXTURE | POLYATTR_GOURAUD)
  61 };
  62
  63 ///////////////////////////////////////////////////////////////////////////////
  64 // polyInitVertexBuffer()
  65 // Fills vbuf[] array with data from any type of poly draw-command packet.
  66 ///////////////////////////////////////////////////////////////////////////////
  67 static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyType ptype, u32 is_quad)
  68 {
  69         bool texturing = ptype & POLYATTR_TEXTURE;
  70         bool gouraud   = ptype & POLYATTR_GOURAUD;
  71
  72         int vert_stride = 1; // Stride of vertices in cmd packet, in 32-bit words
  73         if (texturing)
  74                 vert_stride++;
  75         if (gouraud)
  76                 vert_stride++;
  77
  78         int num_verts = (is_quad) ? 4 : 3;
  79         le32_t *ptr;
  80
  81         // X,Y coords
  82         ptr = &packet.U4[1];
  83         for (int i=0;  i < num_verts; ++i, ptr += vert_stride) {
  84                 u32 coords = le32_to_u32(*ptr);
  85                 vbuf[i].x = GPU_EXPANDSIGN(coords);
  86                 vbuf[i].y = GPU_EXPANDSIGN(coords >> 16);
  87         }
  88
  89         // U,V texture coords (if applicable)
  90         if (texturing) {
  91                 ptr = &packet.U4[2];
  92                 for (int i=0;  i < num_verts; ++i, ptr += vert_stride)
  93                         vbuf[i].tex_word = le32_to_u32(*ptr);
  94         }
  95
  96         // Colors (if applicable)
  97         if (gouraud) {
  98                 ptr = &packet.U4[0];
  99                 for (int i=0;  i < num_verts; ++i, ptr += vert_stride)
 100                         vbuf[i].col_word = le32_to_u32(*ptr);
 101         }
 102 }
 103
 104 ///////////////////////////////////////////////////////////////////////////////
 105 //  Helper functions to determine which vertex in a 2 or 3 vertex array
 106 //   has the highest/lowest X/Y coordinate.
 107 //   Note: the comparison logic is such that, given a set of vertices with
 108 //    identical values for a given coordinate, a different index will be
 109 //    returned from vertIdxOfLeast..() than a call to vertIdxOfHighest..().
 110 //    This ensures that, during the vertex-ordering phase of rasterization,
 111 //    all three vertices remain unique.
 112 ///////////////////////////////////////////////////////////////////////////////
 113
 114 template<typename T>
 115 static inline int vertIdxOfLeastXCoord2(const T *Tptr)
 116 {
 117         return (Tptr[0].x <= Tptr[1].x) ? 0 : 1;
 118 }
 119
 120 template<typename T>
 121 static inline int vertIdxOfLeastXCoord3(const T *Tptr)
 122 {
 123         int least_of_v0_v1 = vertIdxOfLeastXCoord2(Tptr);
 124         return (Tptr[least_of_v0_v1].x <= Tptr[2].x) ? least_of_v0_v1 : 2;
 125 }
 126
 127 template<typename T>
 128 static inline int vertIdxOfLeastYCoord2(const T *Tptr)
 129 {
 130         return (Tptr[0].y <= Tptr[1].y) ? 0 : 1;
 131 }
 132
 133 template<typename T>
 134 static inline int vertIdxOfLeastYCoord3(const T *Tptr)
 135 {
 136         int least_of_v0_v1 = vertIdxOfLeastYCoord2(Tptr);
 137         return (Tptr[least_of_v0_v1].y <= Tptr[2].y) ? least_of_v0_v1 : 2;
 138 }
 139
 140 template<typename T>
 141 static inline int vertIdxOfHighestXCoord2(const T *Tptr)
 142 {
 143         return (Tptr[1].x >= Tptr[0].x) ? 1 : 0;
 144 }
 145
 146 template<typename T>
 147 static inline int vertIdxOfHighestXCoord3(const T *Tptr)
 148 {
 149         int highest_of_v0_v1 = vertIdxOfHighestXCoord2(Tptr);
 150         return (Tptr[2].x >= Tptr[highest_of_v0_v1].x) ? 2 : highest_of_v0_v1;
 151 }
 152
 153 template<typename T>
 154 static inline int vertIdxOfHighestYCoord2(const T *Tptr)
 155 {
 156         return (Tptr[1].y >= Tptr[0].y) ? 1 : 0;
 157 }
 158
 159 template<typename T>
 160 static inline int vertIdxOfHighestYCoord3(const T *Tptr)
 161 {
 162         int highest_of_v0_v1 = vertIdxOfHighestYCoord2(Tptr);
 163         return (Tptr[2].y >= Tptr[highest_of_v0_v1].y) ? 2 : highest_of_v0_v1;
 164 }
 165
 166 ///////////////////////////////////////////////////////////////////////////////
 167 // polyUseTriangle()
 168 //  Determines if the specified triangle should be rendered. If so, it
 169 //  fills the given array of vertex pointers, vert_ptrs, in order of
 170 //  increasing Y coordinate values, as required by rasterization algorithm.
 171 //  Parameter 'tri_num' is 0 for first triangle (idx 0,1,2 of vbuf[]),
 172 //   or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]).
 173 //  Returns true if triangle should be rendered, false if not.
 174 ///////////////////////////////////////////////////////////////////////////////
 175 static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs, s32 &x_off, s32 &y_off)
 176 {
 177         // Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)?
 178         const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 0 : 1];
 179
 180         // Get indices of highest/lowest X,Y coords within triangle
 181         int idx_lowest_x  = vertIdxOfLeastXCoord3(tri_ptr);
 182         int idx_highest_x = vertIdxOfHighestXCoord3(tri_ptr);
 183         int idx_lowest_y  = vertIdxOfLeastYCoord3(tri_ptr);
 184         int idx_highest_y = vertIdxOfHighestYCoord3(tri_ptr);
 185
 186         // Maximum absolute distance between any two X coordinates is 1023,
 187         //  and for Y coordinates is 511 (PS1 hardware limitation)
 188         int lowest_x  = tri_ptr[idx_lowest_x].x;
 189         int highest_x = tri_ptr[idx_highest_x].x;
 190         int lowest_y  = tri_ptr[idx_lowest_y].y;
 191         int highest_y = tri_ptr[idx_highest_y].y;
 192         if ((highest_x - lowest_x) >= CHKMAX_X ||
 193             (highest_y - lowest_y) >= CHKMAX_Y)
 194                 return false;
 195
 196         // Determine offsets
 197         x_off = gpu_unai.DrawingOffset[0];
 198         y_off = gpu_unai.DrawingOffset[1];
 199         x_off = GPU_EXPANDSIGN(lowest_x + x_off) - lowest_x;
 200         y_off = GPU_EXPANDSIGN(lowest_y + y_off) - lowest_y;
 201
 202         // Determine if triangle is completely outside clipping range
 203         s32 xmin, xmax, ymin, ymax;
 204         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
 205         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
 206         int clipped_lowest_x  = Max2(xmin, lowest_x + x_off);
 207         int clipped_lowest_y  = Max2(ymin, lowest_y + y_off);
 208         int clipped_highest_x = Min2(xmax, highest_x + x_off);
 209         int clipped_highest_y = Min2(ymax, highest_y + y_off);
 210         if (clipped_lowest_x >= clipped_highest_x ||
 211             clipped_lowest_y >= clipped_highest_y)
 212                 return false;
 213
 214         // Order vertex ptrs by increasing y value (draw routines need this).
 215         // The middle index is deduced by a binary math trick that depends
 216         //  on index range always being between 0..2
 217         vert_ptrs[0] = tri_ptr + idx_lowest_y;
 218         vert_ptrs[1] = tri_ptr + ((idx_lowest_y + idx_highest_y) ^ 3);
 219         vert_ptrs[2] = tri_ptr + idx_highest_y;
 220         return true;
 221 }
 222
 223 ///////////////////////////////////////////////////////////////////////////////
 224 //  GPU internal polygon drawing functions
 225 ///////////////////////////////////////////////////////////////////////////////
 226
 227 /*----------------------------------------------------------------------
 228 gpuDrawPolyF - Flat-shaded, untextured poly
 229 ----------------------------------------------------------------------*/
 230 void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad,
 231         PolyType ptype = POLYTYPE_F)
 232 {
 233         // Set up bgr555 color to be used across calls in inner driver
 234         gpu_unai.inn.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0]));
 235
 236         PolyVertex vbuf[4];
 237         polyInitVertexBuffer(vbuf, packet, ptype, is_quad);
 238
 239         int total_passes = is_quad ? 2 : 1;
 240         int cur_pass = 0;
 241         do
 242         {
 243                 const PolyVertex* vptrs[3];
 244                 s32 x_off, y_off;
 245                 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
 246                         continue;
 247
 248                 s32 xa, xb, ya, yb;
 249                 s32 x3, dx3, x4, dx4, dx;
 250                 s32 x0, x1, x2, y0, y1, y2;
 251
 252                 x0 = vptrs[0]->x + x_off;  y0 = vptrs[0]->y + y_off;
 253                 x1 = vptrs[1]->x + x_off;  y1 = vptrs[1]->y + y_off;
 254                 x2 = vptrs[2]->x + x_off;  y2 = vptrs[2]->y + y_off;
 255
 256                 ya = y2 - y0;
 257                 yb = y2 - y1;
 258                 dx = (x2 - x1) * ya - (x2 - x0) * yb;
 259
 260                 for (int loop0 = 2; loop0; loop0--) {
 261                         if (loop0 == 2) {
 262                                 ya = y0;  yb = y1;
 263                                 x3 = x4 = i2x(x0);
 264                                 if (dx < 0) {
 265 #ifdef GPU_UNAI_USE_FLOATMATH
 266 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
 267                                         dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
 268                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
 269 #else
 270                                         dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
 271                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
 272 #endif
 273 #else  // Integer Division:
 274 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
 275                                         dx3 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
 276                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
 277 #else
 278                                         dx3 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
 279                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
 280 #endif
 281 #endif
 282                                 } else {
 283 #ifdef GPU_UNAI_USE_FLOATMATH
 284 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
 285                                         dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
 286                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
 287 #else
 288                                         dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
 289                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
 290 #endif
 291 #else  // Integer Division:
 292 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
 293                                         dx3 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
 294                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
 295 #else
 296                                         dx3 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
 297                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
 298 #endif
 299 #endif
 300                                 }
 301                         } else {
 302                                 //senquack - break out of final loop if nothing to be drawn (1st loop
 303                                 //           must always be taken to setup dx3/dx4)
 304                                 if (y1 == y2) break;
 305
 306                                 ya = y1;  yb = y2;
 307
 308                                 if (dx < 0) {
 309                                         x3 = i2x(x0) + (dx3 * (y1 - y0));
 310                                         x4 = i2x(x1);
 311 #ifdef GPU_UNAI_USE_FLOATMATH
 312 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
 313                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
 314 #else
 315                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
 316 #endif
 317 #else  // Integer Division:
 318 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
 319                                         dx4 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
 320 #else
 321                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
 322 #endif
 323 #endif
 324                                 } else {
 325                                         x3 = i2x(x1);
 326                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
 327 #ifdef GPU_UNAI_USE_FLOATMATH
 328 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
 329                                         dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
 330 #else
 331                                         dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
 332 #endif
 333 #else  // Integer Division:
 334 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
 335                                         dx3 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
 336 #else
 337                                         dx3 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
 338 #endif
 339 #endif
 340                                 }
 341                         }
 342
 343                         s32 xmin, xmax, ymin, ymax;
 344                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
 345                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
 346
 347                         if ((ymin - ya) > 0) {
 348                                 x3 += (dx3 * (ymin - ya));
 349                                 x4 += (dx4 * (ymin - ya));
 350                                 ya = ymin;
 351                         }
 352
 353                         if (yb > ymax) yb = ymax;
 354
 355                         int loop1 = yb - ya;
 356                         if (loop1 <= 0)
 357                                 continue;
 358
 359                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
 360                         int li=gpu_unai.inn.ilace_mask;
 361                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
 362                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
 363
 364                         for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH,
 365                                         x3 += dx3, x4 += dx4 )
 366                         {
 367                                 if (ya&li) continue;
 368                                 if ((ya&pi)==pif) continue;
 369
 370                                 xa = FixedCeilToInt(x3);  xb = FixedCeilToInt(x4);
 371                                 if ((xmin - xa) > 0) xa = xmin;
 372                                 if (xb > xmax) xb = xmax;
 373                                 if ((xb - xa) > 0)
 374                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
 375                         }
 376                 }
 377         } while (++cur_pass < total_passes);
 378 }
 379
 380 /*----------------------------------------------------------------------
 381 gpuDrawPolyFT - Flat-shaded, textured poly
 382 ----------------------------------------------------------------------*/
 383 void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad,
 384         PolyType ptype = POLYTYPE_FT)
 385 {
 386         // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light)
 387         gpu_unai.inn.r8 = packet.U1[0];
 388         gpu_unai.inn.g8 = packet.U1[1];
 389         gpu_unai.inn.b8 = packet.U1[2];
 390         // r5/g5/b5 used if just texture-blending is applied (15-bit light)
 391         gpu_unai.inn.r5 = packet.U1[0] >> 3;
 392         gpu_unai.inn.g5 = packet.U1[1] >> 3;
 393         gpu_unai.inn.b5 = packet.U1[2] >> 3;
 394
 395         PolyVertex vbuf[4];
 396         polyInitVertexBuffer(vbuf, packet, ptype, is_quad);
 397
 398         int total_passes = is_quad ? 2 : 1;
 399         int cur_pass = 0;
 400         do
 401         {
 402                 const PolyVertex* vptrs[3];
 403                 s32 x_off, y_off;
 404                 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
 405                         continue;
 406
 407                 s32 xa, xb, ya, yb;
 408                 s32 x3, dx3, x4, dx4, dx;
 409                 s32 u3, du3, v3, dv3;
 410                 s32 x0, x1, x2, y0, y1, y2;
 411                 s32 u0, u1, u2, v0, v1, v2;
 412                 s32 du4, dv4;
 413
 414                 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
 415                 u0 = vptrs[0]->tex.u;     v0 = vptrs[0]->tex.v;
 416                 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
 417                 u1 = vptrs[1]->tex.u;     v1 = vptrs[1]->tex.v;
 418                 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
 419                 u2 = vptrs[2]->tex.u;     v2 = vptrs[2]->tex.v;
 420
 421                 ya = y2 - y0;
 422                 yb = y2 - y1;
 423                 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
 424                 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
 425                 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
 426                 dx = dx4;
 427                 if (dx4 < 0) {
 428                         dx4 = -dx4;
 429                         du4 = -du4;
 430                         dv4 = -dv4;
 431                 }
 432
 433 #ifdef GPU_UNAI_USE_FLOATMATH
 434 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
 435                 if (dx4 != 0) {
 436                         float finv = FloatInv(dx4);
 437                         du4 = (fixed)((du4 << FIXED_BITS) * finv);
 438                         dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
 439                 } else {
 440                         du4 = dv4 = 0;
 441                 }
 442 #else
 443                 if (dx4 != 0) {
 444                         float fdiv = dx4;
 445                         du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
 446                         dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
 447                 } else {
 448                         du4 = dv4 = 0;
 449                 }
 450 #endif
 451 #else  // Integer Division:
 452 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
 453                 if (dx4 != 0) {
 454                         int iF, iS;
 455                         xInv(dx4, iF, iS);
 456                         du4 = xInvMulx(du4, iF, iS);
 457                         dv4 = xInvMulx(dv4, iF, iS);
 458                 } else {
 459                         du4 = dv4 = 0;
 460                 }
 461 #else
 462                 if (dx4 != 0) {
 463                         du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
 464                         dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
 465                 } else {
 466                         du4 = dv4 = 0;
 467                 }
 468 #endif
 469 #endif
 470                 // Set u,v increments for inner driver
 471                 gpu_unai.inn.u_inc = du4;
 472                 gpu_unai.inn.v_inc = dv4;
 473
 474                 //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here?
 475                 //                       (SAME ISSUE ELSEWHERE)
 476                 for (s32 loop0 = 2; loop0; loop0--) {
 477                         if (loop0 == 2) {
 478                                 ya = y0;  yb = y1;
 479                                 x3 = x4 = i2x(x0);
 480                                 u3 = i2x(u0);  v3 = i2x(v0);
 481                                 if (dx < 0) {
 482 #ifdef GPU_UNAI_USE_FLOATMATH
 483 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
 484                                         if ((y2 - y0) != 0) {
 485                                                 float finv = FloatInv(y2 - y0);
 486                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
 487                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
 488                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
 489                                         } else {
 490                                                 dx3 = du3 = dv3 = 0;
 491                                         }
 492                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
 493 #else
 494                                         if ((y2 - y0) != 0) {
 495                                                 float fdiv = y2 - y0;
 496                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
 497                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
 498                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
 499                                         } else {
 500                                                 dx3 = du3 = dv3 = 0;
 501                                         }
 502                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
 503 #endif
 504 #else  // Integer Division:
 505 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
 506                                         if ((y2 - y0) != 0) {
 507                                                 int iF, iS;
 508                                                 xInv((y2 - y0), iF, iS);
 509                                                 dx3 = xInvMulx((x2 - x0), iF, iS);
 510                                                 du3 = xInvMulx((u2 - u0), iF, iS);
 511                                                 dv3 = xInvMulx((v2 - v0), iF, iS);
 512                                         } else {
 513                                                 dx3 = du3 = dv3 = 0;
 514                                         }
 515                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
 516 #else
 517                                         if ((y2 - y0) != 0) {
 518                                                 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
 519                                                 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
 520                                                 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
 521                                         } else {
 522                                                 dx3 = du3 = dv3 = 0;
 523                                         }
 524                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
 525 #endif
 526 #endif
 527                                 } else {
 528 #ifdef GPU_UNAI_USE_FLOATMATH
 529 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
 530                                         if ((y1 - y0) != 0) {
 531                                                 float finv = FloatInv(y1 - y0);
 532                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
 533                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
 534                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
 535                                         } else {
 536                                                 dx3 = du3 = dv3 = 0;
 537                                         }
 538                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
 539 #else
 540                                         if ((y1 - y0) != 0) {
 541                                                 float fdiv = y1 - y0;
 542                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
 543                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
 544                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
 545                                         } else {
 546                                                 dx3 = du3 = dv3 = 0;
 547                                         }
 548                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
 549 #endif
 550 #else  // Integer Division:
 551 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
 552                                         if ((y1 - y0) != 0) {
 553                                                 int iF, iS;
 554                                                 xInv((y1 - y0), iF, iS);
 555                                                 dx3 = xInvMulx((x1 - x0), iF, iS);
 556                                                 du3 = xInvMulx((u1 - u0), iF, iS);
 557                                                 dv3 = xInvMulx((v1 - v0), iF, iS);
 558                                         } else {
 559                                                 dx3 = du3 = dv3 = 0;
 560                                         }
 561                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
 562 #else
 563                                         if ((y1 - y0) != 0) {
 564                                                 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
 565                                                 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
 566                                                 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
 567                                         } else {
 568                                                 dx3 = du3 = dv3 = 0;
 569                                         }
 570                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
 571 #endif
 572 #endif
 573                                 }
 574                         } else {
 575                                 //senquack - break out of final loop if nothing to be drawn (1st loop
 576                                 //           must always be taken to setup dx3/dx4)
 577                                 if (y1 == y2) break;
 578
 579                                 ya = y1;  yb = y2;
 580
 581                                 if (dx < 0) {
 582                                         x3 = i2x(x0);
 583                                         x4 = i2x(x1);
 584                                         u3 = i2x(u0);
 585                                         v3 = i2x(v0);
 586                                         if ((y1 - y0) != 0) {
 587                                                 x3 += (dx3 * (y1 - y0));
 588                                                 u3 += (du3 * (y1 - y0));
 589                                                 v3 += (dv3 * (y1 - y0));
 590                                         }
 591 #ifdef GPU_UNAI_USE_FLOATMATH
 592 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
 593                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
 594 #else
 595                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
 596 #endif
 597 #else  // Integer Division:
 598 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
 599                                         dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
 600 #else
 601                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
 602 #endif
 603 #endif
 604                                 } else {
 605                                         x3 = i2x(x1);
 606                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
 607                                         u3 = i2x(u1);
 608                                         v3 = i2x(v1);
 609 #ifdef GPU_UNAI_USE_FLOATMATH
 610 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
 611                                         if ((y2 - y1) != 0) {
 612                                                 float finv = FloatInv(y2 - y1);
 613                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
 614                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
 615                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
 616                                         } else {
 617                                                 dx3 = du3 = dv3 = 0;
 618                                         }
 619 #else
 620                                         if ((y2 - y1) != 0) {
 621                                                 float fdiv = y2 - y1;
 622                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
 623                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
 624                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
 625                                         } else {
 626                                                 dx3 = du3 = dv3 = 0;
 627                                         }
 628 #endif
 629 #else  // Integer Division:
 630 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
 631                                         if ((y2 - y1) != 0) {
 632                                                 int iF, iS;
 633                                                 xInv((y2 - y1), iF, iS);
 634                                                 dx3 = xInvMulx((x2 - x1), iF, iS);
 635                                                 du3 = xInvMulx((u2 - u1), iF, iS);
 636                                                 dv3 = xInvMulx((v2 - v1), iF, iS);
 637                                         } else {
 638                                                 dx3 = du3 = dv3 = 0;
 639                                         }
 640 #else
 641                                         if ((y2 - y1) != 0) {
 642                                                 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
 643                                                 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
 644                                                 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
 645                                         } else {
 646                                                 dx3 = du3 = dv3 = 0;
 647                                         }
 648 #endif
 649 #endif
 650                                 }
 651                         }
 652
 653                         s32 xmin, xmax, ymin, ymax;
 654                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
 655                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
 656
 657                         if ((ymin - ya) > 0) {
 658                                 x3 += dx3 * (ymin - ya);
 659                                 x4 += dx4 * (ymin - ya);
 660                                 u3 += du3 * (ymin - ya);
 661                                 v3 += dv3 * (ymin - ya);
 662                                 ya = ymin;
 663                         }
 664
 665                         if (yb > ymax) yb = ymax;
 666
 667                         int loop1 = yb - ya;
 668                         if (loop1 <= 0)
 669                                 continue;
 670
 671                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
 672                         int li=gpu_unai.inn.ilace_mask;
 673                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
 674                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
 675
 676                         for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
 677                                         x3 += dx3, x4 += dx4,
 678                                         u3 += du3, v3 += dv3 )
 679                         {
 680                                 if (ya&li) continue;
 681                                 if ((ya&pi)==pif) continue;
 682
 683                                 u32 u4, v4;
 684
 685                                 xa = FixedCeilToInt(x3);  xb = FixedCeilToInt(x4);
 686                                 u4 = u3;  v4 = v3;
 687
 688                                 fixed itmp = i2x(xa) - x3;
 689                                 if (itmp != 0) {
 690                                         u4 += (du4 * itmp) >> FIXED_BITS;
 691                                         v4 += (dv4 * itmp) >> FIXED_BITS;
 692                                 }
 693
 694                                 u4 += fixed_HALF;
 695                                 v4 += fixed_HALF;
 696
 697                                 if ((xmin - xa) > 0) {
 698                                         u4 += du4 * (xmin - xa);
 699                                         v4 += dv4 * (xmin - xa);
 700                                         xa = xmin;
 701                                 }
 702
 703                                 // Set u,v coords for inner driver
 704                                 gpu_unai.inn.u = u4;
 705                                 gpu_unai.inn.v = v4;
 706
 707                                 if (xb > xmax) xb = xmax;
 708                                 if ((xb - xa) > 0)
 709                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
 710                         }
 711                 }
 712         } while (++cur_pass < total_passes);
 713 }
 714
 715 /*----------------------------------------------------------------------
 716 gpuDrawPolyG - Gouraud-shaded, untextured poly
     
     Rasterizes a 3- or 4-point Gouraud-shaded polygon by walking its edges
     row by row and handing each horizontal span to the supplied inner
     driver.
     
     packet            - primitive data, passed to polyInitVertexBuffer()
                         together with POLYTYPE_G to fill the local vertex
                         buffer.
     gpuPolySpanDriver - inner span routine; called once per visible row as
                         gpuPolySpanDriver(gpu_unai, first_pixel, pixel_count).
     is_quad           - non-zero: two triangle passes are made, else one.
     
     Before the rows of a triangle are drawn, the packed per-pixel color
     increment is stored in gpu_unai.inn.gInc; before each span, the packed
     start color is stored in gpu_unai.inn.gCol.
     
     Edge slopes are computed in fixed point (FIXED_BITS fractional bits).
     The division method is chosen at compile time through
     GPU_UNAI_USE_FLOATMATH, GPU_UNAI_USE_FLOAT_DIV_MULTINV and
     GPU_UNAI_USE_INT_DIV_MULTINV; every variant falls back to a zero slope
     when its divisor is zero.
 717 ----------------------------------------------------------------------*/
 718 void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
 719 {
 720         PolyVertex vbuf[4];
 721         polyInitVertexBuffer(vbuf, packet, POLYTYPE_G, is_quad);
 722
             // A quad is drawn as two triangle passes, a triangle as one.
 723         int total_passes = is_quad ? 2 : 1;
 724         int cur_pass = 0;
 725         do
 726         {
 727                 const PolyVertex* vptrs[3];
 728                 s32 x_off, y_off;
                     // polyUseTriangle() fills vptrs/x_off/y_off for this pass. When it
                     // returns false the pass is skipped; 'continue' inside a do-while
                     // jumps to the loop condition, so cur_pass still advances.
 729                 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
 730                         continue;
 731
 732                 s32 xa, xb, ya, yb;
 733                 s32 x3, dx3, x4, dx4, dx;
 734                 s32 r3, dr3, g3, dg3, b3, db3;
 735                 s32 x0, x1, x2, y0, y1, y2;
 736                 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
 737                 s32 dr4, dg4, db4;
 738
                     // Fetch position (with the pass offsets applied) and color of the
                     // three vertices.
 739                 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
 740                 r0 = vptrs[0]->col.r;     g0 = vptrs[0]->col.g;  b0 = vptrs[0]->col.b;
 741                 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
 742                 r1 = vptrs[1]->col.r;     g1 = vptrs[1]->col.g;  b1 = vptrs[1]->col.b;
 743                 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
 744                 r2 = vptrs[2]->col.r;     g2 = vptrs[2]->col.g;  b2 = vptrs[2]->col.b;
 745
                     // dx4 is a cross-product-style term built from the three vertices;
                     // the same expression is evaluated for every color channel.
                     // 'dx' keeps the original sign (it later selects which triangle
                     // edge x3 and x4 follow). dx4 and the channel terms are negated
                     // together so that the divisor used below is never negative.
 746                 ya = y2 - y0;
 747                 yb = y2 - y1;
 748                 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
 749                 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
 750                 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
 751                 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
 752                 dx = dx4;
 753                 if (dx4 < 0) {
 754                         dx4 = -dx4;
 755                         dr4 = -dr4;
 756                         dg4 = -dg4;
 757                         db4 = -db4;
 758                 }
 759
                     // Turn the channel terms into fixed-point per-pixel horizontal
                     // increments (channel term divided by dx4). All variants produce
                     // zero increments when dx4 == 0.
                     // NOTE(review): the xInv()/xInvMulx() variant is presumably
                     // equivalent to the explicit divisions - confirm in their definitions.
 760 #ifdef GPU_UNAI_USE_FLOATMATH
 761 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
 762                 if (dx4 != 0) {
 763                         float finv = FloatInv(dx4);
 764                         dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
 765                         dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
 766                         db4 = (fixed)((db4 << FIXED_BITS) * finv);
 767                 } else {
 768                         dr4 = dg4 = db4 = 0;
 769                 }
 770 #else
 771                 if (dx4 != 0) {
 772                         float fdiv = dx4;
 773                         dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
 774                         dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
 775                         db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
 776                 } else {
 777                         dr4 = dg4 = db4 = 0;
 778                 }
 779 #endif
 780 #else  // Integer Division:
 781 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
 782                 if (dx4 != 0) {
 783                         int iF, iS;
 784                         xInv(dx4, iF, iS);
 785                         dr4 = xInvMulx(dr4, iF, iS);
 786                         dg4 = xInvMulx(dg4, iF, iS);
 787                         db4 = xInvMulx(db4, iF, iS);
 788                 } else {
 789                         dr4 = dg4 = db4 = 0;
 790                 }
 791 #else
 792                 if (dx4 != 0) {
 793                         dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
 794                         dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
 795                         db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
 796                 } else {
 797                         dr4 = dg4 = db4 = 0;
 798                 }
 799 #endif
 800 #endif
 801                 // Setup packed Gouraud increment for inner driver
 802                 gpu_unai.inn.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
 803
                     // The triangle is drawn in two vertical sections:
                     //   loop0 == 2 : rows y0 .. y1
                     //   loop0 == 1 : rows y1 .. y2
                     // NOTE(review): this presumes the vertices arrive ordered by y
                     // (y0 <= y1 <= y2) - confirm in polyUseTriangle().
                     // From here on dx4 is reused as the per-row slope of the x4 edge.
 804                 for (s32 loop0 = 2; loop0; loop0--) {
 805                         if (loop0 == 2) {
                                     // First section: both edges start at vertex 0. x3 is the
                                     // span start edge (the colors r3/g3/b3 are tracked along
                                     // it), x4 is the span end edge.
 806                                 ya = y0;
 807                                 yb = y1;
 808                                 x3 = x4 = i2x(x0);
 809                                 r3 = i2x(r0);
 810                                 g3 = i2x(g0);
 811                                 b3 = i2x(b0);
 812                                 if (dx < 0) {
                                             // x3 (and the colors) follow edge v0->v2,
                                             // x4 follows edge v0->v1.
 813 #ifdef GPU_UNAI_USE_FLOATMATH
 814 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
 815                                         if ((y2 - y0) != 0) {
 816                                                 float finv = FloatInv(y2 - y0);
 817                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
 818                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
 819                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
 820                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
 821                                         } else {
 822                                                 dx3 = dr3 = dg3 = db3 = 0;
 823                                         }
 824                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
 825 #else
 826                                         if ((y2 - y0) != 0) {
 827                                                 float fdiv = y2 - y0;
 828                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
 829                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
 830                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
 831                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
 832                                         } else {
 833                                                 dx3 = dr3 = dg3 = db3 = 0;
 834                                         }
 835                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
 836 #endif
 837 #else  // Integer Division:
 838 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
 839                                         if ((y2 - y0) != 0) {
 840                                                 int iF, iS;
 841                                                 xInv((y2 - y0), iF, iS);
 842                                                 dx3 = xInvMulx((x2 - x0), iF, iS);
 843                                                 dr3 = xInvMulx((r2 - r0), iF, iS);
 844                                                 dg3 = xInvMulx((g2 - g0), iF, iS);
 845                                                 db3 = xInvMulx((b2 - b0), iF, iS);
 846                                         } else {
 847                                                 dx3 = dr3 = dg3 = db3 = 0;
 848                                         }
 849                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
 850 #else
 851                                         if ((y2 - y0) != 0) {
 852                                                 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
 853                                                 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
 854                                                 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
 855                                                 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
 856                                         } else {
 857                                                 dx3 = dr3 = dg3 = db3 = 0;
 858                                         }
 859                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
 860 #endif
 861 #endif
 862                                 } else {
                                             // x3 (and the colors) follow edge v0->v1,
                                             // x4 follows edge v0->v2.
 863 #ifdef GPU_UNAI_USE_FLOATMATH
 864 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
 865                                         if ((y1 - y0) != 0) {
 866                                                 float finv = FloatInv(y1 - y0);
 867                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
 868                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
 869                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
 870                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
 871                                         } else {
 872                                                 dx3 = dr3 = dg3 = db3 = 0;
 873                                         }
 874                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
 875 #else
 876                                         if ((y1 - y0) != 0) {
 877                                                 float fdiv = y1 - y0;
 878                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
 879                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
 880                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
 881                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
 882                                         } else {
 883                                                 dx3 = dr3 = dg3 = db3 = 0;
 884                                         }
 885                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
 886 #endif
 887 #else  // Integer Division:
 888 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
 889                                         if ((y1 - y0) != 0) {
 890                                                 int iF, iS;
 891                                                 xInv((y1 - y0), iF, iS);
 892                                                 dx3 = xInvMulx((x1 - x0), iF, iS);
 893                                                 dr3 = xInvMulx((r1 - r0), iF, iS);
 894                                                 dg3 = xInvMulx((g1 - g0), iF, iS);
 895                                                 db3 = xInvMulx((b1 - b0), iF, iS);
 896                                         } else {
 897                                                 dx3 = dr3 = dg3 = db3 = 0;
 898                                         }
 899                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
 900 #else
 901                                         if ((y1 - y0) != 0) {
 902                                                 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
 903                                                 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
 904                                                 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
 905                                                 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
 906                                         } else {
 907                                                 dx3 = dr3 = dg3 = db3 = 0;
 908                                         }
 909                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
 910 #endif
 911 #endif
 912                                 }
 913                         } else {
 914                                 //senquack - break out of final loop if nothing to be drawn (1st loop
 915                                 //           must always be taken to setup dx3/dx4)
 916                                 if (y1 == y2) break;
 917
 918                                 ya = y1;  yb = y2;
 919
 920                                 if (dx < 0) {
                                             // x3 keeps following edge v0->v2: restart from vertex 0
                                             // and advance by the (y1 - y0) rows of the first section,
                                             // using the slopes dx3/dr3/dg3/db3 computed there.
                                             // x4 restarts at vertex 1.
 921                                         x3 = i2x(x0);  x4 = i2x(x1);
 922                                         r3 = i2x(r0);  g3 = i2x(g0);  b3 = i2x(b0);
 923
 924                                         if ((y1 - y0) != 0) {
 925                                                 x3 += (dx3 * (y1 - y0));
 926                                                 r3 += (dr3 * (y1 - y0));
 927                                                 g3 += (dg3 * (y1 - y0));
 928                                                 b3 += (db3 * (y1 - y0));
 929                                         }
 930
                                             // New slope for x4 along edge v1->v2.
 931 #ifdef GPU_UNAI_USE_FLOATMATH
 932 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
 933                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
 934 #else
 935                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
 936 #endif
 937 #else  // Integer Division:
 938 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
 939                                         dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
 940 #else
 941                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
 942 #endif
 943 #endif
 944                                 } else {
                                             // x3 switches to edge v1->v2: position, colors and
                                             // slopes all restart at vertex 1. x4 keeps following
                                             // edge v0->v2, advanced by (y1 - y0) rows with the dx4
                                             // computed in the first section.
 945                                         x3 = i2x(x1);
 946                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
 947
 948                                         r3 = i2x(r1);  g3 = i2x(g1);  b3 = i2x(b1);
 949
 950 #ifdef GPU_UNAI_USE_FLOATMATH
 951 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
 952                                         if ((y2 - y1) != 0) {
 953                                                 float finv = FloatInv(y2 - y1);
 954                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
 955                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
 956                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
 957                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
 958                                         } else {
 959                                                 dx3 = dr3 = dg3 = db3 = 0;
 960                                         }
 961 #else
 962                                         if ((y2 - y1) != 0) {
 963                                                 float fdiv = y2 - y1;
 964                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
 965                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
 966                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
 967                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
 968                                         } else {
 969                                                 dx3 = dr3 = dg3 = db3 = 0;
 970                                         }
 971 #endif
 972 #else  // Integer Division:
 973 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
 974                                         if ((y2 - y1) != 0) {
 975                                                 int iF, iS;
 976                                                 xInv((y2 - y1), iF, iS);
 977                                                 dx3 = xInvMulx((x2 - x1), iF, iS);
 978                                                 dr3 = xInvMulx((r2 - r1), iF, iS);
 979                                                 dg3 = xInvMulx((g2 - g1), iF, iS);
 980                                                 db3 = xInvMulx((b2 - b1), iF, iS);
 981                                         } else {
 982                                                 dx3 = dr3 = dg3 = db3 = 0;
 983                                         }
 984 #else
 985                                         if ((y2 - y1) != 0) {
 986                                                 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
 987                                                 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
 988                                                 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
 989                                                 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
 990                                         } else {
 991                                                 dx3 = dr3 = dg3 = db3 = 0;
 992                                         }
 993 #endif
 994 #endif
 995                                 }
 996                         }
 997
                             // Clip against the drawing area: entries [0]/[1] are used as the
                             // minimum x/y and entries [2]/[3] as the maximum x/y.
 998                         s32 xmin, xmax, ymin, ymax;
 999                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
1000                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
1001
                             // Section starts above ymin: advance both edges and the colors
                             // by the number of clipped rows, then start drawing at ymin.
1002                         if ((ymin - ya) > 0) {
1003                                 x3 += (dx3 * (ymin - ya));
1004                                 x4 += (dx4 * (ymin - ya));
1005                                 r3 += (dr3 * (ymin - ya));
1006                                 g3 += (dg3 * (ymin - ya));
1007                                 b3 += (db3 * (ymin - ya));
1008                                 ya = ymin;
1009                         }
1010
1011                         if (yb > ymax) yb = ymax;
1012
                             // Number of rows to draw in this section (row yb itself is not
                             // drawn). Nothing to do if the section is empty after clipping.
1013                         int loop1 = yb - ya;
1014                         if (loop1 <= 0)
1015                                 continue;
1016
                             // PixelBase points at column 0 of row ya and is moved down one
                             // row (FRAME_WIDTH pixels) per iteration.
                             // li     : rows with (ya & li) != 0 are skipped.
                             // pi/pif : rows with (ya & pi) == pif are skipped. When
                             //          progressive interlace is disabled, pi = 0 and
                             //          pif = 1, so that test never matches.
1017                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
1018                         int li=gpu_unai.inn.ilace_mask;
1019                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
1020                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
1021
                             // Row loop. The edge positions and edge colors are stepped in
                             // the for-increment, so they advance even for skipped rows.
1022                         for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1023                                         x3 += dx3, x4 += dx4,
1024                                         r3 += dr3, g3 += dg3, b3 += db3 )
1025                         {
1026                                 if (ya&li) continue;
1027                                 if ((ya&pi)==pif) continue;
1028
1029                                 u32 r4, g4, b4;
1030
                                     // xa: first pixel of the span, xb: one past its last pixel
                                     // (the driver below receives xb - xa as the pixel count).
1031                                 xa = FixedCeilToInt(x3);
1032                                 xb = FixedCeilToInt(x4);
1033                                 r4 = r3;  g4 = g3;  b4 = b3;
1034
                                     // itmp is the fixed-point distance from the edge position
                                     // x3 to pixel xa; step the start color by that fraction of
                                     // the per-pixel increment.
1035                                 fixed itmp = i2x(xa) - x3;
1036                                 if (itmp != 0) {
1037                                         r4 += (dr4 * itmp) >> FIXED_BITS;
1038                                         g4 += (dg4 * itmp) >> FIXED_BITS;
1039                                         b4 += (db4 * itmp) >> FIXED_BITS;
1040                                 }
1041
                                     // Bias every channel by fixed_HALF (presumably for rounding
                                     // when the fixed-point color is later truncated).
1042                                 r4 += fixed_HALF;
1043                                 g4 += fixed_HALF;
1044                                 b4 += fixed_HALF;
1045
                                     // Span starts left of xmin: skip the clipped pixels and
                                     // advance the start color by the same number of steps.
1046                                 if ((xmin - xa) > 0) {
1047                                         r4 += (dr4 * (xmin - xa));
1048                                         g4 += (dg4 * (xmin - xa));
1049                                         b4 += (db4 * (xmin - xa));
1050                                         xa = xmin;
1051                                 }
1052
1053                                 // Setup packed Gouraud color for inner driver
1054                                 gpu_unai.inn.gCol = gpuPackGouraudCol(r4, g4, b4);
1055
                                     // Clamp the span end to xmax and draw only non-empty spans.
1056                                 if (xb > xmax) xb = xmax;
1057                                 if ((xb - xa) > 0)
1058                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
1059                         }
1060                 }
1061         } while (++cur_pass < total_passes);
1062 }
1063
1064 /*----------------------------------------------------------------------
1065 gpuDrawPolyGT - Gouraud-shaded, textured poly
1066 ----------------------------------------------------------------------*/
1067 void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
1068 {
1069         PolyVertex vbuf[4];
1070         polyInitVertexBuffer(vbuf, packet, POLYTYPE_GT, is_quad);
1071
1072         int total_passes = is_quad ? 2 : 1;
1073         int cur_pass = 0;
1074         do
1075         {
1076                 const PolyVertex* vptrs[3];
1077                 s32 x_off, y_off;
1078                 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
1079                         continue;
1080
1081                 s32 xa, xb, ya, yb;
1082                 s32 x3, dx3, x4, dx4, dx;
1083                 s32 u3, du3, v3, dv3;
1084                 s32 r3, dr3, g3, dg3, b3, db3;
1085                 s32 x0, x1, x2, y0, y1, y2;
1086                 s32 u0, u1, u2, v0, v1, v2;
1087                 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
1088                 s32 du4, dv4;
1089                 s32 dr4, dg4, db4;
1090
1091                 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
1092                 u0 = vptrs[0]->tex.u;     v0 = vptrs[0]->tex.v;
1093                 r0 = vptrs[0]->col.r;     g0 = vptrs[0]->col.g;  b0 = vptrs[0]->col.b;
1094                 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
1095                 u1 = vptrs[1]->tex.u;     v1 = vptrs[1]->tex.v;
1096                 r1 = vptrs[1]->col.r;     g1 = vptrs[1]->col.g;  b1 = vptrs[1]->col.b;
1097                 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
1098                 u2 = vptrs[2]->tex.u;     v2 = vptrs[2]->tex.v;
1099                 r2 = vptrs[2]->col.r;     g2 = vptrs[2]->col.g;  b2 = vptrs[2]->col.b;
1100
1101                 ya = y2 - y0;
1102                 yb = y2 - y1;
1103                 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
1104                 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
1105                 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
1106                 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
1107                 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
1108                 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
1109                 dx = dx4;
1110                 if (dx4 < 0) {
1111                         dx4 = -dx4;
1112                         du4 = -du4;
1113                         dv4 = -dv4;
1114                         dr4 = -dr4;
1115                         dg4 = -dg4;
1116                         db4 = -db4;
1117                 }
1118
1119 #ifdef GPU_UNAI_USE_FLOATMATH
1120 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1121                 if (dx4 != 0) {
1122                         float finv = FloatInv(dx4);
1123                         du4 = (fixed)((du4 << FIXED_BITS) * finv);
1124                         dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
1125                         dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
1126                         dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
1127                         db4 = (fixed)((db4 << FIXED_BITS) * finv);
1128                 } else {
1129                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1130                 }
1131 #else
1132                 if (dx4 != 0) {
1133                         float fdiv = dx4;
1134                         du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
1135                         dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
1136                         dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
1137                         dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
1138                         db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
1139                 } else {
1140                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1141                 }
1142 #endif
1143 #else  // Integer Division:
1144 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1145                 if (dx4 != 0) {
1146                         int iF, iS;
1147                         xInv(dx4, iF, iS);
1148                         du4 = xInvMulx(du4, iF, iS);
1149                         dv4 = xInvMulx(dv4, iF, iS);
1150                         dr4 = xInvMulx(dr4, iF, iS);
1151                         dg4 = xInvMulx(dg4, iF, iS);
1152                         db4 = xInvMulx(db4, iF, iS);
1153                 } else {
1154                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1155                 }
1156 #else
1157                 if (dx4 != 0) {
1158                         du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
1159                         dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
1160                         dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
1161                         dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
1162                         db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
1163                 } else {
1164                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1165                 }
1166 #endif
1167 #endif
1168                 // Set u,v increments and packed Gouraud increment for inner driver
1169                 gpu_unai.inn.u_inc = du4;
1170                 gpu_unai.inn.v_inc = dv4;
1171                 gpu_unai.inn.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
1172
1173                 for (s32 loop0 = 2; loop0; loop0--) {
1174                         if (loop0 == 2) {
1175                                 ya = y0;  yb = y1;
1176                                 x3 = x4 = i2x(x0);
1177                                 u3 = i2x(u0);  v3 = i2x(v0);
1178                                 r3 = i2x(r0);  g3 = i2x(g0);  b3 = i2x(b0);
1179                                 if (dx < 0) {
1180 #ifdef GPU_UNAI_USE_FLOATMATH
1181 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1182                                         if ((y2 - y0) != 0) {
1183                                                 float finv = FloatInv(y2 - y0);
1184                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
1185                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
1186                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
1187                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
1188                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
1189                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
1190                                         } else {
1191                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1192                                         }
1193                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
1194 #else
1195                                         if ((y2 - y0) != 0) {
1196                                                 float fdiv = y2 - y0;
1197                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
1198                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
1199                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
1200                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
1201                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
1202                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
1203                                         } else {
1204                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1205                                         }
1206                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
1207 #endif
1208 #else  // Integer Division:
1209 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1210                                         if ((y2 - y0) != 0) {
1211                                                 int iF, iS;
1212                                                 xInv((y2 - y0), iF, iS);
1213                                                 dx3 = xInvMulx((x2 - x0), iF, iS);
1214                                                 du3 = xInvMulx((u2 - u0), iF, iS);
1215                                                 dv3 = xInvMulx((v2 - v0), iF, iS);
1216                                                 dr3 = xInvMulx((r2 - r0), iF, iS);
1217                                                 dg3 = xInvMulx((g2 - g0), iF, iS);
1218                                                 db3 = xInvMulx((b2 - b0), iF, iS);
1219                                         } else {
1220                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1221                                         }
1222                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
1223 #else
1224                                         if ((y2 - y0) != 0) {
1225                                                 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
1226                                                 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
1227                                                 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
1228                                                 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
1229                                                 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
1230                                                 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
1231                                         } else {
1232                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1233                                         }
1234                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
1235 #endif
1236 #endif
1237                                 } else {
1238 #ifdef GPU_UNAI_USE_FLOATMATH
1239 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1240                                         if ((y1 - y0) != 0) {
1241                                                 float finv = FloatInv(y1 - y0);
1242                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
1243                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
1244                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
1245                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
1246                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
1247                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
1248                                         } else {
1249                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1250                                         }
1251                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
1252 #else
1253                                         if ((y1 - y0) != 0) {
1254                                                 float fdiv = y1 - y0;
1255                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
1256                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
1257                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
1258                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
1259                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
1260                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
1261                                         } else {
1262                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1263                                         }
1264                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / float(y2 - y0)) : 0;
1265 #endif
1266 #else  // Integer Division:
1267 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1268                                         if ((y1 - y0) != 0) {
1269                                                 int iF, iS;
1270                                                 xInv((y1 - y0), iF, iS);
1271                                                 dx3 = xInvMulx((x1 - x0), iF, iS);
1272                                                 du3 = xInvMulx((u1 - u0), iF, iS);
1273                                                 dv3 = xInvMulx((v1 - v0), iF, iS);
1274                                                 dr3 = xInvMulx((r1 - r0), iF, iS);
1275                                                 dg3 = xInvMulx((g1 - g0), iF, iS);
1276                                                 db3 = xInvMulx((b1 - b0), iF, iS);
1277                                         } else {
1278                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1279                                         }
1280                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
1281 #else
1282                                         if ((y1 - y0) != 0) {
1283                                                 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
1284                                                 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
1285                                                 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
1286                                                 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
1287                                                 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
1288                                                 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
1289                                         } else {
1290                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1291                                         }
1292                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
1293 #endif
1294 #endif
1295                                 }
1296                         } else {
1297                                 //senquack - break out of final loop if nothing to be drawn (1st loop
1298                                 //           must always be taken to setup dx3/dx4)
1299                                 if (y1 == y2) break;
1300
1301                                 ya = y1;  yb = y2;
1302
1303                                 if (dx < 0) {
1304                                         x3 = i2x(x0);  x4 = i2x(x1);
1305                                         u3 = i2x(u0);  v3 = i2x(v0);
1306                                         r3 = i2x(r0);  g3 = i2x(g0);  b3 = i2x(b0);
1307
1308                                         if ((y1 - y0) != 0) {
1309                                                 x3 += (dx3 * (y1 - y0));
1310                                                 u3 += (du3 * (y1 - y0));
1311                                                 v3 += (dv3 * (y1 - y0));
1312                                                 r3 += (dr3 * (y1 - y0));
1313                                                 g3 += (dg3 * (y1 - y0));
1314                                                 b3 += (db3 * (y1 - y0));
1315                                         }
1316
1317 #ifdef GPU_UNAI_USE_FLOATMATH
1318 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1319                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
1320 #else
1321                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
1322 #endif
1323 #else  // Integer Division:
1324 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1325                                         dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
1326 #else
1327                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
1328 #endif
1329 #endif
1330                                 } else {
1331                                         x3 = i2x(x1);
1332                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
1333
1334                                         u3 = i2x(u1);  v3 = i2x(v1);
1335                                         r3 = i2x(r1);  g3 = i2x(g1);  b3 = i2x(b1);
1336 #ifdef GPU_UNAI_USE_FLOATMATH
1337 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1338                                         if ((y2 - y1) != 0) {
1339                                                 float finv = FloatInv(y2 - y1);
1340                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
1341                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
1342                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
1343                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
1344                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
1345                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
1346                                         } else {
1347                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1348                                         }
1349 #else
1350                                         if ((y2 - y1) != 0) {
1351                                                 float fdiv = y2 - y1;
1352                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
1353                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
1354                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
1355                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
1356                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
1357                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
1358                                         } else {
1359                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1360                                         }
1361 #endif
1362 #else  // Integer Division:
1363 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1364                                         if ((y2 - y1) != 0) {
1365                                                 int iF, iS;
1366                                                 xInv((y2 - y1), iF, iS);
1367                                                 dx3 = xInvMulx((x2 - x1), iF, iS);
1368                                                 du3 = xInvMulx((u2 - u1), iF, iS);
1369                                                 dv3 = xInvMulx((v2 - v1), iF, iS);
1370                                                 dr3 = xInvMulx((r2 - r1), iF, iS);
1371                                                 dg3 = xInvMulx((g2 - g1), iF, iS);
1372                                                 db3 = xInvMulx((b2 - b1), iF, iS);
1373                                         } else {
1374                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1375                                         }
1376 #else
1377                                         if ((y2 - y1) != 0) {
1378                                                 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
1379                                                 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
1380                                                 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
1381                                                 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
1382                                                 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
1383                                                 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
1384                                         } else {
1385                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1386                                         }
1387 #endif
1388 #endif
1389                                 }
1390                         }
1391
1392                         s32 xmin, xmax, ymin, ymax;
1393                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
1394                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
1395
1396                         if ((ymin - ya) > 0) {
1397                                 x3 += (dx3 * (ymin - ya));
1398                                 x4 += (dx4 * (ymin - ya));
1399                                 u3 += (du3 * (ymin - ya));
1400                                 v3 += (dv3 * (ymin - ya));
1401                                 r3 += (dr3 * (ymin - ya));
1402                                 g3 += (dg3 * (ymin - ya));
1403                                 b3 += (db3 * (ymin - ya));
1404                                 ya = ymin;
1405                         }
1406
1407                         if (yb > ymax) yb = ymax;
1408
1409                         int loop1 = yb - ya;
1410                         if (loop1 <= 0)
1411                                 continue;
1412
1413                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
1414                         int li=gpu_unai.inn.ilace_mask;
1415                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
1416                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
1417
1418                         for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1419                                         x3 += dx3, x4 += dx4,
1420                                         u3 += du3, v3 += dv3,
1421                                         r3 += dr3, g3 += dg3, b3 += db3 )
1422                         {
1423                                 if (ya&li) continue;
1424                                 if ((ya&pi)==pif) continue;
1425
1426                                 u32 u4, v4;
1427                                 u32 r4, g4, b4;
1428
1429                                 xa = FixedCeilToInt(x3);
1430                                 xb = FixedCeilToInt(x4);
1431                                 u4 = u3;  v4 = v3;
1432                                 r4 = r3;  g4 = g3;  b4 = b3;
1433
1434                                 fixed itmp = i2x(xa) - x3;
1435                                 if (itmp != 0) {
1436                                         u4 += (du4 * itmp) >> FIXED_BITS;
1437                                         v4 += (dv4 * itmp) >> FIXED_BITS;
1438                                         r4 += (dr4 * itmp) >> FIXED_BITS;
1439                                         g4 += (dg4 * itmp) >> FIXED_BITS;
1440                                         b4 += (db4 * itmp) >> FIXED_BITS;
1441                                 }
1442
1443                                 u4 += fixed_HALF;
1444                                 v4 += fixed_HALF;
1445                                 r4 += fixed_HALF;
1446                                 g4 += fixed_HALF;
1447                                 b4 += fixed_HALF;
1448
1449                                 if ((xmin - xa) > 0) {
1450                                         u4 += du4 * (xmin - xa);
1451                                         v4 += dv4 * (xmin - xa);
1452                                         r4 += dr4 * (xmin - xa);
1453                                         g4 += dg4 * (xmin - xa);
1454                                         b4 += db4 * (xmin - xa);
1455                                         xa = xmin;
1456                                 }
1457
1458                                 // Set packed Gouraud color and u,v coords for inner driver
1459                                 gpu_unai.inn.u = u4;
1460                                 gpu_unai.inn.v = v4;
1461                                 gpu_unai.inn.gCol = gpuPackGouraudCol(r4, g4, b4);
1462
1463                                 if (xb > xmax) xb = xmax;
1464                                 if ((xb - xa) > 0)
1465                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
1466                         }
1467                 }
1468         } while (++cur_pass < total_passes);
1469 }
1470
1471 #endif /* __GPU_UNAI_GPU_RASTER_POLYGON_H__ */