// Source: pcsx_rearmed.git, git object 9b259bb1d645ed537f4cb8daad72e63e5f34feb2
// File:   plugins/gpu_unai/gpu_raster_polygon.h
1 /***************************************************************************
2 *   Copyright (C) 2010 PCSX4ALL Team                                      *
3 *   Copyright (C) 2010 Unai                                               *
4 *                                                                         *
5 *   This program is free software; you can redistribute it and/or modify  *
6 *   it under the terms of the GNU General Public License as published by  *
7 *   the Free Software Foundation; either version 2 of the License, or     *
8 *   (at your option) any later version.                                   *
9 *                                                                         *
10 *   This program is distributed in the hope that it will be useful,       *
11 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13 *   GNU General Public License for more details.                          *
14 *                                                                         *
15 *   You should have received a copy of the GNU General Public License     *
16 *   along with this program; if not, write to the                         *
17 *   Free Software Foundation, Inc.,                                       *
18 *   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
19 ***************************************************************************/
20
21 #ifndef __GPU_UNAI_GPU_RASTER_POLYGON_H__
22 #define __GPU_UNAI_GPU_RASTER_POLYGON_H__
23
24 //senquack - NOTE: GPU Unai poly routines have been rewritten/adapted
25 // from DrHell routines to fix multiple issues. See README_senquack.txt
26
27 ///////////////////////////////////////////////////////////////////////////////
28 // Shared poly vertex buffer, able to handle 3 or 4-pt polys of any type.
29 ///////////////////////////////////////////////////////////////////////////////
30
31 struct PolyVertex {
32         s32 x, y; // Sign-extended 11-bit X,Y coords
33         union {
34 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
35                 struct { u8 pad[2], v, u; } tex; // Texture coords (if used)
36 #else
37                 struct { u8 u, v, pad[2]; } tex; // Texture coords (if used)
38 #endif
39                 u32 tex_word;
40         };
41         union {
42 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
43                 struct { u8 pad, b, g, r; } col; // 24-bit RGB color (if used)
44 #else
45                 struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used)
46 #endif
47                 u32 col_word;
48         };
49 };
50
// Bit flags naming the optional per-vertex data a polygon can carry.
// They occupy distinct bits so they can be OR-ed together and tested with '&'.
enum PolyAttribute {
        POLYATTR_TEXTURE = (1 << 0), // Vertices carry U,V texture coords
        POLYATTR_GOURAUD = (1 << 1)  // Vertices carry their own color
};
55
56 enum PolyType {
57         POLYTYPE_F  = 0,
58         POLYTYPE_FT = (POLYATTR_TEXTURE),
59         POLYTYPE_G  = (POLYATTR_GOURAUD),
60         POLYTYPE_GT = (POLYATTR_TEXTURE | POLYATTR_GOURAUD)
61 };
62
63 ///////////////////////////////////////////////////////////////////////////////
64 // polyInitVertexBuffer()
65 // Fills vbuf[] array with data from any type of poly draw-command packet.
66 ///////////////////////////////////////////////////////////////////////////////
67 static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyType ptype, u32 is_quad)
68 {
69         bool texturing = ptype & POLYATTR_TEXTURE;
70         bool gouraud   = ptype & POLYATTR_GOURAUD;
71
72         int vert_stride = 1; // Stride of vertices in cmd packet, in 32-bit words
73         if (texturing)
74                 vert_stride++;
75         if (gouraud)
76                 vert_stride++;
77
78         int num_verts = (is_quad) ? 4 : 3;
79         le32_t *ptr;
80
81         // X,Y coords
82         ptr = &packet.U4[1];
83         for (int i=0;  i < num_verts; ++i, ptr += vert_stride) {
84                 u32 coords = le32_to_u32(*ptr);
85                 vbuf[i].x = GPU_EXPANDSIGN(coords);
86                 vbuf[i].y = GPU_EXPANDSIGN(coords >> 16);
87         }
88
89         // U,V texture coords (if applicable)
90         if (texturing) {
91                 ptr = &packet.U4[2];
92                 for (int i=0;  i < num_verts; ++i, ptr += vert_stride)
93                         vbuf[i].tex_word = le32_to_u32(*ptr);
94         }
95
96         // Colors (if applicable)
97         if (gouraud) {
98                 ptr = &packet.U4[0];
99                 for (int i=0;  i < num_verts; ++i, ptr += vert_stride)
100                         vbuf[i].col_word = le32_to_u32(*ptr);
101         }
102 }
103
///////////////////////////////////////////////////////////////////////////////
//  Helper functions to determine which vertex in a 2- or 3-vertex array
//   has the highest/lowest X/Y coordinate.
//   Note: ties are broken in opposite directions. When several vertices
//    share the same value for a given coordinate, vertIdxOfLeast..() returns
//    the lowest such index while vertIdxOfHighest..() returns the highest.
//    The two calls therefore never return the same index, which ensures
//    that, during the vertex-ordering phase of rasterization, all three
//    vertices remain unique.
///////////////////////////////////////////////////////////////////////////////
113
// Index (0 or 1) of whichever of Tptr[0], Tptr[1] has the smaller .x.
// On a tie the lower index, 0, is returned.
template<typename T>
static inline int vertIdxOfLeastXCoord2(const T *Tptr)
{
        return (Tptr[0].x <= Tptr[1].x) ? 0 : 1;
}

// Index (0..2) of whichever of Tptr[0..2] has the smallest .x.
// On a tie the lowest of the tied indices is returned.
template<typename T>
static inline int vertIdxOfLeastXCoord3(const T *Tptr)
{
        const int least_of_v0_v1 = vertIdxOfLeastXCoord2(Tptr);
        return (Tptr[least_of_v0_v1].x <= Tptr[2].x) ? least_of_v0_v1 : 2;
}
126
// Index (0 or 1) of whichever of Tptr[0], Tptr[1] has the smaller .y.
// On a tie the lower index, 0, is returned.
template<typename T>
static inline int vertIdxOfLeastYCoord2(const T *Tptr)
{
        return (Tptr[0].y <= Tptr[1].y) ? 0 : 1;
}

// Index (0..2) of whichever of Tptr[0..2] has the smallest .y.
// On a tie the lowest of the tied indices is returned.
template<typename T>
static inline int vertIdxOfLeastYCoord3(const T *Tptr)
{
        const int least_of_v0_v1 = vertIdxOfLeastYCoord2(Tptr);
        return (Tptr[least_of_v0_v1].y <= Tptr[2].y) ? least_of_v0_v1 : 2;
}
139
// Index (0 or 1) of whichever of Tptr[0], Tptr[1] has the larger .x.
// On a tie the higher index, 1, is returned.
template<typename T>
static inline int vertIdxOfHighestXCoord2(const T *Tptr)
{
        return (Tptr[1].x >= Tptr[0].x) ? 1 : 0;
}

// Index (0..2) of whichever of Tptr[0..2] has the largest .x.
// On a tie the highest of the tied indices is returned.
template<typename T>
static inline int vertIdxOfHighestXCoord3(const T *Tptr)
{
        const int highest_of_v0_v1 = vertIdxOfHighestXCoord2(Tptr);
        return (Tptr[2].x >= Tptr[highest_of_v0_v1].x) ? 2 : highest_of_v0_v1;
}
152
// Index (0 or 1) of whichever of Tptr[0], Tptr[1] has the larger .y.
// On a tie the higher index, 1, is returned.
template<typename T>
static inline int vertIdxOfHighestYCoord2(const T *Tptr)
{
        return (Tptr[1].y >= Tptr[0].y) ? 1 : 0;
}

// Index (0..2) of whichever of Tptr[0..2] has the largest .y.
// On a tie the highest of the tied indices is returned.
template<typename T>
static inline int vertIdxOfHighestYCoord3(const T *Tptr)
{
        const int highest_of_v0_v1 = vertIdxOfHighestYCoord2(Tptr);
        return (Tptr[2].y >= Tptr[highest_of_v0_v1].y) ? 2 : highest_of_v0_v1;
}
165
166 ///////////////////////////////////////////////////////////////////////////////
167 // polyUseTriangle()
168 //  Determines if the specified triangle should be rendered. If so, it
169 //  fills the given array of vertex pointers, vert_ptrs, in order of
170 //  increasing Y coordinate values, as required by rasterization algorithm.
171 //  Parameter 'tri_num' is 0 for first triangle (idx 0,1,2 of vbuf[]),
172 //   or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]).
173 //  Returns true if triangle should be rendered, false if not.
174 ///////////////////////////////////////////////////////////////////////////////
175 static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs, s32 &x_off, s32 &y_off)
176 {
177         // Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)?
178         const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 0 : 1];
179
180         // Get indices of highest/lowest X,Y coords within triangle
181         int idx_lowest_x  = vertIdxOfLeastXCoord3(tri_ptr);
182         int idx_highest_x = vertIdxOfHighestXCoord3(tri_ptr);
183         int idx_lowest_y  = vertIdxOfLeastYCoord3(tri_ptr);
184         int idx_highest_y = vertIdxOfHighestYCoord3(tri_ptr);
185
186         // Maximum absolute distance between any two X coordinates is 1023,
187         //  and for Y coordinates is 511 (PS1 hardware limitation)
188         int lowest_x  = tri_ptr[idx_lowest_x].x;
189         int highest_x = tri_ptr[idx_highest_x].x;
190         int lowest_y  = tri_ptr[idx_lowest_y].y;
191         int highest_y = tri_ptr[idx_highest_y].y;
192         if ((highest_x - lowest_x) >= CHKMAX_X ||
193             (highest_y - lowest_y) >= CHKMAX_Y)
194                 return false;
195
196         // Determine offsets
197         x_off = gpu_unai.DrawingOffset[0];
198         y_off = gpu_unai.DrawingOffset[1];
199         x_off = GPU_EXPANDSIGN(lowest_x + x_off) - lowest_x;
200         y_off = GPU_EXPANDSIGN(lowest_y + y_off) - lowest_y;
201
202         // Determine if triangle is completely outside clipping range
203         s32 xmin, xmax, ymin, ymax;
204         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
205         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
206         int clipped_lowest_x  = Max2(xmin, lowest_x + x_off);
207         int clipped_lowest_y  = Max2(ymin, lowest_y + y_off);
208         int clipped_highest_x = Min2(xmax, highest_x + x_off);
209         int clipped_highest_y = Min2(ymax, highest_y + y_off);
210         if (clipped_lowest_x >= clipped_highest_x ||
211             clipped_lowest_y >= clipped_highest_y)
212                 return false;
213
214         // Order vertex ptrs by increasing y value (draw routines need this).
215         // The middle index is deduced by a binary math trick that depends
216         //  on index range always being between 0..2
217         vert_ptrs[0] = tri_ptr + idx_lowest_y;
218         vert_ptrs[1] = tri_ptr + ((idx_lowest_y + idx_highest_y) ^ 3);
219         vert_ptrs[2] = tri_ptr + idx_highest_y;
220         return true;
221 }
222
223 ///////////////////////////////////////////////////////////////////////////////
224 //  GPU internal polygon drawing functions
225 ///////////////////////////////////////////////////////////////////////////////
226
227 /*----------------------------------------------------------------------
228 gpuDrawPolyF - Flat-shaded, untextured poly
229 ----------------------------------------------------------------------*/
230 void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad,
231         PolyType ptype = POLYTYPE_F)
232 {
233         // Set up bgr555 color to be used across calls in inner driver
234         gpu_unai.inn.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0]));
235
236         PolyVertex vbuf[4];
237         polyInitVertexBuffer(vbuf, packet, ptype, is_quad);
238
239         int total_passes = is_quad ? 2 : 1;
240         int cur_pass = 0;
241         do
242         {
243                 const PolyVertex* vptrs[3];
244                 s32 x_off, y_off;
245                 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
246                         continue;
247
248                 s32 xa, xb, ya, yb;
249                 s32 x3, dx3, x4, dx4, dx;
250                 s32 x0, x1, x2, y0, y1, y2;
251
252                 x0 = vptrs[0]->x + x_off;  y0 = vptrs[0]->y + y_off;
253                 x1 = vptrs[1]->x + x_off;  y1 = vptrs[1]->y + y_off;
254                 x2 = vptrs[2]->x + x_off;  y2 = vptrs[2]->y + y_off;
255
256                 ya = y2 - y0;
257                 yb = y2 - y1;
258                 dx = (x2 - x1) * ya - (x2 - x0) * yb;
259
260                 for (int loop0 = 2; loop0; loop0--) {
261                         if (loop0 == 2) {
262                                 ya = y0;  yb = y1;
263                                 x3 = x4 = i2x(x0);
264                                 if (dx < 0) {
265 #ifdef GPU_UNAI_USE_FLOATMATH
266 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
267                                         dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
268                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
269 #else
270                                         dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
271                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
272 #endif
273 #else  // Integer Division:
274 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
275                                         dx3 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
276                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
277 #else
278                                         dx3 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
279                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
280 #endif
281 #endif
282                                 } else {
283 #ifdef GPU_UNAI_USE_FLOATMATH
284 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
285                                         dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
286                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
287 #else
288                                         dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
289                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
290 #endif
291 #else  // Integer Division:
292 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
293                                         dx3 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
294                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
295 #else
296                                         dx3 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
297                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
298 #endif
299 #endif
300                                 }
301                         } else {
302                                 //senquack - break out of final loop if nothing to be drawn (1st loop
303                                 //           must always be taken to setup dx3/dx4)
304                                 if (y1 == y2) break;
305
306                                 ya = y1;  yb = y2;
307
308                                 if (dx < 0) {
309                                         x3 = i2x(x0) + (dx3 * (y1 - y0));
310                                         x4 = i2x(x1);
311 #ifdef GPU_UNAI_USE_FLOATMATH
312 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
313                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
314 #else
315                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
316 #endif
317 #else  // Integer Division:
318 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
319                                         dx4 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
320 #else
321                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
322 #endif
323 #endif
324                                 } else {
325                                         x3 = i2x(x1);
326                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
327 #ifdef GPU_UNAI_USE_FLOATMATH
328 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
329                                         dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
330 #else
331                                         dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
332 #endif
333 #else  // Integer Division:
334 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
335                                         dx3 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
336 #else
337                                         dx3 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
338 #endif
339 #endif
340                                 }
341                         }
342
343                         s32 xmin, xmax, ymin, ymax;
344                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
345                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
346
347                         if ((ymin - ya) > 0) {
348                                 x3 += (dx3 * (ymin - ya));
349                                 x4 += (dx4 * (ymin - ya));
350                                 ya = ymin;
351                         }
352
353                         if (yb > ymax) yb = ymax;
354
355                         int loop1 = yb - ya;
356                         if (loop1 <= 0)
357                                 continue;
358
359                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
360                         int li=gpu_unai.inn.ilace_mask;
361                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
362                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
363
364                         for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH,
365                                         x3 += dx3, x4 += dx4 )
366                         {
367                                 if (ya&li) continue;
368                                 if ((ya&pi)==pif) continue;
369
370                                 xa = FixedCeilToInt(x3);  xb = FixedCeilToInt(x4);
371                                 if ((xmin - xa) > 0) xa = xmin;
372                                 if (xb > xmax) xb = xmax;
373                                 if ((xb - xa) > 0)
374                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
375                         }
376                 }
377         } while (++cur_pass < total_passes);
378 }
379
380 /*----------------------------------------------------------------------
381 gpuDrawPolyFT - Flat-shaded, textured poly
382 ----------------------------------------------------------------------*/
383 void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad,
384         PolyType ptype = POLYTYPE_FT)
385 {
386         // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light)
387         gpu_unai.inn.r8 = packet.U1[0];
388         gpu_unai.inn.g8 = packet.U1[1];
389         gpu_unai.inn.b8 = packet.U1[2];
390         // r5/g5/b5 used if just texture-blending is applied (15-bit light)
391         gpu_unai.inn.r5 = packet.U1[0] >> 3;
392         gpu_unai.inn.g5 = packet.U1[1] >> 3;
393         gpu_unai.inn.b5 = packet.U1[2] >> 3;
394
395         PolyVertex vbuf[4];
396         polyInitVertexBuffer(vbuf, packet, ptype, is_quad);
397
398         int total_passes = is_quad ? 2 : 1;
399         int cur_pass = 0;
400         do
401         {
402                 const PolyVertex* vptrs[3];
403                 s32 x_off, y_off;
404                 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
405                         continue;
406
407                 s32 xa, xb, ya, yb;
408                 s32 x3, dx3, x4, dx4, dx;
409                 s32 u3, du3, v3, dv3;
410                 s32 x0, x1, x2, y0, y1, y2;
411                 s32 u0, u1, u2, v0, v1, v2;
412                 s32 du4, dv4;
413
414                 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
415                 u0 = vptrs[0]->tex.u;     v0 = vptrs[0]->tex.v;
416                 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
417                 u1 = vptrs[1]->tex.u;     v1 = vptrs[1]->tex.v;
418                 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
419                 u2 = vptrs[2]->tex.u;     v2 = vptrs[2]->tex.v;
420
421                 ya = y2 - y0;
422                 yb = y2 - y1;
423                 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
424                 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
425                 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
426                 dx = dx4;
427                 if (dx4 < 0) {
428                         dx4 = -dx4;
429                         du4 = -du4;
430                         dv4 = -dv4;
431                 }
432
433 #ifdef GPU_UNAI_USE_FLOATMATH
434 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
435                 if (dx4 != 0) {
436                         float finv = FloatInv(dx4);
437                         du4 = (fixed)((du4 << FIXED_BITS) * finv);
438                         dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
439                 } else {
440                         du4 = dv4 = 0;
441                 }
442 #else
443                 if (dx4 != 0) {
444                         float fdiv = dx4;
445                         du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
446                         dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
447                 } else {
448                         du4 = dv4 = 0;
449                 }
450 #endif
451 #else  // Integer Division:
452 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
453                 if (dx4 != 0) {
454                         int iF, iS;
455                         xInv(dx4, iF, iS);
456                         du4 = xInvMulx(du4, iF, iS);
457                         dv4 = xInvMulx(dv4, iF, iS);
458                 } else {
459                         du4 = dv4 = 0;
460                 }
461 #else
462                 if (dx4 != 0) {
463                         du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
464                         dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
465                 } else {
466                         du4 = dv4 = 0;
467                 }
468 #endif
469 #endif
470                 // Set u,v increments for inner driver
471                 gpu_unai.inn.u_inc = du4;
472                 gpu_unai.inn.v_inc = dv4;
473
474                 //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here?
475                 //                       (SAME ISSUE ELSEWHERE)
476                 for (s32 loop0 = 2; loop0; loop0--) {
477                         if (loop0 == 2) {
478                                 ya = y0;  yb = y1;
479                                 x3 = x4 = i2x(x0);
480                                 u3 = i2x(u0);  v3 = i2x(v0);
481                                 if (dx < 0) {
482 #ifdef GPU_UNAI_USE_FLOATMATH
483 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
484                                         if ((y2 - y0) != 0) {
485                                                 float finv = FloatInv(y2 - y0);
486                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
487                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
488                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
489                                         } else {
490                                                 dx3 = du3 = dv3 = 0;
491                                         }
492                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
493 #else
494                                         if ((y2 - y0) != 0) {
495                                                 float fdiv = y2 - y0;
496                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
497                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
498                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
499                                         } else {
500                                                 dx3 = du3 = dv3 = 0;
501                                         }
502                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
503 #endif
504 #else  // Integer Division:
505 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
506                                         if ((y2 - y0) != 0) {
507                                                 int iF, iS;
508                                                 xInv((y2 - y0), iF, iS);
509                                                 dx3 = xInvMulx((x2 - x0), iF, iS);
510                                                 du3 = xInvMulx((u2 - u0), iF, iS);
511                                                 dv3 = xInvMulx((v2 - v0), iF, iS);
512                                         } else {
513                                                 dx3 = du3 = dv3 = 0;
514                                         }
515                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
516 #else
517                                         if ((y2 - y0) != 0) {
518                                                 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
519                                                 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
520                                                 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
521                                         } else {
522                                                 dx3 = du3 = dv3 = 0;
523                                         }
524                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
525 #endif
526 #endif
527                                 } else {
528 #ifdef GPU_UNAI_USE_FLOATMATH
529 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
530                                         if ((y1 - y0) != 0) {
531                                                 float finv = FloatInv(y1 - y0);
532                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
533                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
534                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
535                                         } else {
536                                                 dx3 = du3 = dv3 = 0;
537                                         }
538                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
539 #else
540                                         if ((y1 - y0) != 0) {
541                                                 float fdiv = y1 - y0;
542                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
543                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
544                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
545                                         } else {
546                                                 dx3 = du3 = dv3 = 0;
547                                         }
548                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
549 #endif
550 #else  // Integer Division:
551 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
552                                         if ((y1 - y0) != 0) {
553                                                 int iF, iS;
554                                                 xInv((y1 - y0), iF, iS);
555                                                 dx3 = xInvMulx((x1 - x0), iF, iS);
556                                                 du3 = xInvMulx((u1 - u0), iF, iS);
557                                                 dv3 = xInvMulx((v1 - v0), iF, iS);
558                                         } else {
559                                                 dx3 = du3 = dv3 = 0;
560                                         }
561                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
562 #else
563                                         if ((y1 - y0) != 0) {
564                                                 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
565                                                 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
566                                                 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
567                                         } else {
568                                                 dx3 = du3 = dv3 = 0;
569                                         }
570                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
571 #endif
572 #endif
573                                 }
574                         } else {
575                                 //senquack - break out of final loop if nothing to be drawn (1st loop
576                                 //           must always be taken to setup dx3/dx4)
577                                 if (y1 == y2) break;
578
579                                 ya = y1;  yb = y2;
580
581                                 if (dx < 0) {
582                                         x3 = i2x(x0);
583                                         x4 = i2x(x1);
584                                         u3 = i2x(u0);
585                                         v3 = i2x(v0);
586                                         if ((y1 - y0) != 0) {
587                                                 x3 += (dx3 * (y1 - y0));
588                                                 u3 += (du3 * (y1 - y0));
589                                                 v3 += (dv3 * (y1 - y0));
590                                         }
591 #ifdef GPU_UNAI_USE_FLOATMATH
592 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
593                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
594 #else
595                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
596 #endif
597 #else  // Integer Division:
598 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
599                                         dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
600 #else
601                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
602 #endif
603 #endif
604                                 } else {
605                                         x3 = i2x(x1);
606                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
607                                         u3 = i2x(u1);
608                                         v3 = i2x(v1);
609 #ifdef GPU_UNAI_USE_FLOATMATH
610 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
611                                         if ((y2 - y1) != 0) {
612                                                 float finv = FloatInv(y2 - y1);
613                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
614                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
615                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
616                                         } else {
617                                                 dx3 = du3 = dv3 = 0;
618                                         }
619 #else
620                                         if ((y2 - y1) != 0) {
621                                                 float fdiv = y2 - y1;
622                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
623                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
624                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
625                                         } else {
626                                                 dx3 = du3 = dv3 = 0;
627                                         }
628 #endif
629 #else  // Integer Division:
630 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
631                                         if ((y2 - y1) != 0) {
632                                                 int iF, iS;
633                                                 xInv((y2 - y1), iF, iS);
634                                                 dx3 = xInvMulx((x2 - x1), iF, iS);
635                                                 du3 = xInvMulx((u2 - u1), iF, iS);
636                                                 dv3 = xInvMulx((v2 - v1), iF, iS);
637                                         } else {
638                                                 dx3 = du3 = dv3 = 0;
639                                         }
640 #else 
641                                         if ((y2 - y1) != 0) {
642                                                 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
643                                                 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
644                                                 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
645                                         } else {
646                                                 dx3 = du3 = dv3 = 0;
647                                         }
648 #endif
649 #endif
650                                 }
651                         }
652
653                         s32 xmin, xmax, ymin, ymax;
654                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
655                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
656
657                         if ((ymin - ya) > 0) {
658                                 x3 += dx3 * (ymin - ya);
659                                 x4 += dx4 * (ymin - ya);
660                                 u3 += du3 * (ymin - ya);
661                                 v3 += dv3 * (ymin - ya);
662                                 ya = ymin;
663                         }
664
665                         if (yb > ymax) yb = ymax;
666
667                         int loop1 = yb - ya;
668                         if (loop1 <= 0)
669                                 continue;
670
671                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
672                         int li=gpu_unai.inn.ilace_mask;
673                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
674                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
675
676                         for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
677                                         x3 += dx3, x4 += dx4,
678                                         u3 += du3, v3 += dv3 )
679                         {
680                                 if (ya&li) continue;
681                                 if ((ya&pi)==pif) continue;
682
683                                 u32 u4, v4;
684
685                                 xa = FixedCeilToInt(x3);  xb = FixedCeilToInt(x4);
686                                 u4 = u3;  v4 = v3;
687
688                                 fixed itmp = i2x(xa) - x3;
689                                 if (itmp != 0) {
690                                         u4 += (du4 * itmp) >> FIXED_BITS;
691                                         v4 += (dv4 * itmp) >> FIXED_BITS;
692                                 }
693
694                                 u4 += fixed_HALF;
695                                 v4 += fixed_HALF;
696
697                                 if ((xmin - xa) > 0) {
698                                         u4 += du4 * (xmin - xa);
699                                         v4 += dv4 * (xmin - xa);
700                                         xa = xmin;
701                                 }
702
703                                 // Set u,v coords for inner driver
704                                 gpu_unai.inn.u = u4;
705                                 gpu_unai.inn.v = v4;
706
707                                 if (xb > xmax) xb = xmax;
708                                 if ((xb - xa) > 0)
709                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
710                         }
711                 }
712         } while (++cur_pass < total_passes);
713 }
714
715 /*----------------------------------------------------------------------
716 gpuDrawPolyG - Gouraud-shaded, untextured poly
717 ----------------------------------------------------------------------*/
// gpuDrawPolyG: rasterizes a Gouraud-shaded, untextured triangle or quad.
//   packet            - primitive data handed to polyInitVertexBuffer()
//   gpuPolySpanDriver - inner driver invoked once per visible scanline span
//                       as (gpu_unai, destination pointer into gpu_unai.vram,
//                       span width)
//   is_quad           - non-zero: two triangle passes; zero: a single pass
// Per-span inputs reach the driver through gpu_unai.inn.gInc (packed color
// increment, set once per triangle) and gpu_unai.inn.gCol (packed starting
// color, set for every span).
718 void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
719 {
720         PolyVertex vbuf[4];
721         polyInitVertexBuffer(vbuf, packet, POLYTYPE_G, is_quad);
722
723         int total_passes = is_quad ? 2 : 1;
724         int cur_pass = 0;
725         do
726         {
                    // polyUseTriangle() is handed vptrs/x_off/y_off to fill in
                    // (presumably by reference -- they are read right after the
                    // call). A false return skips this pass; note that 'continue'
                    // in a do-while jumps to the '++cur_pass < total_passes' test.
727                 const PolyVertex* vptrs[3];
728                 s32 x_off, y_off;
729                 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
730                         continue;
731
732                 s32 xa, xb, ya, yb;
733                 s32 x3, dx3, x4, dx4, dx;
734                 s32 r3, dr3, g3, dg3, b3, db3;
735                 s32 x0, x1, x2, y0, y1, y2;
736                 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
737                 s32 dr4, dg4, db4;
738
                    // Local copies of the three vertices: position (offset applied)
                    // and color.
                    // NOTE(review): the two-half walk below (rows y0..y1, then
                    // y1..y2) looks like it expects vptrs sorted by ascending y --
                    // confirm in polyUseTriangle().
739                 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
740                 r0 = vptrs[0]->col.r;     g0 = vptrs[0]->col.g;  b0 = vptrs[0]->col.b;
741                 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
742                 r1 = vptrs[1]->col.r;     g1 = vptrs[1]->col.g;  b1 = vptrs[1]->col.b;
743                 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
744                 r2 = vptrs[2]->col.r;     g2 = vptrs[2]->col.g;  b2 = vptrs[2]->col.b;
745
                    // ya/yb are temporaries here (heights of edges 0->2 and 1->2);
                    // they are reassigned as scanline bounds inside the loop0 loop.
                    // dx4 is the 2D cross product of edge 1->2 with edge 0->2; its
                    // sign is saved in 'dx' and later selects which edge x3 follows
                    // and which edge x4 follows. dr4/dg4/db4 are the same expression
                    // evaluated over the color components instead of x.
746                 ya = y2 - y0;
747                 yb = y2 - y1;
748                 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
749                 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
750                 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
751                 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
752                 dx = dx4;
                    // Make the divisor non-negative; the numerators are negated
                    // with it so the quotients computed below are unchanged.
753                 if (dx4 < 0) {
754                         dx4 = -dx4;
755                         dr4 = -dr4;
756                         dg4 = -dg4;
757                         db4 = -db4;
758                 }
759
                    // Divide the color terms by dx4 to obtain fixed-point color
                    // increments (applied below as a step per pixel along x).
                    // Four build-time variants of the same division follow; each
                    // one yields zero increments when dx4 == 0.
760 #ifdef GPU_UNAI_USE_FLOATMATH
761 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
762                 if (dx4 != 0) {
763                         float finv = FloatInv(dx4);
764                         dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
765                         dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
766                         db4 = (fixed)((db4 << FIXED_BITS) * finv);
767                 } else {
768                         dr4 = dg4 = db4 = 0;
769                 }
770 #else
771                 if (dx4 != 0) {
772                         float fdiv = dx4;
773                         dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
774                         dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
775                         db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
776                 } else {
777                         dr4 = dg4 = db4 = 0;
778                 }
779 #endif
780 #else  // Integer Division:
781 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
782                 if (dx4 != 0) {
783                         int iF, iS;
784                         xInv(dx4, iF, iS);
785                         dr4 = xInvMulx(dr4, iF, iS);
786                         dg4 = xInvMulx(dg4, iF, iS);
787                         db4 = xInvMulx(db4, iF, iS);
788                 } else {
789                         dr4 = dg4 = db4 = 0;
790                 }
791 #else
792                 if (dx4 != 0) {
793                         dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
794                         dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
795                         db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
796                 } else {
797                         dr4 = dg4 = db4 = 0;
798                 }
799 #endif
800 #endif
801                 // Setup packed Gouraud increment for inner driver
802                 gpu_unai.inn.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
803
                    // The triangle is walked in two halves: loop0 == 2 covers rows
                    // y0..y1, loop0 == 1 covers rows y1..y2. dx4 is reused from here
                    // on as the per-row x step of the x4 edge walker.
804                 for (s32 loop0 = 2; loop0; loop0--) {
805                         if (loop0 == 2) {
                                    // First half: both edge walkers and the color
                                    // start at vertex 0.
806                                 ya = y0;
807                                 yb = y1;
808                                 x3 = x4 = i2x(x0);
809                                 r3 = i2x(r0);
810                                 g3 = i2x(g0);
811                                 b3 = i2x(b0);
812                                 if (dx < 0) {
                                            // x3 and the color follow edge 0->2;
                                            // x4 follows edge 0->1. A zero-height
                                            // edge gets zero per-row steps.
813 #ifdef GPU_UNAI_USE_FLOATMATH
814 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
815                                         if ((y2 - y0) != 0) {
816                                                 float finv = FloatInv(y2 - y0);
817                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
818                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
819                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
820                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
821                                         } else {
822                                                 dx3 = dr3 = dg3 = db3 = 0;
823                                         }
824                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
825 #else
826                                         if ((y2 - y0) != 0) {
827                                                 float fdiv = y2 - y0;
828                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
829                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
830                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
831                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
832                                         } else {
833                                                 dx3 = dr3 = dg3 = db3 = 0;
834                                         }
835                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
836 #endif
837 #else  // Integer Division:
838 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
839                                         if ((y2 - y0) != 0) {
840                                                 int iF, iS;
841                                                 xInv((y2 - y0), iF, iS);
842                                                 dx3 = xInvMulx((x2 - x0), iF, iS);
843                                                 dr3 = xInvMulx((r2 - r0), iF, iS);
844                                                 dg3 = xInvMulx((g2 - g0), iF, iS);
845                                                 db3 = xInvMulx((b2 - b0), iF, iS);
846                                         } else {
847                                                 dx3 = dr3 = dg3 = db3 = 0;
848                                         }
849                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
850 #else
851                                         if ((y2 - y0) != 0) {
852                                                 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
853                                                 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
854                                                 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
855                                                 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
856                                         } else {
857                                                 dx3 = dr3 = dg3 = db3 = 0;
858                                         }
859                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
860 #endif
861 #endif
862                                 } else {
                                            // x3 and the color follow edge 0->1;
                                            // x4 follows edge 0->2.
863 #ifdef GPU_UNAI_USE_FLOATMATH
864 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
865                                         if ((y1 - y0) != 0) {
866                                                 float finv = FloatInv(y1 - y0);
867                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
868                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
869                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
870                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
871                                         } else {
872                                                 dx3 = dr3 = dg3 = db3 = 0;
873                                         }
874                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
875 #else
876                                         if ((y1 - y0) != 0) {
877                                                 float fdiv = y1 - y0;
878                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
879                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
880                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
881                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
882                                         } else {
883                                                 dx3 = dr3 = dg3 = db3 = 0;
884                                         }
885                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
886 #endif
887 #else  // Integer Division:
888 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
889                                         if ((y1 - y0) != 0) {
890                                                 int iF, iS;
891                                                 xInv((y1 - y0), iF, iS);
892                                                 dx3 = xInvMulx((x1 - x0), iF, iS);
893                                                 dr3 = xInvMulx((r1 - r0), iF, iS);
894                                                 dg3 = xInvMulx((g1 - g0), iF, iS);
895                                                 db3 = xInvMulx((b1 - b0), iF, iS);
896                                         } else {
897                                                 dx3 = dr3 = dg3 = db3 = 0;
898                                         }
899                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
900 #else
901                                         if ((y1 - y0) != 0) {
902                                                 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
903                                                 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
904                                                 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
905                                                 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
906                                         } else {
907                                                 dx3 = dr3 = dg3 = db3 = 0;
908                                         }
909                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
910 #endif
911 #endif
912                                 }
913                         } else {
914                                 //senquack - break out of final loop if nothing to be drawn (1st loop
915                                 //           must always be taken to setup dx3/dx4)
916                                 if (y1 == y2) break;
917
918                                 ya = y1;  yb = y2;
919
920                                 if (dx < 0) {
                                            // x3 and the color keep following edge
                                            // 0->2: restart from vertex 0 and advance
                                            // by the (y1 - y0) rows of the first half,
                                            // reusing dx3/dr3/dg3/db3 computed there.
                                            // x4 restarts at vertex 1.
921                                         x3 = i2x(x0);  x4 = i2x(x1);
922                                         r3 = i2x(r0);  g3 = i2x(g0);  b3 = i2x(b0);
923
924                                         if ((y1 - y0) != 0) {
925                                                 x3 += (dx3 * (y1 - y0));
926                                                 r3 += (dr3 * (y1 - y0));
927                                                 g3 += (dg3 * (y1 - y0));
928                                                 b3 += (db3 * (y1 - y0));
929                                         }
930
                                            // x4 now follows edge 1->2.
931 #ifdef GPU_UNAI_USE_FLOATMATH
932 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
933                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
934 #else
935                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
936 #endif
937 #else  // Integer Division:
938 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
939                                         dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
940 #else
941                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
942 #endif
943 #endif
944                                 } else {
                                            // x3 and the color restart at vertex 1 and
                                            // follow edge 1->2. x4 keeps following edge
                                            // 0->2: it is recomputed from x0 using the
                                            // dx4 left over from the first half.
945                                         x3 = i2x(x1);
946                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
947
948                                         r3 = i2x(r1);  g3 = i2x(g1);  b3 = i2x(b1);
949
950 #ifdef GPU_UNAI_USE_FLOATMATH
951 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
952                                         if ((y2 - y1) != 0) {
953                                                 float finv = FloatInv(y2 - y1);
954                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
955                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
956                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
957                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
958                                         } else {
959                                                 dx3 = dr3 = dg3 = db3 = 0;
960                                         }
961 #else
962                                         if ((y2 - y1) != 0) {
963                                                 float fdiv = y2 - y1;
964                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
965                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
966                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
967                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
968                                         } else {
969                                                 dx3 = dr3 = dg3 = db3 = 0;
970                                         }
971 #endif
972 #else  // Integer Division:
973 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
974                                         if ((y2 - y1) != 0) {
975                                                 int iF, iS;
976                                                 xInv((y2 - y1), iF, iS);
977                                                 dx3 = xInvMulx((x2 - x1), iF, iS);
978                                                 dr3 = xInvMulx((r2 - r1), iF, iS);
979                                                 dg3 = xInvMulx((g2 - g1), iF, iS);
980                                                 db3 = xInvMulx((b2 - b1), iF, iS);
981                                         } else {
982                                                 dx3 = dr3 = dg3 = db3 = 0;
983                                         }
984 #else
985                                         if ((y2 - y1) != 0) {
986                                                 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
987                                                 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
988                                                 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
989                                                 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
990                                         } else {
991                                                 dx3 = dr3 = dg3 = db3 = 0;
992                                         }
993 #endif
994 #endif
995                                 }
996                         }
997
                            // Clip rectangle, read from gpu_unai.DrawingArea as
                            // [0]=xmin, [1]=ymin, [2]=xmax, [3]=ymax.
998                         s32 xmin, xmax, ymin, ymax;
999                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
1000                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
1001
                            // Clip top: advance both edge walkers and the color past
                            // the rows that lie above ymin.
1002                         if ((ymin - ya) > 0) {
1003                                 x3 += (dx3 * (ymin - ya));
1004                                 x4 += (dx4 * (ymin - ya));
1005                                 r3 += (dr3 * (ymin - ya));
1006                                 g3 += (dg3 * (ymin - ya));
1007                                 b3 += (db3 * (ymin - ya));
1008                                 ya = ymin;
1009                         }
1010
                            // Clip bottom.
1011                         if (yb > ymax) yb = ymax;
1012
                            // Number of rows left in this half; when there are none,
                            // 'continue' moves on to the next loop0 iteration.
1013                         int loop1 = yb - ya;
1014                         if (loop1 <= 0)
1015                                 continue;
1016
                            // PixelBase addresses column 0 of row ya in gpu_unai.vram
                            // and is stepped by FRAME_WIDTH per row in the loop below.
                            // li/pi/pif are row filters: rows with (ya & li) != 0 are
                            // skipped, as are rows with (ya & pi) == pif. When
                            // ProgressiveInterlaceEnabled() is false, pi is 0 and pif
                            // is 1, so the second filter never matches.
1017                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
1018                         int li=gpu_unai.inn.ilace_mask;
1019                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
1020                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
1021
                            // One iteration per row. The 'continue' statements below
                            // still run the increment clause, so the edge walkers and
                            // colors stay in step across skipped rows.
1022                         for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1023                                         x3 += dx3, x4 += dx4,
1024                                         r3 += dr3, g3 += dg3, b3 += db3 )
1025                         {
1026                                 if (ya&li) continue;
1027                                 if ((ya&pi)==pif) continue;
1028
1029                                 u32 r4, g4, b4;
1030
                                    // Span bounds come from the two edge walkers;
                                    // r4/g4/b4 start out as the color at x3.
1031                                 xa = FixedCeilToInt(x3);
1032                                 xb = FixedCeilToInt(x4);
1033                                 r4 = r3;  g4 = g3;  b4 = b3;
1034
                                    // Move the color from the edge position x3 to the
                                    // first pixel xa; itmp is the fixed-point distance
                                    // between the two.
1035                                 fixed itmp = i2x(xa) - x3;
1036                                 if (itmp != 0) {
1037                                         r4 += (dr4 * itmp) >> FIXED_BITS;
1038                                         g4 += (dg4 * itmp) >> FIXED_BITS;
1039                                         b4 += (db4 * itmp) >> FIXED_BITS;
1040                                 }
1041
                                    // NOTE(review): presumably a rounding bias applied
                                    // before packing -- confirm against
                                    // gpuPackGouraudCol().
1042                                 r4 += fixed_HALF;
1043                                 g4 += fixed_HALF;
1044                                 b4 += fixed_HALF;
1045
                                    // Clip left: step the color over the (xmin - xa)
                                    // pixels that are cut off.
1046                                 if ((xmin - xa) > 0) {
1047                                         r4 += (dr4 * (xmin - xa));
1048                                         g4 += (dg4 * (xmin - xa));
1049                                         b4 += (db4 * (xmin - xa));
1050                                         xa = xmin;
1051                                 }
1052
1053                                 // Setup packed Gouraud color for inner driver
1054                                 gpu_unai.inn.gCol = gpuPackGouraudCol(r4, g4, b4);
1055
                                    // Clip right; the driver is given (xb - xa) pixels
                                    // starting at column xa, and empty spans are not
                                    // passed to it.
1056                                 if (xb > xmax) xb = xmax;
1057                                 if ((xb - xa) > 0)
1058                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
1059                         }
1060                 }
1061         } while (++cur_pass < total_passes);
1062 }
1063
1064 /*----------------------------------------------------------------------
1065 gpuDrawPolyGT - Gouraud-shaded, textured poly
1066 ----------------------------------------------------------------------*/
1067 void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
1068 {
1069         PolyVertex vbuf[4];
1070         polyInitVertexBuffer(vbuf, packet, POLYTYPE_GT, is_quad);
1071
1072         int total_passes = is_quad ? 2 : 1;
1073         int cur_pass = 0;
1074         do
1075         {
1076                 const PolyVertex* vptrs[3];
1077                 s32 x_off, y_off;
1078                 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
1079                         continue;
1080
1081                 s32 xa, xb, ya, yb;
1082                 s32 x3, dx3, x4, dx4, dx;
1083                 s32 u3, du3, v3, dv3;
1084                 s32 r3, dr3, g3, dg3, b3, db3;
1085                 s32 x0, x1, x2, y0, y1, y2;
1086                 s32 u0, u1, u2, v0, v1, v2;
1087                 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
1088                 s32 du4, dv4;
1089                 s32 dr4, dg4, db4;
1090
1091                 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
1092                 u0 = vptrs[0]->tex.u;     v0 = vptrs[0]->tex.v;
1093                 r0 = vptrs[0]->col.r;     g0 = vptrs[0]->col.g;  b0 = vptrs[0]->col.b;
1094                 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
1095                 u1 = vptrs[1]->tex.u;     v1 = vptrs[1]->tex.v;
1096                 r1 = vptrs[1]->col.r;     g1 = vptrs[1]->col.g;  b1 = vptrs[1]->col.b;
1097                 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
1098                 u2 = vptrs[2]->tex.u;     v2 = vptrs[2]->tex.v;
1099                 r2 = vptrs[2]->col.r;     g2 = vptrs[2]->col.g;  b2 = vptrs[2]->col.b;
1100
1101                 ya = y2 - y0;
1102                 yb = y2 - y1;
1103                 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
1104                 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
1105                 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
1106                 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
1107                 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
1108                 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
1109                 dx = dx4;
1110                 if (dx4 < 0) {
1111                         dx4 = -dx4;
1112                         du4 = -du4;
1113                         dv4 = -dv4;
1114                         dr4 = -dr4;
1115                         dg4 = -dg4;
1116                         db4 = -db4;
1117                 }
1118
1119 #ifdef GPU_UNAI_USE_FLOATMATH
1120 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1121                 if (dx4 != 0) {
1122                         float finv = FloatInv(dx4);
1123                         du4 = (fixed)((du4 << FIXED_BITS) * finv);
1124                         dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
1125                         dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
1126                         dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
1127                         db4 = (fixed)((db4 << FIXED_BITS) * finv);
1128                 } else {
1129                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1130                 }
1131 #else
1132                 if (dx4 != 0) {
1133                         float fdiv = dx4;
1134                         du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
1135                         dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
1136                         dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
1137                         dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
1138                         db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
1139                 } else {
1140                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1141                 }
1142 #endif
1143 #else  // Integer Division:
1144 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1145                 if (dx4 != 0) {
1146                         int iF, iS;
1147                         xInv(dx4, iF, iS);
1148                         du4 = xInvMulx(du4, iF, iS);
1149                         dv4 = xInvMulx(dv4, iF, iS);
1150                         dr4 = xInvMulx(dr4, iF, iS);
1151                         dg4 = xInvMulx(dg4, iF, iS);
1152                         db4 = xInvMulx(db4, iF, iS);
1153                 } else {
1154                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1155                 }
1156 #else
1157                 if (dx4 != 0) {
1158                         du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
1159                         dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
1160                         dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
1161                         dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
1162                         db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
1163                 } else {
1164                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1165                 }
1166 #endif
1167 #endif
1168                 // Set u,v increments and packed Gouraud increment for inner driver
1169                 gpu_unai.inn.u_inc = du4;
1170                 gpu_unai.inn.v_inc = dv4;
1171                 gpu_unai.inn.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
1172
1173                 for (s32 loop0 = 2; loop0; loop0--) {
1174                         if (loop0 == 2) {
1175                                 ya = y0;  yb = y1;
1176                                 x3 = x4 = i2x(x0);
1177                                 u3 = i2x(u0);  v3 = i2x(v0);
1178                                 r3 = i2x(r0);  g3 = i2x(g0);  b3 = i2x(b0);
1179                                 if (dx < 0) {
1180 #ifdef GPU_UNAI_USE_FLOATMATH
1181 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1182                                         if ((y2 - y0) != 0) {
1183                                                 float finv = FloatInv(y2 - y0);
1184                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
1185                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
1186                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
1187                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
1188                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
1189                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
1190                                         } else {
1191                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1192                                         }
1193                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
1194 #else
1195                                         if ((y2 - y0) != 0) {
1196                                                 float fdiv = y2 - y0;
1197                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
1198                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
1199                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
1200                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
1201                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
1202                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
1203                                         } else {
1204                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1205                                         }
1206                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
1207 #endif
1208 #else  // Integer Division:
1209 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1210                                         if ((y2 - y0) != 0) {
1211                                                 int iF, iS;
1212                                                 xInv((y2 - y0), iF, iS);
1213                                                 dx3 = xInvMulx((x2 - x0), iF, iS);
1214                                                 du3 = xInvMulx((u2 - u0), iF, iS);
1215                                                 dv3 = xInvMulx((v2 - v0), iF, iS);
1216                                                 dr3 = xInvMulx((r2 - r0), iF, iS);
1217                                                 dg3 = xInvMulx((g2 - g0), iF, iS);
1218                                                 db3 = xInvMulx((b2 - b0), iF, iS);
1219                                         } else {
1220                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1221                                         }
1222                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
1223 #else
1224                                         if ((y2 - y0) != 0) {
1225                                                 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
1226                                                 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
1227                                                 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
1228                                                 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
1229                                                 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
1230                                                 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
1231                                         } else {
1232                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1233                                         }
1234                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
1235 #endif
1236 #endif
1237                                 } else {
1238 #ifdef GPU_UNAI_USE_FLOATMATH
1239 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1240                                         if ((y1 - y0) != 0) {
1241                                                 float finv = FloatInv(y1 - y0);
1242                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
1243                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
1244                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
1245                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
1246                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
1247                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
1248                                         } else {
1249                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1250                                         }
1251                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
1252 #else
1253                                         if ((y1 - y0) != 0) {
1254                                                 float fdiv = y1 - y0;
1255                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
1256                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
1257                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
1258                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
1259                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
1260                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
1261                                         } else {
1262                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1263                                         }
1264                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / float(y2 - y0)) : 0;
1265 #endif
1266 #else  // Integer Division:
1267 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1268                                         if ((y1 - y0) != 0) {
1269                                                 int iF, iS;
1270                                                 xInv((y1 - y0), iF, iS);
1271                                                 dx3 = xInvMulx((x1 - x0), iF, iS);
1272                                                 du3 = xInvMulx((u1 - u0), iF, iS);
1273                                                 dv3 = xInvMulx((v1 - v0), iF, iS);
1274                                                 dr3 = xInvMulx((r1 - r0), iF, iS);
1275                                                 dg3 = xInvMulx((g1 - g0), iF, iS);
1276                                                 db3 = xInvMulx((b1 - b0), iF, iS);
1277                                         } else {
1278                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1279                                         }
1280                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
1281 #else
1282                                         if ((y1 - y0) != 0) {
1283                                                 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
1284                                                 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
1285                                                 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
1286                                                 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
1287                                                 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
1288                                                 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
1289                                         } else {
1290                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1291                                         }
1292                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
1293 #endif
1294 #endif
1295                                 }
1296                         } else {
1297                                 //senquack - break out of the second (final) pass if there is nothing
1298                                 //           to draw (the 1st pass must always run to set up dx3/dx4)
1299                                 if (y1 == y2) break;
1300
1301                                 ya = y1;  yb = y2;
1302
1303                                 if (dx < 0) {
1304                                         x3 = i2x(x0);  x4 = i2x(x1);
1305                                         u3 = i2x(u0);  v3 = i2x(v0);
1306                                         r3 = i2x(r0);  g3 = i2x(g0);  b3 = i2x(b0);
1307
1308                                         if ((y1 - y0) != 0) {
1309                                                 x3 += (dx3 * (y1 - y0));
1310                                                 u3 += (du3 * (y1 - y0));
1311                                                 v3 += (dv3 * (y1 - y0));
1312                                                 r3 += (dr3 * (y1 - y0));
1313                                                 g3 += (dg3 * (y1 - y0));
1314                                                 b3 += (db3 * (y1 - y0));
1315                                         }
1316
1317 #ifdef GPU_UNAI_USE_FLOATMATH
1318 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1319                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
1320 #else
1321                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
1322 #endif
1323 #else  // Integer Division:
1324 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1325                                         dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
1326 #else
1327                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
1328 #endif
1329 #endif
1330                                 } else {
1331                                         x3 = i2x(x1);
1332                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
1333
1334                                         u3 = i2x(u1);  v3 = i2x(v1);
1335                                         r3 = i2x(r1);  g3 = i2x(g1);  b3 = i2x(b1);
1336 #ifdef GPU_UNAI_USE_FLOATMATH
1337 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1338                                         if ((y2 - y1) != 0) {
1339                                                 float finv = FloatInv(y2 - y1);
1340                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
1341                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
1342                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
1343                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
1344                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
1345                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
1346                                         } else {
1347                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1348                                         }
1349 #else
1350                                         if ((y2 - y1) != 0) {
1351                                                 float fdiv = y2 - y1;
1352                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
1353                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
1354                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
1355                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
1356                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
1357                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
1358                                         } else {
1359                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1360                                         }
1361 #endif
1362 #else  // Integer Division:
1363 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1364                                         if ((y2 - y1) != 0) {
1365                                                 int iF, iS;
1366                                                 xInv((y2 - y1), iF, iS);
1367                                                 dx3 = xInvMulx((x2 - x1), iF, iS);
1368                                                 du3 = xInvMulx((u2 - u1), iF, iS);
1369                                                 dv3 = xInvMulx((v2 - v1), iF, iS);
1370                                                 dr3 = xInvMulx((r2 - r1), iF, iS);
1371                                                 dg3 = xInvMulx((g2 - g1), iF, iS);
1372                                                 db3 = xInvMulx((b2 - b1), iF, iS);
1373                                         } else {
1374                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1375                                         }
1376 #else
1377                                         if ((y2 - y1) != 0) {
1378                                                 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
1379                                                 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
1380                                                 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
1381                                                 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
1382                                                 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
1383                                                 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
1384                                         } else {
1385                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1386                                         }
1387 #endif
1388 #endif
1389                                 }
1390                         }
1391
1392                         s32 xmin, xmax, ymin, ymax;
1393                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
1394                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
1395
1396                         if ((ymin - ya) > 0) {
1397                                 x3 += (dx3 * (ymin - ya));
1398                                 x4 += (dx4 * (ymin - ya));
1399                                 u3 += (du3 * (ymin - ya));
1400                                 v3 += (dv3 * (ymin - ya));
1401                                 r3 += (dr3 * (ymin - ya));
1402                                 g3 += (dg3 * (ymin - ya));
1403                                 b3 += (db3 * (ymin - ya));
1404                                 ya = ymin;
1405                         }
1406
1407                         if (yb > ymax) yb = ymax;
1408
1409                         int loop1 = yb - ya;
1410                         if (loop1 <= 0)
1411                                 continue;
1412
1413                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
1414                         int li=gpu_unai.inn.ilace_mask;
1415                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
1416                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
1417
1418                         for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1419                                         x3 += dx3, x4 += dx4,
1420                                         u3 += du3, v3 += dv3,
1421                                         r3 += dr3, g3 += dg3, b3 += db3 )
1422                         {
1423                                 if (ya&li) continue;
1424                                 if ((ya&pi)==pif) continue;
1425
1426                                 u32 u4, v4;
1427                                 u32 r4, g4, b4;
1428
1429                                 xa = FixedCeilToInt(x3);
1430                                 xb = FixedCeilToInt(x4);
1431                                 u4 = u3;  v4 = v3;
1432                                 r4 = r3;  g4 = g3;  b4 = b3;
1433
1434                                 fixed itmp = i2x(xa) - x3;
1435                                 if (itmp != 0) {
1436                                         u4 += (du4 * itmp) >> FIXED_BITS;
1437                                         v4 += (dv4 * itmp) >> FIXED_BITS;
1438                                         r4 += (dr4 * itmp) >> FIXED_BITS;
1439                                         g4 += (dg4 * itmp) >> FIXED_BITS;
1440                                         b4 += (db4 * itmp) >> FIXED_BITS;
1441                                 }
1442
1443                                 u4 += fixed_HALF;
1444                                 v4 += fixed_HALF;
1445                                 r4 += fixed_HALF;
1446                                 g4 += fixed_HALF;
1447                                 b4 += fixed_HALF;
1448
1449                                 if ((xmin - xa) > 0) {
1450                                         u4 += du4 * (xmin - xa);
1451                                         v4 += dv4 * (xmin - xa);
1452                                         r4 += dr4 * (xmin - xa);
1453                                         g4 += dg4 * (xmin - xa);
1454                                         b4 += db4 * (xmin - xa);
1455                                         xa = xmin;
1456                                 }
1457
1458                                 // Set packed Gouraud color and u,v coords for inner driver
1459                                 gpu_unai.inn.u = u4;
1460                                 gpu_unai.inn.v = v4;
1461                                 gpu_unai.inn.gCol = gpuPackGouraudCol(r4, g4, b4);
1462
1463                                 if (xb > xmax) xb = xmax;
1464                                 if ((xb - xa) > 0)
1465                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
1466                         }
1467                 }
1468         } while (++cur_pass < total_passes);
1469 }
1470
1471 #endif /* __GPU_UNAI_GPU_RASTER_POLYGON_H__ */