Backport GPU Unai plugin from PCSX4ALL
[pcsx_rearmed.git] / plugins / gpu_unai / gpu_raster_polygon.h
1 /***************************************************************************
2 *   Copyright (C) 2010 PCSX4ALL Team                                      *
3 *   Copyright (C) 2010 Unai                                               *
4 *                                                                         *
5 *   This program is free software; you can redistribute it and/or modify  *
6 *   it under the terms of the GNU General Public License as published by  *
7 *   the Free Software Foundation; either version 2 of the License, or     *
8 *   (at your option) any later version.                                   *
9 *                                                                         *
10 *   This program is distributed in the hope that it will be useful,       *
11 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13 *   GNU General Public License for more details.                          *
14 *                                                                         *
15 *   You should have received a copy of the GNU General Public License     *
16 *   along with this program; if not, write to the                         *
17 *   Free Software Foundation, Inc.,                                       *
18 *   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
19 ***************************************************************************/
20
21 //senquack - NOTE: GPU Unai poly routines have been rewritten/adapted
22 // from DrHell routines to fix multiple issues. See README_senquack.txt
23
24 ///////////////////////////////////////////////////////////////////////////////
25 // Shared poly vertex buffer, able to handle 3 or 4-pt polys of any type.
26 ///////////////////////////////////////////////////////////////////////////////
27
28 struct PolyVertex {
29         s32 x, y; // Sign-extended 11-bit X,Y coords
30         union {
31                 struct { u8 u, v, pad[2]; } tex; // Texture coords (if used)
32                 u32 tex_word;
33         };
34         union {
35                 struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used)
36                 u32 col_word;
37         };
38 };
39
// Independent bit flags naming the optional per-vertex data of a poly.
enum PolyAttribute {
	POLYATTR_TEXTURE = 0x1, // bit 0: poly carries texture coords
	POLYATTR_GOURAUD = 0x2  // bit 1: poly carries per-vertex colors
};
44
45 enum PolyType {
46         POLYTYPE_F  = 0,
47         POLYTYPE_FT = (POLYATTR_TEXTURE),
48         POLYTYPE_G  = (POLYATTR_GOURAUD),
49         POLYTYPE_GT = (POLYATTR_TEXTURE | POLYATTR_GOURAUD)
50 };
51
52 ///////////////////////////////////////////////////////////////////////////////
53 // polyInitVertexBuffer()
54 // Fills vbuf[] array with data from any type of poly draw-command packet.
55 ///////////////////////////////////////////////////////////////////////////////
56 static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyType ptype, u32 is_quad)
57 {
58         bool texturing = ptype & POLYATTR_TEXTURE;
59         bool gouraud   = ptype & POLYATTR_GOURAUD;
60
61         int vert_stride = 1; // Stride of vertices in cmd packet, in 32-bit words
62         if (texturing)
63                 vert_stride++;
64         if (gouraud)
65                 vert_stride++;
66
67         int num_verts = (is_quad) ? 4 : 3;
68         u32 *ptr;
69
70         // X,Y coords, adjusted by draw offsets
71         s32 x_off = gpu_unai.DrawingOffset[0];
72         s32 y_off = gpu_unai.DrawingOffset[1];
73         ptr = &packet.U4[1];
74         for (int i=0;  i < num_verts; ++i, ptr += vert_stride) {
75                 s16* coord_ptr = (s16*)ptr;
76                 vbuf[i].x = GPU_EXPANDSIGN(coord_ptr[0]) + x_off;
77                 vbuf[i].y = GPU_EXPANDSIGN(coord_ptr[1]) + y_off;
78         }
79
80         // U,V texture coords (if applicable)
81         if (texturing) {
82                 ptr = &packet.U4[2];
83                 for (int i=0;  i < num_verts; ++i, ptr += vert_stride)
84                         vbuf[i].tex_word = *ptr;
85         }
86
87         // Colors (if applicable)
88         if (gouraud) {
89                 ptr = &packet.U4[0];
90                 for (int i=0;  i < num_verts; ++i, ptr += vert_stride)
91                         vbuf[i].col_word = *ptr;
92         }
93 }
94
95 ///////////////////////////////////////////////////////////////////////////////
96 //  Helper functions to determine which vertex in a 2 or 3 vertex array
97 //   has the highest/lowest X/Y coordinate.
98 //   Note: the comparison logic is such that, given a set of vertices with
99 //    identical values for a given coordinate, a different index will be
100 //    returned from vertIdxOfLeast..() than a call to vertIdxOfHighest..().
101 //    This ensures that, during the vertex-ordering phase of rasterization,
102 //    all three vertices remain unique.
103 ///////////////////////////////////////////////////////////////////////////////
104
// Index (0 or 1) of the vertex with the smaller X; a tie yields index 0.
template<typename T>
static inline int vertIdxOfLeastXCoord2(const T *Tptr)
{
	if (Tptr[1].x < Tptr[0].x)
		return 1;
	return 0;
}

// Index (0..2) of the vertex with the smallest X; ties favor the lower index.
template<typename T>
static inline int vertIdxOfLeastXCoord3(const T *Tptr)
{
	const int best = vertIdxOfLeastXCoord2(Tptr);
	if (Tptr[2].x < Tptr[best].x)
		return 2;
	return best;
}
117
// Index (0 or 1) of the vertex with the smaller Y; a tie yields index 0.
template<typename T>
static inline int vertIdxOfLeastYCoord2(const T *Tptr)
{
	if (Tptr[1].y < Tptr[0].y)
		return 1;
	return 0;
}

// Index (0..2) of the vertex with the smallest Y; ties favor the lower index.
template<typename T>
static inline int vertIdxOfLeastYCoord3(const T *Tptr)
{
	const int best = vertIdxOfLeastYCoord2(Tptr);
	if (Tptr[2].y < Tptr[best].y)
		return 2;
	return best;
}
130
// Index (0 or 1) of the vertex with the larger X; a tie yields index 1.
template<typename T>
static inline int vertIdxOfHighestXCoord2(const T *Tptr)
{
	if (Tptr[1].x < Tptr[0].x)
		return 0;
	return 1;
}

// Index (0..2) of the vertex with the largest X; ties favor the higher index.
template<typename T>
static inline int vertIdxOfHighestXCoord3(const T *Tptr)
{
	const int best = vertIdxOfHighestXCoord2(Tptr);
	if (Tptr[2].x < Tptr[best].x)
		return best;
	return 2;
}
143
// Index (0 or 1) of the vertex with the larger Y; a tie yields index 1.
template<typename T>
static inline int vertIdxOfHighestYCoord2(const T *Tptr)
{
	if (Tptr[1].y < Tptr[0].y)
		return 0;
	return 1;
}

// Index (0..2) of the vertex with the largest Y; ties favor the higher index.
template<typename T>
static inline int vertIdxOfHighestYCoord3(const T *Tptr)
{
	const int best = vertIdxOfHighestYCoord2(Tptr);
	if (Tptr[2].y < Tptr[best].y)
		return best;
	return 2;
}
156
157 ///////////////////////////////////////////////////////////////////////////////
158 // polyUseTriangle()
159 //  Determines if the specified triangle should be rendered. If so, it
160 //  fills the given array of vertex pointers, vert_ptrs, in order of
161 //  increasing Y coordinate values, as required by rasterization algorithm.
162 //  Parameter 'tri_num' is 0 for first triangle (idx 0,1,2 of vbuf[]),
163 //   or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]).
164 //  Returns true if triangle should be rendered, false if not.
165 ///////////////////////////////////////////////////////////////////////////////
166 static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs)
167 {
168         // Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)?
169         const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 0 : 1];
170
171         // Get indices of highest/lowest X,Y coords within triangle
172         int idx_lowest_x  = vertIdxOfLeastXCoord3(tri_ptr);
173         int idx_highest_x = vertIdxOfHighestXCoord3(tri_ptr);
174         int idx_lowest_y  = vertIdxOfLeastYCoord3(tri_ptr);
175         int idx_highest_y = vertIdxOfHighestYCoord3(tri_ptr);
176
177         // Maximum absolute distance between any two X coordinates is 1023,
178         //  and for Y coordinates is 511 (PS1 hardware limitation)
179         int lowest_x  = tri_ptr[idx_lowest_x].x;
180         int highest_x = tri_ptr[idx_highest_x].x;
181         int lowest_y  = tri_ptr[idx_lowest_y].y;
182         int highest_y = tri_ptr[idx_highest_y].y;
183         if ((highest_x - lowest_x) >= CHKMAX_X ||
184             (highest_y - lowest_y) >= CHKMAX_Y)
185                 return false;
186
187         // Determine if triangle is completely outside clipping range
188         int xmin, xmax, ymin, ymax;
189         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
190         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
191         int clipped_lowest_x  = Max2(xmin,lowest_x);
192         int clipped_lowest_y  = Max2(ymin,lowest_y);
193         int clipped_highest_x = Min2(xmax,highest_x);
194         int clipped_highest_y = Min2(ymax,highest_y);
195         if (clipped_lowest_x >= clipped_highest_x ||
196             clipped_lowest_y >= clipped_highest_y)
197                 return false;
198
199         // Order vertex ptrs by increasing y value (draw routines need this).
200         // The middle index is deduced by a binary math trick that depends
201         //  on index range always being between 0..2
202         vert_ptrs[0] = tri_ptr + idx_lowest_y;
203         vert_ptrs[1] = tri_ptr + ((idx_lowest_y + idx_highest_y) ^ 3);
204         vert_ptrs[2] = tri_ptr + idx_highest_y;
205         return true;
206 }
207
208 ///////////////////////////////////////////////////////////////////////////////
209 //  GPU internal polygon drawing functions
210 ///////////////////////////////////////////////////////////////////////////////
211
212 /*----------------------------------------------------------------------
213 gpuDrawPolyF - Flat-shaded, untextured poly
214 ----------------------------------------------------------------------*/
215 void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
216 {
217         // Set up bgr555 color to be used across calls in inner driver
218         gpu_unai.PixelData = GPU_RGB16(packet.U4[0]);
219
220         PolyVertex vbuf[4];
221         polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad);
222
223         int total_passes = is_quad ? 2 : 1;
224         int cur_pass = 0;
225         do
226         {
227                 const PolyVertex* vptrs[3];
228                 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
229                         continue;
230
231                 s32 xa, xb, ya, yb;
232                 s32 x3, dx3, x4, dx4, dx;
233                 s32 x0, x1, x2, y0, y1, y2;
234
235                 x0 = vptrs[0]->x;  y0 = vptrs[0]->y;
236                 x1 = vptrs[1]->x;  y1 = vptrs[1]->y;
237                 x2 = vptrs[2]->x;  y2 = vptrs[2]->y;
238
239                 ya = y2 - y0;
240                 yb = y2 - y1;
241                 dx = (x2 - x1) * ya - (x2 - x0) * yb;
242
243                 for (int loop0 = 2; loop0; loop0--) {
244                         if (loop0 == 2) {
245                                 ya = y0;  yb = y1;
246                                 x3 = x4 = i2x(x0);
247                                 if (dx < 0) {
248 #ifdef GPU_UNAI_USE_FLOATMATH
249 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
250                                         dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
251                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
252 #else
253                                         dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
254                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
255 #endif
256 #else  // Integer Division:
257 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
258                                         dx3 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
259                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
260 #else
261                                         dx3 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
262                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
263 #endif
264 #endif
265                                 } else {
266 #ifdef GPU_UNAI_USE_FLOATMATH
267 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
268                                         dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
269                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
270 #else
271                                         dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
272                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
273 #endif
274 #else  // Integer Division:
275 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
276                                         dx3 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
277                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
278 #else
279                                         dx3 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
280                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
281 #endif
282 #endif
283                                 }
284                         } else {
285                                 //senquack - break out of final loop if nothing to be drawn (1st loop
286                                 //           must always be taken to setup dx3/dx4)
287                                 if (y1 == y2) break;
288
289                                 ya = y1;  yb = y2;
290
291                                 if (dx < 0) {
292                                         x3 = i2x(x0) + (dx3 * (y1 - y0));
293                                         x4 = i2x(x1);
294 #ifdef GPU_UNAI_USE_FLOATMATH
295 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
296                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
297 #else
298                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
299 #endif
300 #else  // Integer Division:
301 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
302                                         dx4 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
303 #else
304                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
305 #endif
306 #endif
307                                 } else {
308                                         x3 = i2x(x1);
309                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
310 #ifdef GPU_UNAI_USE_FLOATMATH
311 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
312                                         dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
313 #else
314                                         dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
315 #endif
316 #else  // Integer Division:
317 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
318                                         dx3 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
319 #else
320                                         dx3 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
321 #endif
322 #endif
323                                 }
324                         }
325
326                         s32 xmin, xmax, ymin, ymax;
327                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
328                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
329
330                         if ((ymin - ya) > 0) {
331                                 x3 += (dx3 * (ymin - ya));
332                                 x4 += (dx4 * (ymin - ya));
333                                 ya = ymin;
334                         }
335
336                         if (yb > ymax) yb = ymax;
337
338                         int loop1 = yb - ya;
339                         if (loop1 <= 0)
340                                 continue;
341
342                         u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
343                         int li=gpu_unai.ilace_mask;
344                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
345                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
346
347                         for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH,
348                                         x3 += dx3, x4 += dx4 )
349                         {
350                                 if (ya&li) continue;
351                                 if ((ya&pi)==pif) continue;
352
353                                 xa = FixedCeilToInt(x3);  xb = FixedCeilToInt(x4);
354                                 if ((xmin - xa) > 0) xa = xmin;
355                                 if (xb > xmax) xb = xmax;
356                                 if ((xb - xa) > 0)
357                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
358                         }
359                 }
360         } while (++cur_pass < total_passes);
361 }
362
363 /*----------------------------------------------------------------------
364 gpuDrawPolyFT - Flat-shaded, textured poly
365 ----------------------------------------------------------------------*/
366 void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
367 {
368         // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light)
369         gpu_unai.r8 = packet.U1[0];
370         gpu_unai.g8 = packet.U1[1];
371         gpu_unai.b8 = packet.U1[2];
372         // r5/g5/b5 used if just texture-blending is applied (15-bit light)
373         gpu_unai.r5 = packet.U1[0] >> 3;
374         gpu_unai.g5 = packet.U1[1] >> 3;
375         gpu_unai.b5 = packet.U1[2] >> 3;
376
377         PolyVertex vbuf[4];
378         polyInitVertexBuffer(vbuf, packet, POLYTYPE_FT, is_quad);
379
380         int total_passes = is_quad ? 2 : 1;
381         int cur_pass = 0;
382         do
383         {
384                 const PolyVertex* vptrs[3];
385                 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
386                         continue;
387
388                 s32 xa, xb, ya, yb;
389                 s32 x3, dx3, x4, dx4, dx;
390                 s32 u3, du3, v3, dv3;
391                 s32 x0, x1, x2, y0, y1, y2;
392                 s32 u0, u1, u2, v0, v1, v2;
393                 s32 du4, dv4;
394
395                 x0 = vptrs[0]->x;      y0 = vptrs[0]->y;
396                 u0 = vptrs[0]->tex.u;  v0 = vptrs[0]->tex.v;
397                 x1 = vptrs[1]->x;      y1 = vptrs[1]->y;
398                 u1 = vptrs[1]->tex.u;  v1 = vptrs[1]->tex.v;
399                 x2 = vptrs[2]->x;      y2 = vptrs[2]->y;
400                 u2 = vptrs[2]->tex.u;  v2 = vptrs[2]->tex.v;
401
402                 ya = y2 - y0;
403                 yb = y2 - y1;
404                 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
405                 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
406                 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
407                 dx = dx4;
408                 if (dx4 < 0) {
409                         dx4 = -dx4;
410                         du4 = -du4;
411                         dv4 = -dv4;
412                 }
413
414 #ifdef GPU_UNAI_USE_FLOATMATH
415 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
416                 if (dx4 != 0) {
417                         float finv = FloatInv(dx4);
418                         du4 = (fixed)((du4 << FIXED_BITS) * finv);
419                         dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
420                 } else {
421                         du4 = dv4 = 0;
422                 }
423 #else
424                 if (dx4 != 0) {
425                         float fdiv = dx4;
426                         du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
427                         dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
428                 } else {
429                         du4 = dv4 = 0;
430                 }
431 #endif
432 #else  // Integer Division:
433 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
434                 if (dx4 != 0) {
435                         int iF, iS;
436                         xInv(dx4, iF, iS);
437                         du4 = xInvMulx(du4, iF, iS);
438                         dv4 = xInvMulx(dv4, iF, iS);
439                 } else {
440                         du4 = dv4 = 0;
441                 }
442 #else
443                 if (dx4 != 0) {
444                         du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
445                         dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
446                 } else {
447                         du4 = dv4 = 0;
448                 }
449 #endif
450 #endif
451                 // Set u,v increments for inner driver
452                 gpu_unai.u_inc = du4;
453                 gpu_unai.v_inc = dv4;
454
455                 //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here?
456                 //                       (SAME ISSUE ELSEWHERE)
457                 for (s32 loop0 = 2; loop0; loop0--) {
458                         if (loop0 == 2) {
459                                 ya = y0;  yb = y1;
460                                 x3 = x4 = i2x(x0);
461                                 u3 = i2x(u0);  v3 = i2x(v0);
462                                 if (dx < 0) {
463 #ifdef GPU_UNAI_USE_FLOATMATH
464 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
465                                         if ((y2 - y0) != 0) {
466                                                 float finv = FloatInv(y2 - y0);
467                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
468                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
469                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
470                                         } else {
471                                                 dx3 = du3 = dv3 = 0;
472                                         }
473                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
474 #else
475                                         if ((y2 - y0) != 0) {
476                                                 float fdiv = y2 - y0;
477                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
478                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
479                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
480                                         } else {
481                                                 dx3 = du3 = dv3 = 0;
482                                         }
483                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
484 #endif
485 #else  // Integer Division:
486 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
487                                         if ((y2 - y0) != 0) {
488                                                 int iF, iS;
489                                                 xInv((y2 - y0), iF, iS);
490                                                 dx3 = xInvMulx((x2 - x0), iF, iS);
491                                                 du3 = xInvMulx((u2 - u0), iF, iS);
492                                                 dv3 = xInvMulx((v2 - v0), iF, iS);
493                                         } else {
494                                                 dx3 = du3 = dv3 = 0;
495                                         }
496                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
497 #else
498                                         if ((y2 - y0) != 0) {
499                                                 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
500                                                 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
501                                                 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
502                                         } else {
503                                                 dx3 = du3 = dv3 = 0;
504                                         }
505                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
506 #endif
507 #endif
508                                 } else {
509 #ifdef GPU_UNAI_USE_FLOATMATH
510 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
511                                         if ((y1 - y0) != 0) {
512                                                 float finv = FloatInv(y1 - y0);
513                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
514                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
515                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
516                                         } else {
517                                                 dx3 = du3 = dv3 = 0;
518                                         }
519                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
520 #else
521                                         if ((y1 - y0) != 0) {
522                                                 float fdiv = y1 - y0;
523                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
524                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
525                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
526                                         } else {
527                                                 dx3 = du3 = dv3 = 0;
528                                         }
529                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
530 #endif
531 #else  // Integer Division:
532 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
533                                         if ((y1 - y0) != 0) {
534                                                 int iF, iS;
535                                                 xInv((y1 - y0), iF, iS);
536                                                 dx3 = xInvMulx((x1 - x0), iF, iS);
537                                                 du3 = xInvMulx((u1 - u0), iF, iS);
538                                                 dv3 = xInvMulx((v1 - v0), iF, iS);
539                                         } else {
540                                                 dx3 = du3 = dv3 = 0;
541                                         }
542                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
543 #else
544                                         if ((y1 - y0) != 0) {
545                                                 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
546                                                 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
547                                                 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
548                                         } else {
549                                                 dx3 = du3 = dv3 = 0;
550                                         }
551                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
552 #endif
553 #endif
554                                 }
555                         } else {
556                                 //senquack - break out of final loop if nothing to be drawn (1st loop
557                                 //           must always be taken to setup dx3/dx4)
558                                 if (y1 == y2) break;
559
560                                 ya = y1;  yb = y2;
561
562                                 if (dx < 0) {
563                                         x3 = i2x(x0);
564                                         x4 = i2x(x1);
565                                         u3 = i2x(u0);
566                                         v3 = i2x(v0);
567                                         if ((y1 - y0) != 0) {
568                                                 x3 += (dx3 * (y1 - y0));
569                                                 u3 += (du3 * (y1 - y0));
570                                                 v3 += (dv3 * (y1 - y0));
571                                         }
572 #ifdef GPU_UNAI_USE_FLOATMATH
573 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
574                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
575 #else
576                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
577 #endif
578 #else  // Integer Division:
579 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
580                                         dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
581 #else
582                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
583 #endif
584 #endif
585                                 } else {
586                                         x3 = i2x(x1);
587                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
588                                         u3 = i2x(u1);
589                                         v3 = i2x(v1);
590 #ifdef GPU_UNAI_USE_FLOATMATH
591 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
592                                         if ((y2 - y1) != 0) {
593                                                 float finv = FloatInv(y2 - y1);
594                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
595                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
596                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
597                                         } else {
598                                                 dx3 = du3 = dv3 = 0;
599                                         }
600 #else
601                                         if ((y2 - y1) != 0) {
602                                                 float fdiv = y2 - y1;
603                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
604                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
605                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
606                                         } else {
607                                                 dx3 = du3 = dv3 = 0;
608                                         }
609 #endif
610 #else  // Integer Division:
611 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
612                                         if ((y2 - y1) != 0) {
613                                                 int iF, iS;
614                                                 xInv((y2 - y1), iF, iS);
615                                                 dx3 = xInvMulx((x2 - x1), iF, iS);
616                                                 du3 = xInvMulx((u2 - u1), iF, iS);
617                                                 dv3 = xInvMulx((v2 - v1), iF, iS);
618                                         } else {
619                                                 dx3 = du3 = dv3 = 0;
620                                         }
621 #else 
622                                         if ((y2 - y1) != 0) {
623                                                 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
624                                                 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
625                                                 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
626                                         } else {
627                                                 dx3 = du3 = dv3 = 0;
628                                         }
629 #endif
630 #endif
631                                 }
632                         }
633
634                         s32 xmin, xmax, ymin, ymax;
635                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
636                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
637
638                         if ((ymin - ya) > 0) {
639                                 x3 += dx3 * (ymin - ya);
640                                 x4 += dx4 * (ymin - ya);
641                                 u3 += du3 * (ymin - ya);
642                                 v3 += dv3 * (ymin - ya);
643                                 ya = ymin;
644                         }
645
646                         if (yb > ymax) yb = ymax;
647
648                         int loop1 = yb - ya;
649                         if (loop1 <= 0)
650                                 continue;
651
652                         u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
653                         int li=gpu_unai.ilace_mask;
654                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
655                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
656
657                         for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
658                                         x3 += dx3, x4 += dx4,
659                                         u3 += du3, v3 += dv3 )
660                         {
661                                 if (ya&li) continue;
662                                 if ((ya&pi)==pif) continue;
663
664                                 u32 u4, v4;
665
666                                 xa = FixedCeilToInt(x3);  xb = FixedCeilToInt(x4);
667                                 u4 = u3;  v4 = v3;
668
669                                 fixed itmp = i2x(xa) - x3;
670                                 if (itmp != 0) {
671                                         u4 += (du4 * itmp) >> FIXED_BITS;
672                                         v4 += (dv4 * itmp) >> FIXED_BITS;
673                                 }
674
675                                 u4 += fixed_HALF;
676                                 v4 += fixed_HALF;
677
678                                 if ((xmin - xa) > 0) {
679                                         u4 += du4 * (xmin - xa);
680                                         v4 += dv4 * (xmin - xa);
681                                         xa = xmin;
682                                 }
683
684                                 // Set u,v coords for inner driver
685                                 gpu_unai.u = u4;
686                                 gpu_unai.v = v4;
687
688                                 if (xb > xmax) xb = xmax;
689                                 if ((xb - xa) > 0)
690                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
691                         }
692                 }
693         } while (++cur_pass < total_passes);
694 }
695
696 /*----------------------------------------------------------------------
697 gpuDrawPolyG - Gouraud-shaded, untextured poly
698 ----------------------------------------------------------------------*/
// Rasterizes one Gouraud-shaded, untextured polygon.
//   packet            - primitive data; decoded into vbuf by polyInitVertexBuffer()
//   gpuPolySpanDriver - inner driver, called once per non-empty scanline span as
//                       gpuPolySpanDriver(gpu_unai, first_pixel_ptr, pixel_count)
//   is_quad           - nonzero: two triangle passes are made; zero: one pass
// For each pass polyUseTriangle() supplies three vertices (or rejects the pass).
// The triangle is walked in two halves (rows y0..y1, then y1..y2) using a left
// edge x3 and a right edge x4 in fixed-point, with r/g/b interpolated down the
// left edge and stepped per pixel by the span driver.
// NOTE(review): the code treats y0 <= y1 <= y2, so it presumably relies on
// polyUseTriangle() returning vptrs[] sorted by ascending y -- confirm there.
// Side effects visible here: writes gpu_unai.gInc and gpu_unai.gCol. This
// function does not store pixels itself; it hands every span to the driver.
699 void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
700 {
701         PolyVertex vbuf[4];
702         polyInitVertexBuffer(vbuf, packet, POLYTYPE_G, is_quad);
703
            // One pass per triangle: a quad takes two passes, a triangle one.
704         int total_passes = is_quad ? 2 : 1;
705         int cur_pass = 0;
706         do
707         {
708                 const PolyVertex* vptrs[3];
                    // 'continue' inside do/while jumps to the loop condition, so
                    // cur_pass is still incremented when a pass is rejected.
709                 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
710                         continue;
711
                    // Naming used below:
                    //   xa,xb / ya,yb  integer span / row bounds currently being drawn
                    //   x3,dx3         left edge position and its per-row step (fixed-point)
                    //   x4,dx4         right edge position and its per-row step (fixed-point)
                    //   r3,g3,b3       colour at the left edge; dr3,dg3,db3 per-row steps
                    //   dr4,dg4,db4    colour steps per pixel in x (after the division below)
                    //   dx             signed copy of the initial dx4; its sign picks the edge layout
712                 s32 xa, xb, ya, yb;
713                 s32 x3, dx3, x4, dx4, dx;
714                 s32 r3, dr3, g3, dg3, b3, db3;
715                 s32 x0, x1, x2, y0, y1, y2;
716                 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
717                 s32 dr4, dg4, db4;
718
719                 x0 = vptrs[0]->x;      y0 = vptrs[0]->y;
720                 r0 = vptrs[0]->col.r;  g0 = vptrs[0]->col.g;  b0 = vptrs[0]->col.b;
721                 x1 = vptrs[1]->x;      y1 = vptrs[1]->y;
722                 r1 = vptrs[1]->col.r;  g1 = vptrs[1]->col.g;  b1 = vptrs[1]->col.b;
723                 x2 = vptrs[2]->x;      y2 = vptrs[2]->y;
724                 r2 = vptrs[2]->col.r;  g2 = vptrs[2]->col.g;  b2 = vptrs[2]->col.b;
725
                    // ya/yb are temporarily the heights (y2 - y0) and (y2 - y1).
                    // dx4 = (x2-x1)*(y2-y0) - (x2-x0)*(y2-y1); the colour terms use
                    // the same formula with r/g/b in place of x.
726                 ya = y2 - y0;
727                 yb = y2 - y1;
728                 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
729                 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
730                 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
731                 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
                    // Keep the sign in dx, then flip everything so the divisor dx4
                    // used below is non-negative.
732                 dx = dx4;
733                 if (dx4 < 0) {
734                         dx4 = -dx4;
735                         dr4 = -dr4;
736                         dg4 = -dg4;
737                         db4 = -db4;
738                 }
739
                    // Divide the colour terms by dx4 to get per-pixel-x colour steps.
                    // Exactly one of four build-time variants is compiled; every
                    // variant yields 0 when dx4 == 0. The float-divide and
                    // GPU_FAST_DIV variants visibly compute (v << FIXED_BITS) / dx4;
                    // FloatInv / xInv / xInvMulx are presumably reciprocal-multiply
                    // equivalents -- their definitions are not visible here.
740 #ifdef GPU_UNAI_USE_FLOATMATH
741 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
742                 if (dx4 != 0) {
743                         float finv = FloatInv(dx4);
744                         dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
745                         dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
746                         db4 = (fixed)((db4 << FIXED_BITS) * finv);
747                 } else {
748                         dr4 = dg4 = db4 = 0;
749                 }
750 #else
751                 if (dx4 != 0) {
752                         float fdiv = dx4;
753                         dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
754                         dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
755                         db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
756                 } else {
757                         dr4 = dg4 = db4 = 0;
758                 }
759 #endif
760 #else  // Integer Division:
761 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
762                 if (dx4 != 0) {
763                         int iF, iS;
764                         xInv(dx4, iF, iS);
765                         dr4 = xInvMulx(dr4, iF, iS);
766                         dg4 = xInvMulx(dg4, iF, iS);
767                         db4 = xInvMulx(db4, iF, iS);
768                 } else {
769                         dr4 = dg4 = db4 = 0;
770                 }
771 #else
772                 if (dx4 != 0) {
773                         dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
774                         dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
775                         db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
776                 } else {
777                         dr4 = dg4 = db4 = 0;
778                 }
779 #endif
780 #endif
781                 // Setup packed Gouraud increment for inner driver
782                 gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
783
                    // loop0 == 2: upper half (rows y0..y1); loop0 == 1: lower half
                    // (rows y1..y2). From here on dx4 is reused as the right-edge
                    // per-row step.
784                 for (s32 loop0 = 2; loop0; loop0--) {
785                         if (loop0 == 2) {
                                    // Upper half: both edges and the colour start at vertex 0.
786                                 ya = y0;
787                                 yb = y1;
788                                 x3 = x4 = i2x(x0);
789                                 r3 = i2x(r0);
790                                 g3 = i2x(g0);
791                                 b3 = i2x(b0);
792                                 if (dx < 0) {
                                            // Left edge (x3 + colour) follows v0->v2;
                                            // right edge (x4) follows v0->v1.
                                            // A zero height gives zero steps in every variant.
793 #ifdef GPU_UNAI_USE_FLOATMATH
794 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
795                                         if ((y2 - y0) != 0) {
796                                                 float finv = FloatInv(y2 - y0);
797                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
798                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
799                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
800                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
801                                         } else {
802                                                 dx3 = dr3 = dg3 = db3 = 0;
803                                         }
804                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
805 #else
806                                         if ((y2 - y0) != 0) {
807                                                 float fdiv = y2 - y0;
808                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
809                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
810                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
811                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
812                                         } else {
813                                                 dx3 = dr3 = dg3 = db3 = 0;
814                                         }
815                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
816 #endif
817 #else  // Integer Division:
818 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
819                                         if ((y2 - y0) != 0) {
820                                                 int iF, iS;
821                                                 xInv((y2 - y0), iF, iS);
822                                                 dx3 = xInvMulx((x2 - x0), iF, iS);
823                                                 dr3 = xInvMulx((r2 - r0), iF, iS);
824                                                 dg3 = xInvMulx((g2 - g0), iF, iS);
825                                                 db3 = xInvMulx((b2 - b0), iF, iS);
826                                         } else {
827                                                 dx3 = dr3 = dg3 = db3 = 0;
828                                         }
829                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
830 #else
831                                         if ((y2 - y0) != 0) {
832                                                 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
833                                                 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
834                                                 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
835                                                 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
836                                         } else {
837                                                 dx3 = dr3 = dg3 = db3 = 0;
838                                         }
839                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
840 #endif
841 #endif
842                                 } else {
                                            // Left edge (x3 + colour) follows v0->v1;
                                            // right edge (x4) follows v0->v2.
843 #ifdef GPU_UNAI_USE_FLOATMATH
844 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
845                                         if ((y1 - y0) != 0) {
846                                                 float finv = FloatInv(y1 - y0);
847                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
848                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
849                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
850                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
851                                         } else {
852                                                 dx3 = dr3 = dg3 = db3 = 0;
853                                         }
854                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
855 #else
856                                         if ((y1 - y0) != 0) {
857                                                 float fdiv = y1 - y0;
858                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
859                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
860                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
861                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
862                                         } else {
863                                                 dx3 = dr3 = dg3 = db3 = 0;
864                                         }
865                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
866 #endif
867 #else  // Integer Division:
868 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
869                                         if ((y1 - y0) != 0) {
870                                                 int iF, iS;
871                                                 xInv((y1 - y0), iF, iS);
872                                                 dx3 = xInvMulx((x1 - x0), iF, iS);
873                                                 dr3 = xInvMulx((r1 - r0), iF, iS);
874                                                 dg3 = xInvMulx((g1 - g0), iF, iS);
875                                                 db3 = xInvMulx((b1 - b0), iF, iS);
876                                         } else {
877                                                 dx3 = dr3 = dg3 = db3 = 0;
878                                         }
879                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
880 #else
881                                         if ((y1 - y0) != 0) {
882                                                 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
883                                                 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
884                                                 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
885                                                 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
886                                         } else {
887                                                 dx3 = dr3 = dg3 = db3 = 0;
888                                         }
889                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
890 #endif
891 #endif
892                                 }
893                         } else {
894                                 //senquack - break out of final loop if nothing to be drawn (1st loop
895                                 //           must always be taken to setup dx3/dx4)
896                                 if (y1 == y2) break;
897
                                    // Lower half: rows y1..y2.
898                                 ya = y1;  yb = y2;
899
900                                 if (dx < 0) {
                                            // Left edge keeps following v0->v2: restart from vertex 0
                                            // and step (y1 - y0) rows with the slopes computed in the
                                            // first iteration (independent of any clipping done there).
                                            // Right edge restarts at vertex 1 and follows v1->v2.
901                                         x3 = i2x(x0);  x4 = i2x(x1);
902                                         r3 = i2x(r0);  g3 = i2x(g0);  b3 = i2x(b0);
903
904                                         if ((y1 - y0) != 0) {
905                                                 x3 += (dx3 * (y1 - y0));
906                                                 r3 += (dr3 * (y1 - y0));
907                                                 g3 += (dg3 * (y1 - y0));
908                                                 b3 += (db3 * (y1 - y0));
909                                         }
910
911 #ifdef GPU_UNAI_USE_FLOATMATH
912 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
913                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
914 #else
915                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
916 #endif
917 #else  // Integer Division:
918 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
919                                         dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
920 #else
921                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
922 #endif
923 #endif
924                                 } else {
                                            // Left edge restarts at vertex 1 and follows v1->v2 (new
                                            // dx3 and colour steps below). Right edge keeps following
                                            // v0->v2, re-derived from vertex 0 with the dx4 computed
                                            // in the first iteration.
925                                         x3 = i2x(x1);
926                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
927
928                                         r3 = i2x(r1);  g3 = i2x(g1);  b3 = i2x(b1);
929
930 #ifdef GPU_UNAI_USE_FLOATMATH
931 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
932                                         if ((y2 - y1) != 0) {
933                                                 float finv = FloatInv(y2 - y1);
934                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
935                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
936                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
937                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
938                                         } else {
939                                                 dx3 = dr3 = dg3 = db3 = 0;
940                                         }
941 #else
942                                         if ((y2 - y1) != 0) {
943                                                 float fdiv = y2 - y1;
944                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
945                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
946                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
947                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
948                                         } else {
949                                                 dx3 = dr3 = dg3 = db3 = 0;
950                                         }
951 #endif
952 #else  // Integer Division:
953 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
954                                         if ((y2 - y1) != 0) {
955                                                 int iF, iS;
956                                                 xInv((y2 - y1), iF, iS);
957                                                 dx3 = xInvMulx((x2 - x1), iF, iS);
958                                                 dr3 = xInvMulx((r2 - r1), iF, iS);
959                                                 dg3 = xInvMulx((g2 - g1), iF, iS);
960                                                 db3 = xInvMulx((b2 - b1), iF, iS);
961                                         } else {
962                                                 dx3 = dr3 = dg3 = db3 = 0;
963                                         }
964 #else
965                                         if ((y2 - y1) != 0) {
966                                                 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
967                                                 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
968                                                 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
969                                                 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
970                                         } else {
971                                                 dx3 = dr3 = dg3 = db3 = 0;
972                                         }
973 #endif
974 #endif
975                                 }
976                         }
977
                            // Clip against the drawing area: DrawingArea[0]/[1] are read as
                            // xmin/ymin and DrawingArea[2]/[3] as xmax/ymax.
978                         s32 xmin, xmax, ymin, ymax;
979                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
980                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
981
                            // Half starts above ymin: advance edges and colour by the number
                            // of clipped rows and begin at ymin.
982                         if ((ymin - ya) > 0) {
983                                 x3 += (dx3 * (ymin - ya));
984                                 x4 += (dx4 * (ymin - ya));
985                                 r3 += (dr3 * (ymin - ya));
986                                 g3 += (dg3 * (ymin - ya));
987                                 b3 += (db3 * (ymin - ya));
988                                 ya = ymin;
989                         }
990
991                         if (yb > ymax) yb = ymax;
992
                            // Rows ya .. yb-1 are visited, so yb (and ymax) act as exclusive
                            // bounds. An empty half moves on to the next loop0 iteration.
993                         int loop1 = yb - ya;
994                         if (loop1 <= 0)
995                                 continue;
996
                            // PixelBase addresses VRAM as 16-bit pixels at FRAME_OFFSET(0, ya)
                            // (presumably column 0 of row ya) and moves FRAME_WIDTH per row.
                            // li/pi/pif drive the interlace row skipping below; when
                            // ProgressiveInterlaceEnabled() is false, pi == 0 and pif == 1, so
                            // the (ya&pi)==pif test can never be true.
997                         u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
998                         int li=gpu_unai.ilace_mask;
999                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
1000                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
1001
                             // One iteration per row. The increment clause also runs for
                             // rows skipped via 'continue', so edges and colour stay in step.
1002                         for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1003                                         x3 += dx3, x4 += dx4,
1004                                         r3 += dr3, g3 += dg3, b3 += db3 )
1005                         {
1006                                 if (ya&li) continue;
1007                                 if ((ya&pi)==pif) continue;
1008
1009                                 u32 r4, g4, b4;
1010
                                     // Span covers integer x in [xa, xb).
1011                                 xa = FixedCeilToInt(x3);
1012                                 xb = FixedCeilToInt(x4);
1013                                 r4 = r3;  g4 = g3;  b4 = b3;
1014
                                     // Pre-step the colour from the exact edge position x3 to
                                     // the first pixel xa (itmp is that distance in fixed-point).
1015                                 fixed itmp = i2x(xa) - x3;
1016                                 if (itmp != 0) {
1017                                         r4 += (dr4 * itmp) >> FIXED_BITS;
1018                                         g4 += (dg4 * itmp) >> FIXED_BITS;
1019                                         b4 += (db4 * itmp) >> FIXED_BITS;
1020                                 }
1021
                                     // NOTE(review): the half-unit bias presumably makes later
                                     // truncation round to nearest -- confirm in gpuPackGouraudCol.
1022                                 r4 += fixed_HALF;
1023                                 g4 += fixed_HALF;
1024                                 b4 += fixed_HALF;
1025
                                     // Span starts left of xmin: advance the colour by the
                                     // clipped pixel count and start at xmin.
1026                                 if ((xmin - xa) > 0) {
1027                                         r4 += (dr4 * (xmin - xa));
1028                                         g4 += (dg4 * (xmin - xa));
1029                                         b4 += (db4 * (xmin - xa));
1030                                         xa = xmin;
1031                                 }
1032
1033                                 // Setup packed Gouraud color for inner driver
1034                                 gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4);
1035
                                     // Clamp the right end and draw only non-empty spans.
1036                                 if (xb > xmax) xb = xmax;
1037                                 if ((xb - xa) > 0)
1038                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
1039                         }
1040                 }
1041         } while (++cur_pass < total_passes);
1042 }
1043
1044 /*----------------------------------------------------------------------
1045 gpuDrawPolyGT - Gouraud-shaded, textured poly
1046 ----------------------------------------------------------------------*/
1047 void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
1048 {
1049         PolyVertex vbuf[4];
1050         polyInitVertexBuffer(vbuf, packet, POLYTYPE_GT, is_quad);
1051
1052         int total_passes = is_quad ? 2 : 1;
1053         int cur_pass = 0;
1054         do
1055         {
1056                 const PolyVertex* vptrs[3];
1057                 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
1058                         continue;
1059
1060                 s32 xa, xb, ya, yb;
1061                 s32 x3, dx3, x4, dx4, dx;
1062                 s32 u3, du3, v3, dv3;
1063                 s32 r3, dr3, g3, dg3, b3, db3;
1064                 s32 x0, x1, x2, y0, y1, y2;
1065                 s32 u0, u1, u2, v0, v1, v2;
1066                 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
1067                 s32 du4, dv4;
1068                 s32 dr4, dg4, db4;
1069
1070                 x0 = vptrs[0]->x;      y0 = vptrs[0]->y;
1071                 u0 = vptrs[0]->tex.u;  v0 = vptrs[0]->tex.v;
1072                 r0 = vptrs[0]->col.r;  g0 = vptrs[0]->col.g;  b0 = vptrs[0]->col.b;
1073                 x1 = vptrs[1]->x;      y1 = vptrs[1]->y;
1074                 u1 = vptrs[1]->tex.u;  v1 = vptrs[1]->tex.v;
1075                 r1 = vptrs[1]->col.r;  g1 = vptrs[1]->col.g;  b1 = vptrs[1]->col.b;
1076                 x2 = vptrs[2]->x;      y2 = vptrs[2]->y;
1077                 u2 = vptrs[2]->tex.u;  v2 = vptrs[2]->tex.v;
1078                 r2 = vptrs[2]->col.r;  g2 = vptrs[2]->col.g;  b2 = vptrs[2]->col.b;
1079
1080                 ya = y2 - y0;
1081                 yb = y2 - y1;
1082                 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
1083                 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
1084                 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
1085                 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
1086                 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
1087                 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
1088                 dx = dx4;
1089                 if (dx4 < 0) {
1090                         dx4 = -dx4;
1091                         du4 = -du4;
1092                         dv4 = -dv4;
1093                         dr4 = -dr4;
1094                         dg4 = -dg4;
1095                         db4 = -db4;
1096                 }
1097
1098 #ifdef GPU_UNAI_USE_FLOATMATH
1099 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1100                 if (dx4 != 0) {
1101                         float finv = FloatInv(dx4);
1102                         du4 = (fixed)((du4 << FIXED_BITS) * finv);
1103                         dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
1104                         dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
1105                         dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
1106                         db4 = (fixed)((db4 << FIXED_BITS) * finv);
1107                 } else {
1108                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1109                 }
1110 #else
1111                 if (dx4 != 0) {
1112                         float fdiv = dx4;
1113                         du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
1114                         dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
1115                         dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
1116                         dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
1117                         db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
1118                 } else {
1119                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1120                 }
1121 #endif
1122 #else  // Integer Division:
1123 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1124                 if (dx4 != 0) {
1125                         int iF, iS;
1126                         xInv(dx4, iF, iS);
1127                         du4 = xInvMulx(du4, iF, iS);
1128                         dv4 = xInvMulx(dv4, iF, iS);
1129                         dr4 = xInvMulx(dr4, iF, iS);
1130                         dg4 = xInvMulx(dg4, iF, iS);
1131                         db4 = xInvMulx(db4, iF, iS);
1132                 } else {
1133                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1134                 }
1135 #else
1136                 if (dx4 != 0) {
1137                         du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
1138                         dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
1139                         dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
1140                         dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
1141                         db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
1142                 } else {
1143                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1144                 }
1145 #endif
1146 #endif
1147                 // Set u,v increments and packed Gouraud increment for inner driver
1148                 gpu_unai.u_inc = du4;
1149                 gpu_unai.v_inc = dv4;
1150                 gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
1151
1152                 for (s32 loop0 = 2; loop0; loop0--) {
1153                         if (loop0 == 2) {
1154                                 ya = y0;  yb = y1;
1155                                 x3 = x4 = i2x(x0);
1156                                 u3 = i2x(u0);  v3 = i2x(v0);
1157                                 r3 = i2x(r0);  g3 = i2x(g0);  b3 = i2x(b0);
1158                                 if (dx < 0) {
1159 #ifdef GPU_UNAI_USE_FLOATMATH
1160 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1161                                         if ((y2 - y0) != 0) {
1162                                                 float finv = FloatInv(y2 - y0);
1163                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
1164                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
1165                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
1166                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
1167                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
1168                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
1169                                         } else {
1170                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1171                                         }
1172                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
1173 #else
1174                                         if ((y2 - y0) != 0) {
1175                                                 float fdiv = y2 - y0;
1176                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
1177                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
1178                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
1179                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
1180                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
1181                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
1182                                         } else {
1183                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1184                                         }
1185                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
1186 #endif
1187 #else  // Integer Division:
1188 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1189                                         if ((y2 - y0) != 0) {
1190                                                 int iF, iS;
1191                                                 xInv((y2 - y0), iF, iS);
1192                                                 dx3 = xInvMulx((x2 - x0), iF, iS);
1193                                                 du3 = xInvMulx((u2 - u0), iF, iS);
1194                                                 dv3 = xInvMulx((v2 - v0), iF, iS);
1195                                                 dr3 = xInvMulx((r2 - r0), iF, iS);
1196                                                 dg3 = xInvMulx((g2 - g0), iF, iS);
1197                                                 db3 = xInvMulx((b2 - b0), iF, iS);
1198                                         } else {
1199                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1200                                         }
1201                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
1202 #else
1203                                         if ((y2 - y0) != 0) {
1204                                                 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
1205                                                 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
1206                                                 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
1207                                                 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
1208                                                 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
1209                                                 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
1210                                         } else {
1211                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1212                                         }
1213                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
1214 #endif
1215 #endif
1216                                 } else {
1217 #ifdef GPU_UNAI_USE_FLOATMATH
1218 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1219                                         if ((y1 - y0) != 0) {
1220                                                 float finv = FloatInv(y1 - y0);
1221                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
1222                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
1223                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
1224                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
1225                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
1226                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
1227                                         } else {
1228                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1229                                         }
1230                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
1231 #else
1232                                         if ((y1 - y0) != 0) {
1233                                                 float fdiv = y1 - y0;
1234                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
1235                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
1236                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
1237                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
1238                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
1239                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
1240                                         } else {
1241                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1242                                         }
1243                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / float(y2 - y0)) : 0;
1244 #endif
1245 #else  // Integer Division:
1246 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1247                                         if ((y1 - y0) != 0) {
1248                                                 int iF, iS;
1249                                                 xInv((y1 - y0), iF, iS);
1250                                                 dx3 = xInvMulx((x1 - x0), iF, iS);
1251                                                 du3 = xInvMulx((u1 - u0), iF, iS);
1252                                                 dv3 = xInvMulx((v1 - v0), iF, iS);
1253                                                 dr3 = xInvMulx((r1 - r0), iF, iS);
1254                                                 dg3 = xInvMulx((g1 - g0), iF, iS);
1255                                                 db3 = xInvMulx((b1 - b0), iF, iS);
1256                                         } else {
1257                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1258                                         }
1259                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
1260 #else
1261                                         if ((y1 - y0) != 0) {
1262                                                 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
1263                                                 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
1264                                                 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
1265                                                 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
1266                                                 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
1267                                                 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
1268                                         } else {
1269                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1270                                         }
1271                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
1272 #endif
1273 #endif
1274                                 }
1275                         } else {
1276                                 //senquack - break out of final loop if nothing to be drawn (1st loop
1277                                 //           must always be taken to setup dx3/dx4)
1278                                 if (y1 == y2) break;
1279
1280                                 ya = y1;  yb = y2;
1281
1282                                 if (dx < 0) {
1283                                         x3 = i2x(x0);  x4 = i2x(x1);
1284                                         u3 = i2x(u0);  v3 = i2x(v0);
1285                                         r3 = i2x(r0);  g3 = i2x(g0);  b3 = i2x(b0);
1286
1287                                         if ((y1 - y0) != 0) {
1288                                                 x3 += (dx3 * (y1 - y0));
1289                                                 u3 += (du3 * (y1 - y0));
1290                                                 v3 += (dv3 * (y1 - y0));
1291                                                 r3 += (dr3 * (y1 - y0));
1292                                                 g3 += (dg3 * (y1 - y0));
1293                                                 b3 += (db3 * (y1 - y0));
1294                                         }
1295
1296 #ifdef GPU_UNAI_USE_FLOATMATH
1297 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1298                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
1299 #else
1300                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
1301 #endif
1302 #else  // Integer Division:
1303 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1304                                         dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
1305 #else
1306                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
1307 #endif
1308 #endif
1309                                 } else {
1310                                         x3 = i2x(x1);
1311                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
1312
1313                                         u3 = i2x(u1);  v3 = i2x(v1);
1314                                         r3 = i2x(r1);  g3 = i2x(g1);  b3 = i2x(b1);
1315 #ifdef GPU_UNAI_USE_FLOATMATH
1316 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1317                                         if ((y2 - y1) != 0) {
1318                                                 float finv = FloatInv(y2 - y1);
1319                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
1320                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
1321                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
1322                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
1323                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
1324                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
1325                                         } else {
1326                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1327                                         }
1328 #else
1329                                         if ((y2 - y1) != 0) {
1330                                                 float fdiv = y2 - y1;
1331                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
1332                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
1333                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
1334                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
1335                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
1336                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
1337                                         } else {
1338                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1339                                         }
1340 #endif
1341 #else  // Integer Division:
1342 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1343                                         if ((y2 - y1) != 0) {
1344                                                 int iF, iS;
1345                                                 xInv((y2 - y1), iF, iS);
1346                                                 dx3 = xInvMulx((x2 - x1), iF, iS);
1347                                                 du3 = xInvMulx((u2 - u1), iF, iS);
1348                                                 dv3 = xInvMulx((v2 - v1), iF, iS);
1349                                                 dr3 = xInvMulx((r2 - r1), iF, iS);
1350                                                 dg3 = xInvMulx((g2 - g1), iF, iS);
1351                                                 db3 = xInvMulx((b2 - b1), iF, iS);
1352                                         } else {
1353                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1354                                         }
1355 #else
1356                                         if ((y2 - y1) != 0) {
1357                                                 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
1358                                                 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
1359                                                 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
1360                                                 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
1361                                                 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
1362                                                 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
1363                                         } else {
1364                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1365                                         }
1366 #endif
1367 #endif
1368                                 }
1369                         }
1370
1371                         s32 xmin, xmax, ymin, ymax;
1372                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
1373                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
1374
1375                         if ((ymin - ya) > 0) {
1376                                 x3 += (dx3 * (ymin - ya));
1377                                 x4 += (dx4 * (ymin - ya));
1378                                 u3 += (du3 * (ymin - ya));
1379                                 v3 += (dv3 * (ymin - ya));
1380                                 r3 += (dr3 * (ymin - ya));
1381                                 g3 += (dg3 * (ymin - ya));
1382                                 b3 += (db3 * (ymin - ya));
1383                                 ya = ymin;
1384                         }
1385
1386                         if (yb > ymax) yb = ymax;
1387
1388                         int loop1 = yb - ya;
1389                         if (loop1 <= 0)
1390                                 continue;
1391
1392                         u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
1393                         int li=gpu_unai.ilace_mask;
1394                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
1395                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
1396
1397                         for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1398                                         x3 += dx3, x4 += dx4,
1399                                         u3 += du3, v3 += dv3,
1400                                         r3 += dr3, g3 += dg3, b3 += db3 )
1401                         {
1402                                 if (ya&li) continue;
1403                                 if ((ya&pi)==pif) continue;
1404
1405                                 u32 u4, v4;
1406                                 u32 r4, g4, b4;
1407
1408                                 xa = FixedCeilToInt(x3);
1409                                 xb = FixedCeilToInt(x4);
1410                                 u4 = u3;  v4 = v3;
1411                                 r4 = r3;  g4 = g3;  b4 = b3;
1412
1413                                 fixed itmp = i2x(xa) - x3;
1414                                 if (itmp != 0) {
1415                                         u4 += (du4 * itmp) >> FIXED_BITS;
1416                                         v4 += (dv4 * itmp) >> FIXED_BITS;
1417                                         r4 += (dr4 * itmp) >> FIXED_BITS;
1418                                         g4 += (dg4 * itmp) >> FIXED_BITS;
1419                                         b4 += (db4 * itmp) >> FIXED_BITS;
1420                                 }
1421
1422                                 u4 += fixed_HALF;
1423                                 v4 += fixed_HALF;
1424                                 r4 += fixed_HALF;
1425                                 g4 += fixed_HALF;
1426                                 b4 += fixed_HALF;
1427
1428                                 if ((xmin - xa) > 0) {
1429                                         u4 += du4 * (xmin - xa);
1430                                         v4 += dv4 * (xmin - xa);
1431                                         r4 += dr4 * (xmin - xa);
1432                                         g4 += dg4 * (xmin - xa);
1433                                         b4 += db4 * (xmin - xa);
1434                                         xa = xmin;
1435                                 }
1436
1437                                 // Set packed Gouraud color and u,v coords for inner driver
1438                                 gpu_unai.u = u4;
1439                                 gpu_unai.v = v4;
1440                                 gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4);
1441
1442                                 if (xb > xmax) xb = xmax;
1443                                 if ((xb - xa) > 0)
1444                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
1445                         }
1446                 }
1447         } while (++cur_pass < total_passes);
1448 }