988e721f946b094ad66a07f1daf985255337b849
[pcsx_rearmed.git] / plugins / gpu_unai / gpu_raster_polygon.h
1 /***************************************************************************
2 *   Copyright (C) 2010 PCSX4ALL Team                                      *
3 *   Copyright (C) 2010 Unai                                               *
4 *                                                                         *
5 *   This program is free software; you can redistribute it and/or modify  *
6 *   it under the terms of the GNU General Public License as published by  *
7 *   the Free Software Foundation; either version 2 of the License, or     *
8 *   (at your option) any later version.                                   *
9 *                                                                         *
10 *   This program is distributed in the hope that it will be useful,       *
11 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13 *   GNU General Public License for more details.                          *
14 *                                                                         *
15 *   You should have received a copy of the GNU General Public License     *
16 *   along with this program; if not, write to the                         *
17 *   Free Software Foundation, Inc.,                                       *
18 *   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
19 ***************************************************************************/
20
21 #ifndef __GPU_UNAI_GPU_RASTER_POLYGON_H__
22 #define __GPU_UNAI_GPU_RASTER_POLYGON_H__
23
24 //senquack - NOTE: GPU Unai poly routines have been rewritten/adapted
25 // from DrHell routines to fix multiple issues. See README_senquack.txt
26
27 ///////////////////////////////////////////////////////////////////////////////
28 // Shared poly vertex buffer, able to handle 3 or 4-pt polys of any type.
29 ///////////////////////////////////////////////////////////////////////////////
30
// One polygon vertex as filled in by polyInitVertexBuffer(): a screen position
//  plus optional texture-coordinate and color words. Each attribute is a union
//  of the whole 32-bit word and a byte-wise view of it; the field order of the
//  byte view is mirrored on big-endian builds so that u / r always alias
//  bits 0-7 of the word, v / g bits 8-15, and b bits 16-23.
31 struct PolyVertex {
32         s32 x, y; // Sign-extended 11-bit X,Y coords
33         union { // Texture attribute: byte view (tex) overlaid on the whole word (tex_word)
34 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
35                 struct { u8 pad[2], v, u; } tex; // Texture coords (if used); field order for big-endian builds
36 #else
37                 struct { u8 u, v, pad[2]; } tex; // Texture coords (if used); field order for all other builds
38 #endif
39                 u32 tex_word; // Whole texture word, as stored by polyInitVertexBuffer()
40         };
41         union { // Color attribute: byte view (col) overlaid on the whole word (col_word)
42 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
43                 struct { u8 pad, b, g, r; } col; // 24-bit RGB color (if used); field order for big-endian builds
44 #else
45                 struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used); field order for all other builds
46 #endif
47                 u32 col_word; // Whole color word, as stored by polyInitVertexBuffer()
48         };
49 };
50
51 enum PolyAttribute { // Bit flags naming the optional per-vertex attributes of a poly
52         POLYATTR_TEXTURE = 0x1, // bit 0: each vertex carries a texture-coordinate word
53         POLYATTR_GOURAUD = 0x2  // bit 1: each vertex carries its own color word
54 };
55
56 enum PolyType { // Poly kinds, each encoded as a combination of PolyAttribute flags
57         POLYTYPE_F  = 0,                                  // flat-shaded, untextured
58         POLYTYPE_FT = POLYATTR_TEXTURE,                   // flat-shaded, textured
59         POLYTYPE_G  = POLYATTR_GOURAUD,                   // per-vertex color, untextured
60         POLYTYPE_GT = POLYATTR_TEXTURE | POLYATTR_GOURAUD // per-vertex color, textured
61 };
62
63 ///////////////////////////////////////////////////////////////////////////////
64 // polyInitVertexBuffer()
65 // Fills vbuf[] array with data from any type of poly draw-command packet.
66 ///////////////////////////////////////////////////////////////////////////////
67 static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyType ptype, u32 is_quad)
68 {
69         bool texturing = ptype & POLYATTR_TEXTURE;
70         bool gouraud   = ptype & POLYATTR_GOURAUD;
71
72         int vert_stride = 1; // Stride of vertices in cmd packet, in 32-bit words
73         if (texturing)
74                 vert_stride++;
75         if (gouraud)
76                 vert_stride++;
77
78         int num_verts = (is_quad) ? 4 : 3;
79         le32_t *ptr;
80
81         // X,Y coords
82         ptr = &packet.U4[1];
83         for (int i=0;  i < num_verts; ++i, ptr += vert_stride) {
84                 u32 coords = le32_to_u32(*ptr);
85                 vbuf[i].x = GPU_EXPANDSIGN(coords);
86                 vbuf[i].y = GPU_EXPANDSIGN(coords >> 16);
87         }
88
89         // U,V texture coords (if applicable)
90         if (texturing) {
91                 ptr = &packet.U4[2];
92                 for (int i=0;  i < num_verts; ++i, ptr += vert_stride)
93                         vbuf[i].tex_word = le32_to_u32(*ptr);
94         }
95
96         // Colors (if applicable)
97         if (gouraud) {
98                 ptr = &packet.U4[0];
99                 for (int i=0;  i < num_verts; ++i, ptr += vert_stride)
100                         vbuf[i].col_word = le32_to_u32(*ptr);
101         }
102 }
103
104 ///////////////////////////////////////////////////////////////////////////////
105 //  Helper functions to determine which vertex in a 2 or 3 vertex array
106 //   has the highest/lowest X/Y coordinate.
107 //   Note: ties are broken in opposite directions: among vertices sharing the
108 //    same value for a coordinate, vertIdxOfLeast..() returns the lowest such
109 //    index while vertIdxOfHighest..() returns the highest one, so the two
110 //    never agree. This ensures that, during the vertex-ordering phase of
111 //    rasterization, all three vertices remain unique.
112 ///////////////////////////////////////////////////////////////////////////////
113
114 template<typename T> // Index (0 or 1) of the element with the smaller x; element 0 wins a tie
115 static inline int vertIdxOfLeastXCoord2(const T *Tptr)
116 {
117         return static_cast<int>(!(Tptr[0].x <= Tptr[1].x)); // 1 only when element 0 fails the <= test
118 }
119
120 template<typename T> // Index (0..2) of the element with the smallest x; the lowest index wins a tie
121 static inline int vertIdxOfLeastXCoord3(const T *Tptr)
122 {
123         const int front = vertIdxOfLeastXCoord2(Tptr); // winner among elements 0 and 1
124         return !(Tptr[front].x <= Tptr[2].x) ? 2 : front; // element 2 only when that winner fails the <= test
125 }
126
127 template<typename T> // Index (0 or 1) of the element with the smaller y; element 0 wins a tie
128 static inline int vertIdxOfLeastYCoord2(const T *Tptr)
129 {
130         return static_cast<int>(!(Tptr[0].y <= Tptr[1].y)); // 1 only when element 0 fails the <= test
131 }
132
133 template<typename T> // Index (0..2) of the element with the smallest y; the lowest index wins a tie
134 static inline int vertIdxOfLeastYCoord3(const T *Tptr)
135 {
136         const int front = vertIdxOfLeastYCoord2(Tptr); // winner among elements 0 and 1
137         return !(Tptr[front].y <= Tptr[2].y) ? 2 : front; // element 2 only when that winner fails the <= test
138 }
139
140 template<typename T> // Index (0 or 1) of the element with the larger x; element 1 wins a tie
141 static inline int vertIdxOfHighestXCoord2(const T *Tptr)
142 {
143         return static_cast<int>(Tptr[1].x >= Tptr[0].x); // 1 whenever element 1 passes the >= test
144 }
145
146 template<typename T> // Index (0..2) of the element with the largest x; the highest index wins a tie
147 static inline int vertIdxOfHighestXCoord3(const T *Tptr)
148 {
149         const int front = vertIdxOfHighestXCoord2(Tptr); // winner among elements 0 and 1
150         return !(Tptr[2].x >= Tptr[front].x) ? front : 2; // element 2 whenever it passes the >= test
151 }
152
153 template<typename T> // Index (0 or 1) of the element with the larger y; element 1 wins a tie
154 static inline int vertIdxOfHighestYCoord2(const T *Tptr)
155 {
156         return static_cast<int>(Tptr[1].y >= Tptr[0].y); // 1 whenever element 1 passes the >= test
157 }
158
159 template<typename T> // Index (0..2) of the element with the largest y; the highest index wins a tie
160 static inline int vertIdxOfHighestYCoord3(const T *Tptr)
161 {
162         const int front = vertIdxOfHighestYCoord2(Tptr); // winner among elements 0 and 1
163         return !(Tptr[2].y >= Tptr[front].y) ? front : 2; // element 2 whenever it passes the >= test
164 }
165
166 ///////////////////////////////////////////////////////////////////////////////
167 // polyUseTriangle()
168 //  Determines if the specified triangle should be rendered. If so, it
169 //  fills the given array of vertex pointers, vert_ptrs, in order of
170 //  increasing Y coordinate values, as required by rasterization algorithm.
171 //  Parameter 'tri_num' is 0 for first triangle (idx 0,1,2 of vbuf[]),
172 //   or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]).
173 //  Returns true if triangle should be rendered, false if not.
174 ///////////////////////////////////////////////////////////////////////////////
175 static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs, s32 &x_off, s32 &y_off)
176 {
177         // Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)?
178         const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 0 : 1];
179
180         // Get indices of highest/lowest X,Y coords within triangle
181         int idx_lowest_x  = vertIdxOfLeastXCoord3(tri_ptr);
182         int idx_highest_x = vertIdxOfHighestXCoord3(tri_ptr);
183         int idx_lowest_y  = vertIdxOfLeastYCoord3(tri_ptr);
184         int idx_highest_y = vertIdxOfHighestYCoord3(tri_ptr);
185
186         // Maximum absolute distance between any two X coordinates is 1023,
187         //  and for Y coordinates is 511 (PS1 hardware limitation)
188         int lowest_x  = tri_ptr[idx_lowest_x].x;
189         int highest_x = tri_ptr[idx_highest_x].x;
190         int lowest_y  = tri_ptr[idx_lowest_y].y;
191         int highest_y = tri_ptr[idx_highest_y].y;
192         if ((highest_x - lowest_x) >= CHKMAX_X ||
193             (highest_y - lowest_y) >= CHKMAX_Y)
194                 return false;
195
196         // Determine offsets
197         x_off = gpu_unai.DrawingOffset[0];
198         y_off = gpu_unai.DrawingOffset[1];
199         x_off = GPU_EXPANDSIGN(lowest_x + x_off) - lowest_x;
200         y_off = GPU_EXPANDSIGN(lowest_y + y_off) - lowest_y;
201
202         // Determine if triangle is completely outside clipping range
203         s32 xmin, xmax, ymin, ymax;
204         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
205         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
206         int clipped_lowest_x  = Max2(xmin, lowest_x + x_off);
207         int clipped_lowest_y  = Max2(ymin, lowest_y + y_off);
208         int clipped_highest_x = Min2(xmax, highest_x + x_off);
209         int clipped_highest_y = Min2(ymax, highest_y + y_off);
210         if (clipped_lowest_x >= clipped_highest_x ||
211             clipped_lowest_y >= clipped_highest_y)
212                 return false;
213
214         // Order vertex ptrs by increasing y value (draw routines need this).
215         // The middle index is deduced by a binary math trick that depends
216         //  on index range always being between 0..2
217         vert_ptrs[0] = tri_ptr + idx_lowest_y;
218         vert_ptrs[1] = tri_ptr + ((idx_lowest_y + idx_highest_y) ^ 3);
219         vert_ptrs[2] = tri_ptr + idx_highest_y;
220         return true;
221 }
222
223 ///////////////////////////////////////////////////////////////////////////////
224 //  GPU internal polygon drawing functions
225 ///////////////////////////////////////////////////////////////////////////////
226
227 /*----------------------------------------------------------------------
228 gpuDrawPolyF - Flat-shaded, untextured poly
229 ----------------------------------------------------------------------*/
// Draws one flat-shaded, untextured triangle, or a quad as two triangles
//  (vbuf[0..2], then vbuf[1..3]).
//   packet            - draw-command packet; word 0 supplies the color and the
//                       vertex words are unpacked by polyInitVertexBuffer()
//   gpuPolySpanDriver - inner driver, called once per visible span as
//                       (gpu_unai, pointer to first pixel, pixel count, row)
//   is_quad           - non-zero for a 4-vertex poly (two passes), zero for a
//                       triangle (one pass)
//   ptype             - forwarded to polyInitVertexBuffer(); defaults to
//                       POLYTYPE_F
//  Each triangle accepted by polyUseTriangle() is walked in two parts, rows
//  y0..y1 and then y1..y2, with x3/x4 tracking the span start/end in fixed
//  point. Edge slopes come from one of four build-time selected division
//  variants; every variant yields 0 for an edge of zero height. Rows and spans
//  are clipped to gpu_unai.DrawingArea, and rows excluded by the interlace
//  masks are skipped.
230 void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad,
231         PolyType ptype = POLYTYPE_F)
232 {
233         // Set up bgr555 color to be used across calls in inner driver
234         gpu_unai.inn.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0]));
235
236         PolyVertex vbuf[4];
237         polyInitVertexBuffer(vbuf, packet, ptype, is_quad);
238
239         int total_passes = is_quad ? 2 : 1;
240         int cur_pass = 0;
241         do
242         {
243                 const PolyVertex* vptrs[3];
244                 s32 x_off, y_off;
245                 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
246                         continue; // in a do-while this jumps to the ++cur_pass test, so a rejected triangle still advances to the next pass
247
248                 s32 xa, xb, ya, yb;
249                 s32 x3, dx3, x4, dx4, dx;
250                 s32 x0, x1, x2, y0, y1, y2;
251
252                 x0 = vptrs[0]->x + x_off;  y0 = vptrs[0]->y + y_off; // vptrs[] is ordered by increasing y, so y0 <= y1 <= y2
253                 x1 = vptrs[1]->x + x_off;  y1 = vptrs[1]->y + y_off;
254                 x2 = vptrs[2]->x + x_off;  y2 = vptrs[2]->y + y_off;
255
256                 ya = y2 - y0;
257                 yb = y2 - y1;
258                 dx = (x2 - x1) * ya - (x2 - x0) * yb; // only the sign is used: it picks which edges feed x3 (span start) and x4 (span end)
259
260                 for (int loop0 = 2; loop0; loop0--) { // loop0 == 2: rows y0..y1, loop0 == 1: rows y1..y2
261                         if (loop0 == 2) {
262                                 ya = y0;  yb = y1;
263                                 x3 = x4 = i2x(x0); // both edges start at vertex 0
264                                 if (dx < 0) { // x3 follows edge v0->v2, x4 follows edge v0->v1
265 #ifdef GPU_UNAI_USE_FLOATMATH
266 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
267                                         dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
268                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
269 #else
270                                         dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
271                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
272 #endif
273 #else  // Integer Division:
274 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
275                                         dx3 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
276                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
277 #else
278                                         dx3 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
279                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
280 #endif
281 #endif
282                                 } else { // x3 follows edge v0->v1, x4 follows edge v0->v2
283 #ifdef GPU_UNAI_USE_FLOATMATH
284 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
285                                         dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
286                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
287 #else
288                                         dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
289                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
290 #endif
291 #else  // Integer Division:
292 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
293                                         dx3 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
294                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
295 #else
296                                         dx3 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
297                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
298 #endif
299 #endif
300                                 }
301                         } else {
302                                 //senquack - break out of final loop if nothing to be drawn (1st loop
303                                 //           must always be taken to setup dx3/dx4)
304                                 if (y1 == y2) break;
305
306                                 ya = y1;  yb = y2;
307
308                                 if (dx < 0) {
309                                         x3 = i2x(x0) + (dx3 * (y1 - y0)); // v0->v2 edge keeps its slope; re-derive its position at row y1
310                                         x4 = i2x(x1); // the other side restarts at vertex 1 and gets the v1->v2 slope below
311 #ifdef GPU_UNAI_USE_FLOATMATH
312 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
313                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
314 #else
315                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
316 #endif
317 #else  // Integer Division:
318 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
319                                         dx4 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
320 #else
321                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
322 #endif
323 #endif
324                                 } else {
325                                         x3 = i2x(x1); // this side restarts at vertex 1 and gets the v1->v2 slope below
326                                         x4 = i2x(x0) + (dx4 * (y1 - y0)); // v0->v2 edge keeps its slope; re-derive its position at row y1
327 #ifdef GPU_UNAI_USE_FLOATMATH
328 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
329                                         dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
330 #else
331                                         dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
332 #endif
333 #else  // Integer Division:
334 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
335                                         dx3 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
336 #else
337                                         dx3 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
338 #endif
339 #endif
340                                 }
341                         }
342
343                         s32 xmin, xmax, ymin, ymax;
344                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
345                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
346
347                         if ((ymin - ya) > 0) { // this part starts above ymin: advance both edges by the skipped rows
348                                 x3 += (dx3 * (ymin - ya));
349                                 x4 += (dx4 * (ymin - ya));
350                                 ya = ymin;
351                         }
352
353                         if (yb > ymax) yb = ymax; // rows drawn are ya <= row < yb
354
355                         int loop1 = yb - ya; // number of rows left in this part after clipping
356                         if (loop1 <= 0)
357                                 continue; // nothing visible in this part; move on to the next loop0 iteration
358
359                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)]; // presumably the first pixel of row ya; stepped by FRAME_WIDTH per row below
360                         int li=gpu_unai.inn.ilace_mask; // rows with (ya & li) != 0 are skipped
361                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
362                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1); // when disabled: pi == 0 and pif == 1, so (ya&pi)==pif never matches
363
364                         for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH,
365                                         x3 += dx3, x4 += dx4 )
366                         {
367                                 if (ya&li) continue; // 'continue' still runs the for-increment, so ya/PixelBase/x3/x4 keep stepping
368                                 if ((ya&pi)==pif) continue;
369
370                                 xa = FixedCeilToInt(x3);  xb = FixedCeilToInt(x4); // span covers columns xa <= col < xb
371                                 if ((xmin - xa) > 0) xa = xmin;
372                                 if (xb > xmax) xb = xmax;
373                                 if ((xb - xa) > 0)
374                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa), ya);
375                         }
376                 }
377         } while (++cur_pass < total_passes);
378 }
379
380 /*----------------------------------------------------------------------
381 gpuDrawPolyFT - Flat-shaded, textured poly
382 ----------------------------------------------------------------------*/
383 void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad,
384         PolyType ptype = POLYTYPE_FT)
385 {
386         // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light)
387         gpu_unai.inn.r8 = packet.U1[0];
388         gpu_unai.inn.g8 = packet.U1[1];
389         gpu_unai.inn.b8 = packet.U1[2];
390
391         PolyVertex vbuf[4];
392         polyInitVertexBuffer(vbuf, packet, ptype, is_quad);
393
394         int total_passes = is_quad ? 2 : 1;
395         int cur_pass = 0;
396         do
397         {
398                 const PolyVertex* vptrs[3];
399                 s32 x_off, y_off;
400                 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
401                         continue;
402
403                 s32 xa, xb, ya, yb;
404                 s32 x3, dx3, x4, dx4, dx;
405                 s32 u3, du3, v3, dv3;
406                 s32 x0, x1, x2, y0, y1, y2;
407                 s32 u0, u1, u2, v0, v1, v2;
408                 s32 du4, dv4;
409
410                 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
411                 u0 = vptrs[0]->tex.u;     v0 = vptrs[0]->tex.v;
412                 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
413                 u1 = vptrs[1]->tex.u;     v1 = vptrs[1]->tex.v;
414                 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
415                 u2 = vptrs[2]->tex.u;     v2 = vptrs[2]->tex.v;
416
417                 ya = y2 - y0;
418                 yb = y2 - y1;
419                 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
420                 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
421                 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
422                 dx = dx4;
423                 if (dx4 < 0) {
424                         dx4 = -dx4;
425                         du4 = -du4;
426                         dv4 = -dv4;
427                 }
428
429 #ifdef GPU_UNAI_USE_FLOATMATH
430 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
431                 if (dx4 != 0) {
432                         float finv = FloatInv(dx4);
433                         du4 = (fixed)((du4 << FIXED_BITS) * finv);
434                         dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
435                 } else {
436                         du4 = dv4 = 0;
437                 }
438 #else
439                 if (dx4 != 0) {
440                         float fdiv = dx4;
441                         du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
442                         dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
443                 } else {
444                         du4 = dv4 = 0;
445                 }
446 #endif
447 #else  // Integer Division:
448 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
449                 if (dx4 != 0) {
450                         int iF, iS;
451                         xInv(dx4, iF, iS);
452                         du4 = xInvMulx(du4, iF, iS);
453                         dv4 = xInvMulx(dv4, iF, iS);
454                 } else {
455                         du4 = dv4 = 0;
456                 }
457 #else
458                 if (dx4 != 0) {
459                         du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
460                         dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
461                 } else {
462                         du4 = dv4 = 0;
463                 }
464 #endif
465 #endif
466                 // Set u,v increments for inner driver
467                 gpu_unai.inn.u_inc = du4;
468                 gpu_unai.inn.v_inc = dv4;
469
470                 //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here?
471                 //                       (SAME ISSUE ELSEWHERE)
472                 for (s32 loop0 = 2; loop0; loop0--) {
473                         if (loop0 == 2) {
474                                 ya = y0;  yb = y1;
475                                 x3 = x4 = i2x(x0);
476                                 u3 = i2x(u0);  v3 = i2x(v0);
477                                 if (dx < 0) {
478 #ifdef GPU_UNAI_USE_FLOATMATH
479 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
480                                         if ((y2 - y0) != 0) {
481                                                 float finv = FloatInv(y2 - y0);
482                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
483                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
484                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
485                                         } else {
486                                                 dx3 = du3 = dv3 = 0;
487                                         }
488                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
489 #else
490                                         if ((y2 - y0) != 0) {
491                                                 float fdiv = y2 - y0;
492                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
493                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
494                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
495                                         } else {
496                                                 dx3 = du3 = dv3 = 0;
497                                         }
498                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
499 #endif
500 #else  // Integer Division:
501 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
502                                         if ((y2 - y0) != 0) {
503                                                 int iF, iS;
504                                                 xInv((y2 - y0), iF, iS);
505                                                 dx3 = xInvMulx((x2 - x0), iF, iS);
506                                                 du3 = xInvMulx((u2 - u0), iF, iS);
507                                                 dv3 = xInvMulx((v2 - v0), iF, iS);
508                                         } else {
509                                                 dx3 = du3 = dv3 = 0;
510                                         }
511                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
512 #else
513                                         if ((y2 - y0) != 0) {
514                                                 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
515                                                 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
516                                                 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
517                                         } else {
518                                                 dx3 = du3 = dv3 = 0;
519                                         }
520                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
521 #endif
522 #endif
523                                 } else {
524 #ifdef GPU_UNAI_USE_FLOATMATH
525 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
526                                         if ((y1 - y0) != 0) {
527                                                 float finv = FloatInv(y1 - y0);
528                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
529                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
530                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
531                                         } else {
532                                                 dx3 = du3 = dv3 = 0;
533                                         }
534                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
535 #else
536                                         if ((y1 - y0) != 0) {
537                                                 float fdiv = y1 - y0;
538                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
539                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
540                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
541                                         } else {
542                                                 dx3 = du3 = dv3 = 0;
543                                         }
544                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
545 #endif
546 #else  // Integer Division:
547 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
548                                         if ((y1 - y0) != 0) {
549                                                 int iF, iS;
550                                                 xInv((y1 - y0), iF, iS);
551                                                 dx3 = xInvMulx((x1 - x0), iF, iS);
552                                                 du3 = xInvMulx((u1 - u0), iF, iS);
553                                                 dv3 = xInvMulx((v1 - v0), iF, iS);
554                                         } else {
555                                                 dx3 = du3 = dv3 = 0;
556                                         }
557                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
558 #else
559                                         if ((y1 - y0) != 0) {
560                                                 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
561                                                 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
562                                                 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
563                                         } else {
564                                                 dx3 = du3 = dv3 = 0;
565                                         }
566                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
567 #endif
568 #endif
569                                 }
570                         } else {
571                                 //senquack - break out of final loop if nothing to be drawn (1st loop
572                                 //           must always be taken to setup dx3/dx4)
573                                 if (y1 == y2) break;
574
575                                 ya = y1;  yb = y2;
576
577                                 if (dx < 0) {
578                                         x3 = i2x(x0);
579                                         x4 = i2x(x1);
580                                         u3 = i2x(u0);
581                                         v3 = i2x(v0);
582                                         if ((y1 - y0) != 0) {
583                                                 x3 += (dx3 * (y1 - y0));
584                                                 u3 += (du3 * (y1 - y0));
585                                                 v3 += (dv3 * (y1 - y0));
586                                         }
587 #ifdef GPU_UNAI_USE_FLOATMATH
588 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
589                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
590 #else
591                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
592 #endif
593 #else  // Integer Division:
594 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
595                                         dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
596 #else
597                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
598 #endif
599 #endif
600                                 } else {
601                                         x3 = i2x(x1);
602                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
603                                         u3 = i2x(u1);
604                                         v3 = i2x(v1);
605 #ifdef GPU_UNAI_USE_FLOATMATH
606 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
607                                         if ((y2 - y1) != 0) {
608                                                 float finv = FloatInv(y2 - y1);
609                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
610                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
611                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
612                                         } else {
613                                                 dx3 = du3 = dv3 = 0;
614                                         }
615 #else
616                                         if ((y2 - y1) != 0) {
617                                                 float fdiv = y2 - y1;
618                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
619                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
620                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
621                                         } else {
622                                                 dx3 = du3 = dv3 = 0;
623                                         }
624 #endif
625 #else  // Integer Division:
626 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
627                                         if ((y2 - y1) != 0) {
628                                                 int iF, iS;
629                                                 xInv((y2 - y1), iF, iS);
630                                                 dx3 = xInvMulx((x2 - x1), iF, iS);
631                                                 du3 = xInvMulx((u2 - u1), iF, iS);
632                                                 dv3 = xInvMulx((v2 - v1), iF, iS);
633                                         } else {
634                                                 dx3 = du3 = dv3 = 0;
635                                         }
636 #else 
637                                         if ((y2 - y1) != 0) {
638                                                 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
639                                                 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
640                                                 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
641                                         } else {
642                                                 dx3 = du3 = dv3 = 0;
643                                         }
644 #endif
645 #endif
646                                 }
647                         }
648
649                         s32 xmin, xmax, ymin, ymax;
650                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
651                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
652
653                         if ((ymin - ya) > 0) {
654                                 x3 += dx3 * (ymin - ya);
655                                 x4 += dx4 * (ymin - ya);
656                                 u3 += du3 * (ymin - ya);
657                                 v3 += dv3 * (ymin - ya);
658                                 ya = ymin;
659                         }
660
661                         if (yb > ymax) yb = ymax;
662
663                         int loop1 = yb - ya;
664                         if (loop1 <= 0)
665                                 continue;
666
667                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
668                         int li=gpu_unai.inn.ilace_mask;
669                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
670                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
671
672                         for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
673                                         x3 += dx3, x4 += dx4,
674                                         u3 += du3, v3 += dv3 )
675                         {
676                                 if (ya&li) continue;
677                                 if ((ya&pi)==pif) continue;
678
679                                 u32 u4, v4;
680
681                                 xa = FixedCeilToInt(x3);  xb = FixedCeilToInt(x4);
682                                 u4 = u3;  v4 = v3;
683
684                                 fixed itmp = i2x(xa) - x3;
685                                 if (itmp != 0) {
686                                         u4 += (du4 * itmp) >> FIXED_BITS;
687                                         v4 += (dv4 * itmp) >> FIXED_BITS;
688                                 }
689
690                                 u4 += fixed_HALF;
691                                 v4 += fixed_HALF;
692
693                                 if ((xmin - xa) > 0) {
694                                         u4 += du4 * (xmin - xa);
695                                         v4 += dv4 * (xmin - xa);
696                                         xa = xmin;
697                                 }
698
699                                 // Set u,v coords for inner driver
700                                 gpu_unai.inn.u = u4;
701                                 gpu_unai.inn.v = v4;
702
703                                 if (xb > xmax) xb = xmax;
704                                 if ((xb - xa) > 0)
705                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa), ya);
706                         }
707                 }
708         } while (++cur_pass < total_passes);
709 }
710
711 /*----------------------------------------------------------------------
712 gpuDrawPolyG - Gouraud-shaded, untextured poly

Draws the polygon as one triangle pass (is_quad == 0) or two passes
(is_quad != 0). Each pass uses the three vertices selected by
polyUseTriangle(); a pass is skipped when that call returns false.

Parameters:
  packet            - source data handed to polyInitVertexBuffer() to
                      fill the local vertex buffer
  gpuPolySpanDriver - inner driver, called once per non-empty span as
                      (gpu_unai, pointer to first pixel, xb - xa, row)
  is_quad           - non-zero selects two passes instead of one

Per pass:
  1. The per-pixel colour steps dr4/dg4/db4 are computed from the three
     vertices and published once through gpu_unai.inn.gInc.
  2. The triangle is walked in two halves: rows y0..y1, then y1..y2.
     x3 is the left span edge, x4 the right span edge, and r3/g3/b3 are
     the colours carried down the x3 edge.
  3. Rows and spans are clipped against gpu_unai.DrawingArea[], rows
     rejected by the interlace tests are skipped, and the colour of the
     first drawn pixel is published through gpu_unai.inn.gCol.

The division used for every slope is selected at compile time by
GPU_UNAI_USE_FLOATMATH, GPU_UNAI_USE_FLOAT_DIV_MULTINV and
GPU_UNAI_USE_INT_DIV_MULTINV; every variant yields 0 when the divisor
is 0.

NOTE(review): the code appears to assume y0 <= y1 <= y2, presumably
guaranteed by polyUseTriangle() - confirm.
713 ----------------------------------------------------------------------*/
714 void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
715 {
716         PolyVertex vbuf[4];
717         polyInitVertexBuffer(vbuf, packet, POLYTYPE_G, is_quad);
718
            // A quad takes two passes, anything else a single pass.
719         int total_passes = is_quad ? 2 : 1;
720         int cur_pass = 0;
721         do
722         {
723                 const PolyVertex* vptrs[3];
724                 s32 x_off, y_off;
                    // A false return skips this pass; 'continue' inside a do-while
                    // jumps to the loop condition, so cur_pass is still incremented.
725                 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
726                         continue;
727
                    // x3/x4   : left/right span edges, fixed point
                    // dx3/dx4 : per-row steps of x3/x4 (dx4 is first used as a
                    //           temporary for the gradient divisor below)
                    // r3/g3/b3 and dr3/dg3/db3 : colours on the x3 edge and their
                    //           per-row steps
                    // dr4/dg4/db4 : per-pixel colour steps along a row
                    // dx      : signed copy of the gradient divisor; its sign picks
                    //           which triangle edge feeds x3 and which feeds x4
728                 s32 xa, xb, ya, yb;
729                 s32 x3, dx3, x4, dx4, dx;
730                 s32 r3, dr3, g3, dg3, b3, db3;
731                 s32 x0, x1, x2, y0, y1, y2;
732                 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
733                 s32 dr4, dg4, db4;
734
                    // Vertex positions get the per-pass offsets added; colours are
                    // taken as-is.
735                 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
736                 r0 = vptrs[0]->col.r;     g0 = vptrs[0]->col.g;  b0 = vptrs[0]->col.b;
737                 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
738                 r1 = vptrs[1]->col.r;     g1 = vptrs[1]->col.g;  b1 = vptrs[1]->col.b;
739                 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
740                 r2 = vptrs[2]->col.r;     g2 = vptrs[2]->col.g;  b2 = vptrs[2]->col.b;
741
                    // ya/yb are temporaries here for (y2 - y0) and (y2 - y1).
                    // dx4 becomes the divisor and dr4/dg4/db4 the numerators of the
                    // per-pixel colour steps.
742                 ya = y2 - y0;
743                 yb = y2 - y1;
744                 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
745                 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
746                 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
747                 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
748                 dx = dx4;
                    // Make the divisor non-negative; negating the numerators too keeps
                    // each quotient unchanged.
749                 if (dx4 < 0) {
750                         dx4 = -dx4;
751                         dr4 = -dr4;
752                         dg4 = -dg4;
753                         db4 = -db4;
754                 }
755
                    // Turn the numerators into fixed-point per-pixel steps by dividing
                    // by dx4 (0 when dx4 == 0). Exactly one of the four variants below
                    // is compiled in. NOTE(review): xInv/xInvMulx are defined elsewhere;
                    // presumably a multiply-by-reciprocal form of the same division.
756 #ifdef GPU_UNAI_USE_FLOATMATH
757 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
758                 if (dx4 != 0) {
759                         float finv = FloatInv(dx4);
760                         dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
761                         dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
762                         db4 = (fixed)((db4 << FIXED_BITS) * finv);
763                 } else {
764                         dr4 = dg4 = db4 = 0;
765                 }
766 #else
767                 if (dx4 != 0) {
768                         float fdiv = dx4;
769                         dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
770                         dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
771                         db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
772                 } else {
773                         dr4 = dg4 = db4 = 0;
774                 }
775 #endif
776 #else  // Integer Division:
777 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
778                 if (dx4 != 0) {
779                         int iF, iS;
780                         xInv(dx4, iF, iS);
781                         dr4 = xInvMulx(dr4, iF, iS);
782                         dg4 = xInvMulx(dg4, iF, iS);
783                         db4 = xInvMulx(db4, iF, iS);
784                 } else {
785                         dr4 = dg4 = db4 = 0;
786                 }
787 #else
788                 if (dx4 != 0) {
789                         dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
790                         dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
791                         db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
792                 } else {
793                         dr4 = dg4 = db4 = 0;
794                 }
795 #endif
796 #endif
797                 // Setup packed Gouraud increment for inner driver
798                 gpu_unai.inn.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
799
                    // loop0 == 2: first half, rows y0..y1.
                    // loop0 == 1: second half, rows y1..y2.
800                 for (s32 loop0 = 2; loop0; loop0--) {
801                         if (loop0 == 2) {
                                    // Both span edges and the colours start at vertex 0.
802                                 ya = y0;
803                                 yb = y1;
804                                 x3 = x4 = i2x(x0);
805                                 r3 = i2x(r0);
806                                 g3 = i2x(g0);
807                                 b3 = i2x(b0);
808                                 if (dx < 0) {
                                            // dx < 0: x3 (and the colours) step along edge
                                            // v0->v2, x4 steps along edge v0->v1.
809 #ifdef GPU_UNAI_USE_FLOATMATH
810 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
811                                         if ((y2 - y0) != 0) {
812                                                 float finv = FloatInv(y2 - y0);
813                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
814                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
815                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
816                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
817                                         } else {
818                                                 dx3 = dr3 = dg3 = db3 = 0;
819                                         }
820                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
821 #else
822                                         if ((y2 - y0) != 0) {
823                                                 float fdiv = y2 - y0;
824                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
825                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
826                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
827                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
828                                         } else {
829                                                 dx3 = dr3 = dg3 = db3 = 0;
830                                         }
831                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
832 #endif
833 #else  // Integer Division:
834 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
835                                         if ((y2 - y0) != 0) {
836                                                 int iF, iS;
837                                                 xInv((y2 - y0), iF, iS);
838                                                 dx3 = xInvMulx((x2 - x0), iF, iS);
839                                                 dr3 = xInvMulx((r2 - r0), iF, iS);
840                                                 dg3 = xInvMulx((g2 - g0), iF, iS);
841                                                 db3 = xInvMulx((b2 - b0), iF, iS);
842                                         } else {
843                                                 dx3 = dr3 = dg3 = db3 = 0;
844                                         }
845                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
846 #else
847                                         if ((y2 - y0) != 0) {
848                                                 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
849                                                 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
850                                                 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
851                                                 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
852                                         } else {
853                                                 dx3 = dr3 = dg3 = db3 = 0;
854                                         }
855                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
856 #endif
857 #endif
858                                 } else {
                                            // dx >= 0: x3 (and the colours) step along edge
                                            // v0->v1, x4 steps along edge v0->v2.
859 #ifdef GPU_UNAI_USE_FLOATMATH
860 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
861                                         if ((y1 - y0) != 0) {
862                                                 float finv = FloatInv(y1 - y0);
863                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
864                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
865                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
866                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
867                                         } else {
868                                                 dx3 = dr3 = dg3 = db3 = 0;
869                                         }
870                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
871 #else
872                                         if ((y1 - y0) != 0) {
873                                                 float fdiv = y1 - y0;
874                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
875                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
876                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
877                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
878                                         } else {
879                                                 dx3 = dr3 = dg3 = db3 = 0;
880                                         }
881                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
882 #endif
883 #else  // Integer Division:
884 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
885                                         if ((y1 - y0) != 0) {
886                                                 int iF, iS;
887                                                 xInv((y1 - y0), iF, iS);
888                                                 dx3 = xInvMulx((x1 - x0), iF, iS);
889                                                 dr3 = xInvMulx((r1 - r0), iF, iS);
890                                                 dg3 = xInvMulx((g1 - g0), iF, iS);
891                                                 db3 = xInvMulx((b1 - b0), iF, iS);
892                                         } else {
893                                                 dx3 = dr3 = dg3 = db3 = 0;
894                                         }
895                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
896 #else
897                                         if ((y1 - y0) != 0) {
898                                                 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
899                                                 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
900                                                 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
901                                                 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
902                                         } else {
903                                                 dx3 = dr3 = dg3 = db3 = 0;
904                                         }
905                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
906 #endif
907 #endif
908                                 }
909                         } else {
910                                 //senquack - break out of final loop if nothing to be drawn (1st loop
911                                 //           must always be taken to setup dx3/dx4)
912                                 if (y1 == y2) break;
913
914                                 ya = y1;  yb = y2;
915
916                                 if (dx < 0) {
                                            // x3 and the colours stay on edge v0->v2: restart
                                            // from vertex 0 and step (y1 - y0) rows using the
                                            // dx3/dr3/dg3/db3 left over from the first half.
                                            // x4 restarts at vertex 1.
917                                         x3 = i2x(x0);  x4 = i2x(x1);
918                                         r3 = i2x(r0);  g3 = i2x(g0);  b3 = i2x(b0);
919
920                                         if ((y1 - y0) != 0) {
921                                                 x3 += (dx3 * (y1 - y0));
922                                                 r3 += (dr3 * (y1 - y0));
923                                                 g3 += (dg3 * (y1 - y0));
924                                                 b3 += (db3 * (y1 - y0));
925                                         }
926
                                            // New x4 step for edge v1->v2.
927 #ifdef GPU_UNAI_USE_FLOATMATH
928 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
929                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
930 #else
931                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
932 #endif
933 #else  // Integer Division:
934 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
935                                         dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
936 #else
937                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
938 #endif
939 #endif
940                                 } else {
                                            // x3 and the colours restart at vertex 1 and get new
                                            // steps for edge v1->v2. x4 stays on edge v0->v2,
                                            // moved (y1 - y0) rows with the dx4 left over from
                                            // the first half.
941                                         x3 = i2x(x1);
942                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
943
944                                         r3 = i2x(r1);  g3 = i2x(g1);  b3 = i2x(b1);
945
946 #ifdef GPU_UNAI_USE_FLOATMATH
947 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
948                                         if ((y2 - y1) != 0) {
949                                                 float finv = FloatInv(y2 - y1);
950                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
951                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
952                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
953                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
954                                         } else {
955                                                 dx3 = dr3 = dg3 = db3 = 0;
956                                         }
957 #else
958                                         if ((y2 - y1) != 0) {
959                                                 float fdiv = y2 - y1;
960                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
961                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
962                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
963                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
964                                         } else {
965                                                 dx3 = dr3 = dg3 = db3 = 0;
966                                         }
967 #endif
968 #else  // Integer Division:
969 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
970                                         if ((y2 - y1) != 0) {
971                                                 int iF, iS;
972                                                 xInv((y2 - y1), iF, iS);
973                                                 dx3 = xInvMulx((x2 - x1), iF, iS);
974                                                 dr3 = xInvMulx((r2 - r1), iF, iS);
975                                                 dg3 = xInvMulx((g2 - g1), iF, iS);
976                                                 db3 = xInvMulx((b2 - b1), iF, iS);
977                                         } else {
978                                                 dx3 = dr3 = dg3 = db3 = 0;
979                                         }
980 #else
981                                         if ((y2 - y1) != 0) {
982                                                 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
983                                                 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
984                                                 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
985                                                 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
986                                         } else {
987                                                 dx3 = dr3 = dg3 = db3 = 0;
988                                         }
989 #endif
990 #endif
991                                 }
992                         }
993
                            // Clip limits come from gpu_unai.DrawingArea[]: [0]/[1] are
                            // the minimum x/y, [2]/[3] the maximum x/y used below.
994                         s32 xmin, xmax, ymin, ymax;
995                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
996                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
997
                            // Top clip: jump to row ymin by advancing every interpolant by
                            // the number of skipped rows.
998                         if ((ymin - ya) > 0) {
999                                 x3 += (dx3 * (ymin - ya));
1000                                 x4 += (dx4 * (ymin - ya));
1001                                 r3 += (dr3 * (ymin - ya));
1002                                 g3 += (dg3 * (ymin - ya));
1003                                 b3 += (db3 * (ymin - ya));
1004                                 ya = ymin;
1005                         }
1006
                             // Bottom clip.
1007                         if (yb > ymax) yb = ymax;
1008
                             // loop1 = rows left in this half; none left means go on to the
                             // next loop0 iteration.
1009                         int loop1 = yb - ya;
1010                         if (loop1 <= 0)
1011                                 continue;
1012
                             // PixelBase points at column 0 of row ya and moves down one row
                             // (FRAME_WIDTH elements) per iteration.
                             // li : a row is skipped when (ya & li) is non-zero.
                             // pi/pif : a row is skipped when (ya & pi) == pif. With
                             //          ProgressiveInterlaceEnabled() false, pi = 0 and
                             //          pif = 1, so that test never skips a row.
1013                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
1014                         int li=gpu_unai.inn.ilace_mask;
1015                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
1016                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
1017
                             // The 'continue's below still execute the for-increment, so
                             // skipped rows advance the edges and colours as well.
1018                         for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1019                                         x3 += dx3, x4 += dx4,
1020                                         r3 += dr3, g3 += dg3, b3 += db3 )
1021                         {
1022                                 if (ya&li) continue;
1023                                 if ((ya&pi)==pif) continue;
1024
1025                                 u32 r4, g4, b4;
1026
                                     // Integer span: starts at column xa, ends before xb.
1027                                 xa = FixedCeilToInt(x3);
1028                                 xb = FixedCeilToInt(x4);
1029                                 r4 = r3;  g4 = g3;  b4 = b3;
1030
                                     // itmp is the fixed-point distance from the exact edge
                                     // x3 to column xa; move the colours by that distance
                                     // along the row.
1031                                 fixed itmp = i2x(xa) - x3;
1032                                 if (itmp != 0) {
1033                                         r4 += (dr4 * itmp) >> FIXED_BITS;
1034                                         g4 += (dg4 * itmp) >> FIXED_BITS;
1035                                         b4 += (db4 * itmp) >> FIXED_BITS;
1036                                 }
1037
                                     // NOTE(review): presumably a rounding bias - confirm.
1038                                 r4 += fixed_HALF;
1039                                 g4 += fixed_HALF;
1040                                 b4 += fixed_HALF;
1041
                                     // Left clip: start at xmin and advance the colours by
                                     // the number of skipped pixels.
1042                                 if ((xmin - xa) > 0) {
1043                                         r4 += (dr4 * (xmin - xa));
1044                                         g4 += (dg4 * (xmin - xa));
1045                                         b4 += (db4 * (xmin - xa));
1046                                         xa = xmin;
1047                                 }
1048
1049                                 // Setup packed Gouraud color for inner driver
1050                                 gpu_unai.inn.gCol = gpuPackGouraudCol(r4, g4, b4);
1051
                                     // Right clip, then draw only when the span is non-empty.
1052                                 if (xb > xmax) xb = xmax;
1053                                 if ((xb - xa) > 0)
1054                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa), ya);
1055                         }
1056                 }
1057         } while (++cur_pass < total_passes);
1058 }
1059
1060 /*----------------------------------------------------------------------
1061 gpuDrawPolyGT - Gouraud-shaded, textured poly
1062 ----------------------------------------------------------------------*/
1063 void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
1064 {
1065         PolyVertex vbuf[4];
1066         polyInitVertexBuffer(vbuf, packet, POLYTYPE_GT, is_quad);
1067
1068         int total_passes = is_quad ? 2 : 1;
1069         int cur_pass = 0;
1070         do
1071         {
1072                 const PolyVertex* vptrs[3];
1073                 s32 x_off, y_off;
1074                 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
1075                         continue;
1076
1077                 s32 xa, xb, ya, yb;
1078                 s32 x3, dx3, x4, dx4, dx;
1079                 s32 u3, du3, v3, dv3;
1080                 s32 r3, dr3, g3, dg3, b3, db3;
1081                 s32 x0, x1, x2, y0, y1, y2;
1082                 s32 u0, u1, u2, v0, v1, v2;
1083                 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
1084                 s32 du4, dv4;
1085                 s32 dr4, dg4, db4;
1086
1087                 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
1088                 u0 = vptrs[0]->tex.u;     v0 = vptrs[0]->tex.v;
1089                 r0 = vptrs[0]->col.r;     g0 = vptrs[0]->col.g;  b0 = vptrs[0]->col.b;
1090                 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
1091                 u1 = vptrs[1]->tex.u;     v1 = vptrs[1]->tex.v;
1092                 r1 = vptrs[1]->col.r;     g1 = vptrs[1]->col.g;  b1 = vptrs[1]->col.b;
1093                 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
1094                 u2 = vptrs[2]->tex.u;     v2 = vptrs[2]->tex.v;
1095                 r2 = vptrs[2]->col.r;     g2 = vptrs[2]->col.g;  b2 = vptrs[2]->col.b;
1096
1097                 ya = y2 - y0;
1098                 yb = y2 - y1;
1099                 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
1100                 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
1101                 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
1102                 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
1103                 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
1104                 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
1105                 dx = dx4;
1106                 if (dx4 < 0) {
1107                         dx4 = -dx4;
1108                         du4 = -du4;
1109                         dv4 = -dv4;
1110                         dr4 = -dr4;
1111                         dg4 = -dg4;
1112                         db4 = -db4;
1113                 }
1114
1115 #ifdef GPU_UNAI_USE_FLOATMATH
1116 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1117                 if (dx4 != 0) {
1118                         float finv = FloatInv(dx4);
1119                         du4 = (fixed)((du4 << FIXED_BITS) * finv);
1120                         dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
1121                         dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
1122                         dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
1123                         db4 = (fixed)((db4 << FIXED_BITS) * finv);
1124                 } else {
1125                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1126                 }
1127 #else
1128                 if (dx4 != 0) {
1129                         float fdiv = dx4;
1130                         du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
1131                         dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
1132                         dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
1133                         dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
1134                         db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
1135                 } else {
1136                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1137                 }
1138 #endif
1139 #else  // Integer Division:
1140 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1141                 if (dx4 != 0) {
1142                         int iF, iS;
1143                         xInv(dx4, iF, iS);
1144                         du4 = xInvMulx(du4, iF, iS);
1145                         dv4 = xInvMulx(dv4, iF, iS);
1146                         dr4 = xInvMulx(dr4, iF, iS);
1147                         dg4 = xInvMulx(dg4, iF, iS);
1148                         db4 = xInvMulx(db4, iF, iS);
1149                 } else {
1150                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1151                 }
1152 #else
1153                 if (dx4 != 0) {
1154                         du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
1155                         dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
1156                         dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
1157                         dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
1158                         db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
1159                 } else {
1160                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1161                 }
1162 #endif
1163 #endif
1164                 // Set u,v increments and packed Gouraud increment for inner driver
1165                 gpu_unai.inn.u_inc = du4;
1166                 gpu_unai.inn.v_inc = dv4;
1167                 gpu_unai.inn.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
1168
1169                 for (s32 loop0 = 2; loop0; loop0--) {
1170                         if (loop0 == 2) {
1171                                 ya = y0;  yb = y1;
1172                                 x3 = x4 = i2x(x0);
1173                                 u3 = i2x(u0);  v3 = i2x(v0);
1174                                 r3 = i2x(r0);  g3 = i2x(g0);  b3 = i2x(b0);
1175                                 if (dx < 0) {
1176 #ifdef GPU_UNAI_USE_FLOATMATH
1177 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1178                                         if ((y2 - y0) != 0) {
1179                                                 float finv = FloatInv(y2 - y0);
1180                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
1181                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
1182                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
1183                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
1184                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
1185                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
1186                                         } else {
1187                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1188                                         }
1189                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
1190 #else
1191                                         if ((y2 - y0) != 0) {
1192                                                 float fdiv = y2 - y0;
1193                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
1194                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
1195                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
1196                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
1197                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
1198                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
1199                                         } else {
1200                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1201                                         }
1202                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
1203 #endif
1204 #else  // Integer Division:
1205 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1206                                         if ((y2 - y0) != 0) {
1207                                                 int iF, iS;
1208                                                 xInv((y2 - y0), iF, iS);
1209                                                 dx3 = xInvMulx((x2 - x0), iF, iS);
1210                                                 du3 = xInvMulx((u2 - u0), iF, iS);
1211                                                 dv3 = xInvMulx((v2 - v0), iF, iS);
1212                                                 dr3 = xInvMulx((r2 - r0), iF, iS);
1213                                                 dg3 = xInvMulx((g2 - g0), iF, iS);
1214                                                 db3 = xInvMulx((b2 - b0), iF, iS);
1215                                         } else {
1216                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1217                                         }
1218                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
1219 #else
1220                                         if ((y2 - y0) != 0) {
1221                                                 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
1222                                                 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
1223                                                 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
1224                                                 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
1225                                                 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
1226                                                 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
1227                                         } else {
1228                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1229                                         }
1230                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
1231 #endif
1232 #endif
1233                                 } else {
1234 #ifdef GPU_UNAI_USE_FLOATMATH
1235 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1236                                         if ((y1 - y0) != 0) {
1237                                                 float finv = FloatInv(y1 - y0);
1238                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
1239                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
1240                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
1241                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
1242                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
1243                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
1244                                         } else {
1245                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1246                                         }
1247                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
1248 #else
1249                                         if ((y1 - y0) != 0) {
1250                                                 float fdiv = y1 - y0;
1251                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
1252                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
1253                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
1254                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
1255                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
1256                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
1257                                         } else {
1258                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1259                                         }
1260                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / float(y2 - y0)) : 0;
1261 #endif
1262 #else  // Integer Division:
1263 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1264                                         if ((y1 - y0) != 0) {
1265                                                 int iF, iS;
1266                                                 xInv((y1 - y0), iF, iS);
1267                                                 dx3 = xInvMulx((x1 - x0), iF, iS);
1268                                                 du3 = xInvMulx((u1 - u0), iF, iS);
1269                                                 dv3 = xInvMulx((v1 - v0), iF, iS);
1270                                                 dr3 = xInvMulx((r1 - r0), iF, iS);
1271                                                 dg3 = xInvMulx((g1 - g0), iF, iS);
1272                                                 db3 = xInvMulx((b1 - b0), iF, iS);
1273                                         } else {
1274                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1275                                         }
1276                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
1277 #else
1278                                         if ((y1 - y0) != 0) {
1279                                                 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
1280                                                 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
1281                                                 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
1282                                                 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
1283                                                 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
1284                                                 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
1285                                         } else {
1286                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1287                                         }
1288                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
1289 #endif
1290 #endif
1291                                 }
1292                         } else {
1293                                 //senquack - break out of final loop if nothing to be drawn (1st loop
1294                                 //           must always be taken to setup dx3/dx4)
1295                                 if (y1 == y2) break;
1296
1297                                 ya = y1;  yb = y2;
1298
1299                                 if (dx < 0) {
1300                                         x3 = i2x(x0);  x4 = i2x(x1);
1301                                         u3 = i2x(u0);  v3 = i2x(v0);
1302                                         r3 = i2x(r0);  g3 = i2x(g0);  b3 = i2x(b0);
1303
1304                                         if ((y1 - y0) != 0) {
1305                                                 x3 += (dx3 * (y1 - y0));
1306                                                 u3 += (du3 * (y1 - y0));
1307                                                 v3 += (dv3 * (y1 - y0));
1308                                                 r3 += (dr3 * (y1 - y0));
1309                                                 g3 += (dg3 * (y1 - y0));
1310                                                 b3 += (db3 * (y1 - y0));
1311                                         }
1312
1313 #ifdef GPU_UNAI_USE_FLOATMATH
1314 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1315                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
1316 #else
1317                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
1318 #endif
1319 #else  // Integer Division:
1320 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1321                                         dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
1322 #else
1323                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
1324 #endif
1325 #endif
1326                                 } else {
1327                                         x3 = i2x(x1);
1328                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
1329
1330                                         u3 = i2x(u1);  v3 = i2x(v1);
1331                                         r3 = i2x(r1);  g3 = i2x(g1);  b3 = i2x(b1);
1332 #ifdef GPU_UNAI_USE_FLOATMATH
1333 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1334                                         if ((y2 - y1) != 0) {
1335                                                 float finv = FloatInv(y2 - y1);
1336                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
1337                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
1338                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
1339                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
1340                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
1341                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
1342                                         } else {
1343                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1344                                         }
1345 #else
1346                                         if ((y2 - y1) != 0) {
1347                                                 float fdiv = y2 - y1;
1348                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
1349                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
1350                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
1351                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
1352                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
1353                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
1354                                         } else {
1355                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1356                                         }
1357 #endif
1358 #else  // Integer Division:
1359 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1360                                         if ((y2 - y1) != 0) {
1361                                                 int iF, iS;
1362                                                 xInv((y2 - y1), iF, iS);
1363                                                 dx3 = xInvMulx((x2 - x1), iF, iS);
1364                                                 du3 = xInvMulx((u2 - u1), iF, iS);
1365                                                 dv3 = xInvMulx((v2 - v1), iF, iS);
1366                                                 dr3 = xInvMulx((r2 - r1), iF, iS);
1367                                                 dg3 = xInvMulx((g2 - g1), iF, iS);
1368                                                 db3 = xInvMulx((b2 - b1), iF, iS);
1369                                         } else {
1370                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1371                                         }
1372 #else
1373                                         if ((y2 - y1) != 0) {
1374                                                 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
1375                                                 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
1376                                                 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
1377                                                 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
1378                                                 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
1379                                                 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
1380                                         } else {
1381                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1382                                         }
1383 #endif
1384 #endif
1385                                 }
1386                         }
1387
1388                         s32 xmin, xmax, ymin, ymax;
1389                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
1390                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
1391
1392                         if ((ymin - ya) > 0) {
1393                                 x3 += (dx3 * (ymin - ya));
1394                                 x4 += (dx4 * (ymin - ya));
1395                                 u3 += (du3 * (ymin - ya));
1396                                 v3 += (dv3 * (ymin - ya));
1397                                 r3 += (dr3 * (ymin - ya));
1398                                 g3 += (dg3 * (ymin - ya));
1399                                 b3 += (db3 * (ymin - ya));
1400                                 ya = ymin;
1401                         }
1402
1403                         if (yb > ymax) yb = ymax;
1404
1405                         int loop1 = yb - ya;
1406                         if (loop1 <= 0)
1407                                 continue;
1408
1409                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
1410                         int li=gpu_unai.inn.ilace_mask;
1411                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
1412                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
1413
1414                         for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1415                                         x3 += dx3, x4 += dx4,
1416                                         u3 += du3, v3 += dv3,
1417                                         r3 += dr3, g3 += dg3, b3 += db3 )
1418                         {
1419                                 if (ya&li) continue;
1420                                 if ((ya&pi)==pif) continue;
1421
1422                                 u32 u4, v4;
1423                                 u32 r4, g4, b4;
1424
1425                                 xa = FixedCeilToInt(x3);
1426                                 xb = FixedCeilToInt(x4);
1427                                 u4 = u3;  v4 = v3;
1428                                 r4 = r3;  g4 = g3;  b4 = b3;
1429
1430                                 fixed itmp = i2x(xa) - x3;
1431                                 if (itmp != 0) {
1432                                         u4 += (du4 * itmp) >> FIXED_BITS;
1433                                         v4 += (dv4 * itmp) >> FIXED_BITS;
1434                                         r4 += (dr4 * itmp) >> FIXED_BITS;
1435                                         g4 += (dg4 * itmp) >> FIXED_BITS;
1436                                         b4 += (db4 * itmp) >> FIXED_BITS;
1437                                 }
1438
1439                                 u4 += fixed_HALF;
1440                                 v4 += fixed_HALF;
1441                                 r4 += fixed_HALF;
1442                                 g4 += fixed_HALF;
1443                                 b4 += fixed_HALF;
1444
1445                                 if ((xmin - xa) > 0) {
1446                                         u4 += du4 * (xmin - xa);
1447                                         v4 += dv4 * (xmin - xa);
1448                                         r4 += dr4 * (xmin - xa);
1449                                         g4 += dg4 * (xmin - xa);
1450                                         b4 += db4 * (xmin - xa);
1451                                         xa = xmin;
1452                                 }
1453
1454                                 // Set packed Gouraud color and u,v coords for inner driver
1455                                 gpu_unai.inn.u = u4;
1456                                 gpu_unai.inn.v = v4;
1457                                 gpu_unai.inn.gCol = gpuPackGouraudCol(r4, g4, b4);
1458
1459                                 if (xb > xmax) xb = xmax;
1460                                 if ((xb - xa) > 0)
1461                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa), ya);
1462                         }
1463                 }
1464         } while (++cur_pass < total_passes);
1465 }
1466
1467 #endif /* __GPU_UNAI_GPU_RASTER_POLYGON_H__ */