ff6dc00d7893d637a821880f3758042901f8d5ee
[pcsx_rearmed.git] / plugins / gpu_unai / gpu_raster_polygon.h
1 /***************************************************************************
2 *   Copyright (C) 2010 PCSX4ALL Team                                      *
3 *   Copyright (C) 2010 Unai                                               *
4 *                                                                         *
5 *   This program is free software; you can redistribute it and/or modify  *
6 *   it under the terms of the GNU General Public License as published by  *
7 *   the Free Software Foundation; either version 2 of the License, or     *
8 *   (at your option) any later version.                                   *
9 *                                                                         *
10 *   This program is distributed in the hope that it will be useful,       *
11 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13 *   GNU General Public License for more details.                          *
14 *                                                                         *
15 *   You should have received a copy of the GNU General Public License     *
16 *   along with this program; if not, write to the                         *
17 *   Free Software Foundation, Inc.,                                       *
18 *   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
19 ***************************************************************************/
20
21 #ifndef __GPU_UNAI_GPU_RASTER_POLYGON_H__
22 #define __GPU_UNAI_GPU_RASTER_POLYGON_H__
23
24 //senquack - NOTE: GPU Unai poly routines have been rewritten/adapted
25 // from DrHell routines to fix multiple issues. See README_senquack.txt
26
27 ///////////////////////////////////////////////////////////////////////////////
28 // Shared poly vertex buffer, able to handle 3 or 4-pt polys of any type.
29 ///////////////////////////////////////////////////////////////////////////////
30
31 struct PolyVertex {
32         s32 x, y; // Sign-extended 11-bit X,Y coords
33         union {
34 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
35                 struct { u8 pad[2], v, u; } tex; // Texture coords (if used)
36 #else
37                 struct { u8 u, v, pad[2]; } tex; // Texture coords (if used)
38 #endif
39                 u32 tex_word;
40         };
41         union {
42 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
43                 struct { u8 pad, b, g, r; } col; // 24-bit RGB color (if used)
44 #else
45                 struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used)
46 #endif
47                 u32 col_word;
48         };
49 };
50
// Bit flags naming the optional per-vertex data a polygon can carry.
// They are OR-ed together to form the PolyType values.
enum PolyAttribute {
        POLYATTR_TEXTURE = 0x1,  // bit 0: vertices carry U,V texture coords
        POLYATTR_GOURAUD = 0x2   // bit 1: vertices carry their own color
};
55
56 enum PolyType {
57         POLYTYPE_F  = 0,
58         POLYTYPE_FT = (POLYATTR_TEXTURE),
59         POLYTYPE_G  = (POLYATTR_GOURAUD),
60         POLYTYPE_GT = (POLYATTR_TEXTURE | POLYATTR_GOURAUD)
61 };
62
63 ///////////////////////////////////////////////////////////////////////////////
64 // polyInitVertexBuffer()
65 // Fills vbuf[] array with data from any type of poly draw-command packet.
66 ///////////////////////////////////////////////////////////////////////////////
67 static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyType ptype, u32 is_quad)
68 {
69         bool texturing = ptype & POLYATTR_TEXTURE;
70         bool gouraud   = ptype & POLYATTR_GOURAUD;
71
72         int vert_stride = 1; // Stride of vertices in cmd packet, in 32-bit words
73         if (texturing)
74                 vert_stride++;
75         if (gouraud)
76                 vert_stride++;
77
78         int num_verts = (is_quad) ? 4 : 3;
79         le32_t *ptr;
80
81         // X,Y coords, adjusted by draw offsets
82         s32 x_off = gpu_unai.DrawingOffset[0];
83         s32 y_off = gpu_unai.DrawingOffset[1];
84         ptr = &packet.U4[1];
85         for (int i=0;  i < num_verts; ++i, ptr += vert_stride) {
86                 u32 coords = le32_to_u32(*ptr);
87                 vbuf[i].x = GPU_EXPANDSIGN((s16)coords) + x_off;
88                 vbuf[i].y = GPU_EXPANDSIGN((s16)(coords >> 16)) + y_off;
89         }
90
91         // U,V texture coords (if applicable)
92         if (texturing) {
93                 ptr = &packet.U4[2];
94                 for (int i=0;  i < num_verts; ++i, ptr += vert_stride)
95                         vbuf[i].tex_word = le32_to_u32(*ptr);
96         }
97
98         // Colors (if applicable)
99         if (gouraud) {
100                 ptr = &packet.U4[0];
101                 for (int i=0;  i < num_verts; ++i, ptr += vert_stride)
102                         vbuf[i].col_word = le32_to_u32(*ptr);
103         }
104 }
105
106 ///////////////////////////////////////////////////////////////////////////////
107 //  Helper functions to determine which vertex in a 2 or 3 vertex array
108 //   has the highest/lowest X/Y coordinate.
109 //   Note: the comparison logic is such that, given a set of vertices with
110 //    identical values for a given coordinate, a different index will be
111 //    returned from vertIdxOfLeast..() than a call to vertIdxOfHighest..().
112 //    This ensures that, during the vertex-ordering phase of rasterization,
113 //    all three vertices remain unique.
114 ///////////////////////////////////////////////////////////////////////////////
115
// Index (0 or 1) of the vertex with the smaller X; a tie yields index 0.
template<typename T>
static inline int vertIdxOfLeastXCoord2(const T *Tptr)
{
        if (Tptr[0].x <= Tptr[1].x)
                return 0;
        return 1;
}
121
// Index (0..2) of the vertex with the smallest X; among equal minima the
// lowest index is returned.
template<typename T>
static inline int vertIdxOfLeastXCoord3(const T *Tptr)
{
        // Winner of the first pair (tie goes to index 0) ...
        int winner = 1;
        if (Tptr[0].x <= Tptr[1].x)
                winner = 0;

        // ... keeps its place unless vertex 2 is strictly smaller.
        if (Tptr[winner].x <= Tptr[2].x)
                return winner;
        return 2;
}
128
// Index (0 or 1) of the vertex with the smaller Y; a tie yields index 0.
template<typename T>
static inline int vertIdxOfLeastYCoord2(const T *Tptr)
{
        if (Tptr[0].y <= Tptr[1].y)
                return 0;
        return 1;
}
134
// Index (0..2) of the vertex with the smallest Y; among equal minima the
// lowest index is returned.
template<typename T>
static inline int vertIdxOfLeastYCoord3(const T *Tptr)
{
        // Winner of the first pair (tie goes to index 0) ...
        int winner = 1;
        if (Tptr[0].y <= Tptr[1].y)
                winner = 0;

        // ... keeps its place unless vertex 2 is strictly smaller.
        if (Tptr[winner].y <= Tptr[2].y)
                return winner;
        return 2;
}
141
// Index (0 or 1) of the vertex with the larger X; a tie yields index 1.
template<typename T>
static inline int vertIdxOfHighestXCoord2(const T *Tptr)
{
        if (Tptr[1].x >= Tptr[0].x)
                return 1;
        return 0;
}
147
// Index (0..2) of the vertex with the largest X; among equal maxima the
// highest index is returned.
template<typename T>
static inline int vertIdxOfHighestXCoord3(const T *Tptr)
{
        // Winner of the first pair (tie goes to index 1) ...
        int winner = 0;
        if (Tptr[1].x >= Tptr[0].x)
                winner = 1;

        // ... is displaced whenever vertex 2 is at least as large.
        if (Tptr[2].x >= Tptr[winner].x)
                return 2;
        return winner;
}
154
// Index (0 or 1) of the vertex with the larger Y; a tie yields index 1.
template<typename T>
static inline int vertIdxOfHighestYCoord2(const T *Tptr)
{
        if (Tptr[1].y >= Tptr[0].y)
                return 1;
        return 0;
}
160
// Index (0..2) of the vertex with the largest Y; among equal maxima the
// highest index is returned.
template<typename T>
static inline int vertIdxOfHighestYCoord3(const T *Tptr)
{
        // Winner of the first pair (tie goes to index 1) ...
        int winner = 0;
        if (Tptr[1].y >= Tptr[0].y)
                winner = 1;

        // ... is displaced whenever vertex 2 is at least as large.
        if (Tptr[2].y >= Tptr[winner].y)
                return 2;
        return winner;
}
167
168 ///////////////////////////////////////////////////////////////////////////////
169 // polyUseTriangle()
170 //  Determines if the specified triangle should be rendered. If so, it
171 //  fills the given array of vertex pointers, vert_ptrs, in order of
172 //  increasing Y coordinate values, as required by rasterization algorithm.
173 //  Parameter 'tri_num' is 0 for first triangle (idx 0,1,2 of vbuf[]),
174 //   or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]).
175 //  Returns true if triangle should be rendered, false if not.
176 ///////////////////////////////////////////////////////////////////////////////
177 static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs)
178 {
179         // Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)?
180         const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 0 : 1];
181
182         // Get indices of highest/lowest X,Y coords within triangle
183         int idx_lowest_x  = vertIdxOfLeastXCoord3(tri_ptr);
184         int idx_highest_x = vertIdxOfHighestXCoord3(tri_ptr);
185         int idx_lowest_y  = vertIdxOfLeastYCoord3(tri_ptr);
186         int idx_highest_y = vertIdxOfHighestYCoord3(tri_ptr);
187
188         // Maximum absolute distance between any two X coordinates is 1023,
189         //  and for Y coordinates is 511 (PS1 hardware limitation)
190         int lowest_x  = tri_ptr[idx_lowest_x].x;
191         int highest_x = tri_ptr[idx_highest_x].x;
192         int lowest_y  = tri_ptr[idx_lowest_y].y;
193         int highest_y = tri_ptr[idx_highest_y].y;
194         if ((highest_x - lowest_x) >= CHKMAX_X ||
195             (highest_y - lowest_y) >= CHKMAX_Y)
196                 return false;
197
198         // Determine if triangle is completely outside clipping range
199         int xmin, xmax, ymin, ymax;
200         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
201         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
202         int clipped_lowest_x  = Max2(xmin,lowest_x);
203         int clipped_lowest_y  = Max2(ymin,lowest_y);
204         int clipped_highest_x = Min2(xmax,highest_x);
205         int clipped_highest_y = Min2(ymax,highest_y);
206         if (clipped_lowest_x >= clipped_highest_x ||
207             clipped_lowest_y >= clipped_highest_y)
208                 return false;
209
210         // Order vertex ptrs by increasing y value (draw routines need this).
211         // The middle index is deduced by a binary math trick that depends
212         //  on index range always being between 0..2
213         vert_ptrs[0] = tri_ptr + idx_lowest_y;
214         vert_ptrs[1] = tri_ptr + ((idx_lowest_y + idx_highest_y) ^ 3);
215         vert_ptrs[2] = tri_ptr + idx_highest_y;
216         return true;
217 }
218
219 ///////////////////////////////////////////////////////////////////////////////
220 //  GPU internal polygon drawing functions
221 ///////////////////////////////////////////////////////////////////////////////
222
223 /*----------------------------------------------------------------------
224 gpuDrawPolyF - Flat-shaded, untextured poly
225 ----------------------------------------------------------------------*/
226 void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
227 {
228         // Set up bgr555 color to be used across calls in inner driver
229         gpu_unai.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0]));
230
231         PolyVertex vbuf[4];
232         polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad);
233
234         int total_passes = is_quad ? 2 : 1;
235         int cur_pass = 0;
236         do
237         {
238                 const PolyVertex* vptrs[3];
239                 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
240                         continue;
241
242                 s32 xa, xb, ya, yb;
243                 s32 x3, dx3, x4, dx4, dx;
244                 s32 x0, x1, x2, y0, y1, y2;
245
246                 x0 = vptrs[0]->x;  y0 = vptrs[0]->y;
247                 x1 = vptrs[1]->x;  y1 = vptrs[1]->y;
248                 x2 = vptrs[2]->x;  y2 = vptrs[2]->y;
249
250                 ya = y2 - y0;
251                 yb = y2 - y1;
252                 dx = (x2 - x1) * ya - (x2 - x0) * yb;
253
254                 for (int loop0 = 2; loop0; loop0--) {
255                         if (loop0 == 2) {
256                                 ya = y0;  yb = y1;
257                                 x3 = x4 = i2x(x0);
258                                 if (dx < 0) {
259 #ifdef GPU_UNAI_USE_FLOATMATH
260 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
261                                         dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
262                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
263 #else
264                                         dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
265                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
266 #endif
267 #else  // Integer Division:
268 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
269                                         dx3 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
270                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
271 #else
272                                         dx3 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
273                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
274 #endif
275 #endif
276                                 } else {
277 #ifdef GPU_UNAI_USE_FLOATMATH
278 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
279                                         dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
280                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
281 #else
282                                         dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
283                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
284 #endif
285 #else  // Integer Division:
286 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
287                                         dx3 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
288                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
289 #else
290                                         dx3 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
291                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
292 #endif
293 #endif
294                                 }
295                         } else {
296                                 //senquack - break out of final loop if nothing to be drawn (1st loop
297                                 //           must always be taken to setup dx3/dx4)
298                                 if (y1 == y2) break;
299
300                                 ya = y1;  yb = y2;
301
302                                 if (dx < 0) {
303                                         x3 = i2x(x0) + (dx3 * (y1 - y0));
304                                         x4 = i2x(x1);
305 #ifdef GPU_UNAI_USE_FLOATMATH
306 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
307                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
308 #else
309                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
310 #endif
311 #else  // Integer Division:
312 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
313                                         dx4 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
314 #else
315                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
316 #endif
317 #endif
318                                 } else {
319                                         x3 = i2x(x1);
320                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
321 #ifdef GPU_UNAI_USE_FLOATMATH
322 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
323                                         dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
324 #else
325                                         dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
326 #endif
327 #else  // Integer Division:
328 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
329                                         dx3 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
330 #else
331                                         dx3 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
332 #endif
333 #endif
334                                 }
335                         }
336
337                         s32 xmin, xmax, ymin, ymax;
338                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
339                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
340
341                         if ((ymin - ya) > 0) {
342                                 x3 += (dx3 * (ymin - ya));
343                                 x4 += (dx4 * (ymin - ya));
344                                 ya = ymin;
345                         }
346
347                         if (yb > ymax) yb = ymax;
348
349                         int loop1 = yb - ya;
350                         if (loop1 <= 0)
351                                 continue;
352
353                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
354                         int li=gpu_unai.ilace_mask;
355                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
356                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
357
358                         for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH,
359                                         x3 += dx3, x4 += dx4 )
360                         {
361                                 if (ya&li) continue;
362                                 if ((ya&pi)==pif) continue;
363
364                                 xa = FixedCeilToInt(x3);  xb = FixedCeilToInt(x4);
365                                 if ((xmin - xa) > 0) xa = xmin;
366                                 if (xb > xmax) xb = xmax;
367                                 if ((xb - xa) > 0)
368                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
369                         }
370                 }
371         } while (++cur_pass < total_passes);
372 }
373
374 /*----------------------------------------------------------------------
375 gpuDrawPolyFT - Flat-shaded, textured poly
376 ----------------------------------------------------------------------*/
377 void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
378 {
379         // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light)
380         gpu_unai.r8 = packet.U1[0];
381         gpu_unai.g8 = packet.U1[1];
382         gpu_unai.b8 = packet.U1[2];
383         // r5/g5/b5 used if just texture-blending is applied (15-bit light)
384         gpu_unai.r5 = packet.U1[0] >> 3;
385         gpu_unai.g5 = packet.U1[1] >> 3;
386         gpu_unai.b5 = packet.U1[2] >> 3;
387
388         PolyVertex vbuf[4];
389         polyInitVertexBuffer(vbuf, packet, POLYTYPE_FT, is_quad);
390
391         int total_passes = is_quad ? 2 : 1;
392         int cur_pass = 0;
393         do
394         {
395                 const PolyVertex* vptrs[3];
396                 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
397                         continue;
398
399                 s32 xa, xb, ya, yb;
400                 s32 x3, dx3, x4, dx4, dx;
401                 s32 u3, du3, v3, dv3;
402                 s32 x0, x1, x2, y0, y1, y2;
403                 s32 u0, u1, u2, v0, v1, v2;
404                 s32 du4, dv4;
405
406                 x0 = vptrs[0]->x;      y0 = vptrs[0]->y;
407                 u0 = vptrs[0]->tex.u;  v0 = vptrs[0]->tex.v;
408                 x1 = vptrs[1]->x;      y1 = vptrs[1]->y;
409                 u1 = vptrs[1]->tex.u;  v1 = vptrs[1]->tex.v;
410                 x2 = vptrs[2]->x;      y2 = vptrs[2]->y;
411                 u2 = vptrs[2]->tex.u;  v2 = vptrs[2]->tex.v;
412
413                 ya = y2 - y0;
414                 yb = y2 - y1;
415                 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
416                 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
417                 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
418                 dx = dx4;
419                 if (dx4 < 0) {
420                         dx4 = -dx4;
421                         du4 = -du4;
422                         dv4 = -dv4;
423                 }
424
425 #ifdef GPU_UNAI_USE_FLOATMATH
426 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
427                 if (dx4 != 0) {
428                         float finv = FloatInv(dx4);
429                         du4 = (fixed)((du4 << FIXED_BITS) * finv);
430                         dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
431                 } else {
432                         du4 = dv4 = 0;
433                 }
434 #else
435                 if (dx4 != 0) {
436                         float fdiv = dx4;
437                         du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
438                         dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
439                 } else {
440                         du4 = dv4 = 0;
441                 }
442 #endif
443 #else  // Integer Division:
444 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
445                 if (dx4 != 0) {
446                         int iF, iS;
447                         xInv(dx4, iF, iS);
448                         du4 = xInvMulx(du4, iF, iS);
449                         dv4 = xInvMulx(dv4, iF, iS);
450                 } else {
451                         du4 = dv4 = 0;
452                 }
453 #else
454                 if (dx4 != 0) {
455                         du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
456                         dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
457                 } else {
458                         du4 = dv4 = 0;
459                 }
460 #endif
461 #endif
462                 // Set u,v increments for inner driver
463                 gpu_unai.u_inc = du4;
464                 gpu_unai.v_inc = dv4;
465
466                 //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here?
467                 //                       (SAME ISSUE ELSEWHERE)
468                 for (s32 loop0 = 2; loop0; loop0--) {
469                         if (loop0 == 2) {
470                                 ya = y0;  yb = y1;
471                                 x3 = x4 = i2x(x0);
472                                 u3 = i2x(u0);  v3 = i2x(v0);
473                                 if (dx < 0) {
474 #ifdef GPU_UNAI_USE_FLOATMATH
475 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
476                                         if ((y2 - y0) != 0) {
477                                                 float finv = FloatInv(y2 - y0);
478                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
479                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
480                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
481                                         } else {
482                                                 dx3 = du3 = dv3 = 0;
483                                         }
484                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
485 #else
486                                         if ((y2 - y0) != 0) {
487                                                 float fdiv = y2 - y0;
488                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
489                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
490                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
491                                         } else {
492                                                 dx3 = du3 = dv3 = 0;
493                                         }
494                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
495 #endif
496 #else  // Integer Division:
497 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
498                                         if ((y2 - y0) != 0) {
499                                                 int iF, iS;
500                                                 xInv((y2 - y0), iF, iS);
501                                                 dx3 = xInvMulx((x2 - x0), iF, iS);
502                                                 du3 = xInvMulx((u2 - u0), iF, iS);
503                                                 dv3 = xInvMulx((v2 - v0), iF, iS);
504                                         } else {
505                                                 dx3 = du3 = dv3 = 0;
506                                         }
507                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
508 #else
509                                         if ((y2 - y0) != 0) {
510                                                 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
511                                                 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
512                                                 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
513                                         } else {
514                                                 dx3 = du3 = dv3 = 0;
515                                         }
516                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
517 #endif
518 #endif
519                                 } else {
520 #ifdef GPU_UNAI_USE_FLOATMATH
521 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
522                                         if ((y1 - y0) != 0) {
523                                                 float finv = FloatInv(y1 - y0);
524                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
525                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
526                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
527                                         } else {
528                                                 dx3 = du3 = dv3 = 0;
529                                         }
530                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
531 #else
532                                         if ((y1 - y0) != 0) {
533                                                 float fdiv = y1 - y0;
534                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
535                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
536                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
537                                         } else {
538                                                 dx3 = du3 = dv3 = 0;
539                                         }
540                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
541 #endif
542 #else  // Integer Division:
543 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
544                                         if ((y1 - y0) != 0) {
545                                                 int iF, iS;
546                                                 xInv((y1 - y0), iF, iS);
547                                                 dx3 = xInvMulx((x1 - x0), iF, iS);
548                                                 du3 = xInvMulx((u1 - u0), iF, iS);
549                                                 dv3 = xInvMulx((v1 - v0), iF, iS);
550                                         } else {
551                                                 dx3 = du3 = dv3 = 0;
552                                         }
553                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
554 #else
555                                         if ((y1 - y0) != 0) {
556                                                 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
557                                                 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
558                                                 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
559                                         } else {
560                                                 dx3 = du3 = dv3 = 0;
561                                         }
562                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
563 #endif
564 #endif
565                                 }
566                         } else {
567                                 //senquack - break out of final loop if nothing to be drawn (1st loop
568                                 //           must always be taken to setup dx3/dx4)
569                                 if (y1 == y2) break;
570
571                                 ya = y1;  yb = y2;
572
573                                 if (dx < 0) {
574                                         x3 = i2x(x0);
575                                         x4 = i2x(x1);
576                                         u3 = i2x(u0);
577                                         v3 = i2x(v0);
578                                         if ((y1 - y0) != 0) {
579                                                 x3 += (dx3 * (y1 - y0));
580                                                 u3 += (du3 * (y1 - y0));
581                                                 v3 += (dv3 * (y1 - y0));
582                                         }
583 #ifdef GPU_UNAI_USE_FLOATMATH
584 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
585                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
586 #else
587                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
588 #endif
589 #else  // Integer Division:
590 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
591                                         dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
592 #else
593                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
594 #endif
595 #endif
596                                 } else {
597                                         x3 = i2x(x1);
598                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
599                                         u3 = i2x(u1);
600                                         v3 = i2x(v1);
601 #ifdef GPU_UNAI_USE_FLOATMATH
602 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
603                                         if ((y2 - y1) != 0) {
604                                                 float finv = FloatInv(y2 - y1);
605                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
606                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
607                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
608                                         } else {
609                                                 dx3 = du3 = dv3 = 0;
610                                         }
611 #else
612                                         if ((y2 - y1) != 0) {
613                                                 float fdiv = y2 - y1;
614                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
615                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
616                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
617                                         } else {
618                                                 dx3 = du3 = dv3 = 0;
619                                         }
620 #endif
621 #else  // Integer Division:
622 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
623                                         if ((y2 - y1) != 0) {
624                                                 int iF, iS;
625                                                 xInv((y2 - y1), iF, iS);
626                                                 dx3 = xInvMulx((x2 - x1), iF, iS);
627                                                 du3 = xInvMulx((u2 - u1), iF, iS);
628                                                 dv3 = xInvMulx((v2 - v1), iF, iS);
629                                         } else {
630                                                 dx3 = du3 = dv3 = 0;
631                                         }
632 #else 
633                                         if ((y2 - y1) != 0) {
634                                                 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
635                                                 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
636                                                 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
637                                         } else {
638                                                 dx3 = du3 = dv3 = 0;
639                                         }
640 #endif
641 #endif
642                                 }
643                         }
644
645                         s32 xmin, xmax, ymin, ymax;
646                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
647                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
648
649                         if ((ymin - ya) > 0) {
650                                 x3 += dx3 * (ymin - ya);
651                                 x4 += dx4 * (ymin - ya);
652                                 u3 += du3 * (ymin - ya);
653                                 v3 += dv3 * (ymin - ya);
654                                 ya = ymin;
655                         }
656
657                         if (yb > ymax) yb = ymax;
658
659                         int loop1 = yb - ya;
660                         if (loop1 <= 0)
661                                 continue;
662
663                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
664                         int li=gpu_unai.ilace_mask;
665                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
666                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
667
668                         for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
669                                         x3 += dx3, x4 += dx4,
670                                         u3 += du3, v3 += dv3 )
671                         {
672                                 if (ya&li) continue;
673                                 if ((ya&pi)==pif) continue;
674
675                                 u32 u4, v4;
676
677                                 xa = FixedCeilToInt(x3);  xb = FixedCeilToInt(x4);
678                                 u4 = u3;  v4 = v3;
679
680                                 fixed itmp = i2x(xa) - x3;
681                                 if (itmp != 0) {
682                                         u4 += (du4 * itmp) >> FIXED_BITS;
683                                         v4 += (dv4 * itmp) >> FIXED_BITS;
684                                 }
685
686                                 u4 += fixed_HALF;
687                                 v4 += fixed_HALF;
688
689                                 if ((xmin - xa) > 0) {
690                                         u4 += du4 * (xmin - xa);
691                                         v4 += dv4 * (xmin - xa);
692                                         xa = xmin;
693                                 }
694
695                                 // Set u,v coords for inner driver
696                                 gpu_unai.u = u4;
697                                 gpu_unai.v = v4;
698
699                                 if (xb > xmax) xb = xmax;
700                                 if ((xb - xa) > 0)
701                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
702                         }
703                 }
704         } while (++cur_pass < total_passes);
705 }
706
707 /*----------------------------------------------------------------------
708 gpuDrawPolyG - Gouraud-shaded, untextured poly
709 ----------------------------------------------------------------------*/
// Rasterizes one Gouraud-shaded, untextured polygon, scanline by scanline.
//
//   packet            - command packet handed to polyInitVertexBuffer() to
//                       fill the local 4-entry vertex buffer.
//   gpuPolySpanDriver - inner driver, called once per visible horizontal span
//                       as (gpu_unai, pointer to first pixel, pixel count).
//   is_quad           - non-zero: two passes are made over the vertex buffer;
//                       zero: a single pass.
//
// Side effects visible here: per pass gpu_unai.gInc is set to the packed
// per-pixel colour increment; per drawn scanline gpu_unai.gCol is set to the
// packed starting colour; pixels are written only through gpuPolySpanDriver.
//
// NOTE(review): the edge walking below treats y0 <= y1 <= y2 as given;
// presumably polyUseTriangle() returns the vertices sorted that way -- confirm.
710 void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
711 {
712         PolyVertex vbuf[4];
713         polyInitVertexBuffer(vbuf, packet, POLYTYPE_G, is_quad);
714
            // One pass per triangle; presumably a quad is drawn as two
            // triangles chosen by polyUseTriangle() -- TODO confirm.
715         int total_passes = is_quad ? 2 : 1;
716         int cur_pass = 0;
717         do
718         {
719                 const PolyVertex* vptrs[3];
                    // Pass rejected: skip it. The continue still evaluates the
                    // do-while condition, so cur_pass is advanced.
720                 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
721                         continue;
722
                    // xa/xb, ya/yb : integer span and row bounds
                    // x3/x4        : fixed-point X of the span start / span end edge
                    // dx3/dx4      : per-row steps of x3/x4 (dx4 first holds the
                    //                determinant computed below)
                    // dx           : signed copy of that determinant
                    // r3/g3/b3     : fixed-point colour at x3, stepped per row by
                    //                dr3/dg3/db3
                    // dr4/dg4/db4  : fixed-point colour step per pixel along X
723                 s32 xa, xb, ya, yb;
724                 s32 x3, dx3, x4, dx4, dx;
725                 s32 r3, dr3, g3, dg3, b3, db3;
726                 s32 x0, x1, x2, y0, y1, y2;
727                 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
728                 s32 dr4, dg4, db4;
729
730                 x0 = vptrs[0]->x;      y0 = vptrs[0]->y;
731                 r0 = vptrs[0]->col.r;  g0 = vptrs[0]->col.g;  b0 = vptrs[0]->col.b;
732                 x1 = vptrs[1]->x;      y1 = vptrs[1]->y;
733                 r1 = vptrs[1]->col.r;  g1 = vptrs[1]->col.g;  b1 = vptrs[1]->col.b;
734                 x2 = vptrs[2]->x;      y2 = vptrs[2]->y;
735                 r2 = vptrs[2]->col.r;  g2 = vptrs[2]->col.g;  b2 = vptrs[2]->col.b;
736
                    // ya/yb are temporaries here (edge heights). dx4 is the
                    // determinant of the edge vectors (v2 - v1) and (v2 - v0);
                    // dr4/dg4/db4 are the same expression with a colour channel
                    // in place of x. Each colour value divided by dx4 (done
                    // below) is the colour change per pixel along X.
737                 ya = y2 - y0;
738                 yb = y2 - y1;
739                 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
740                 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
741                 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
742                 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
                    // Keep the signed determinant: its sign selects below which
                    // triangle edge drives x3 and which drives x4.
743                 dx = dx4;
                    // Make the divisor positive. Numerators are negated with it,
                    // so the quotients are unchanged.
744                 if (dx4 < 0) {
745                         dx4 = -dx4;
746                         dr4 = -dr4;
747                         dg4 = -dg4;
748                         db4 = -db4;
749                 }
750
                    // Four build-time variants of the same fixed-point division
                    // (float multiply by FloatInv(), float divide, integer
                    // xInv()/xInvMulx(), or GPU_FAST_DIV). A zero determinant
                    // gives zero colour increments in every variant.
751 #ifdef GPU_UNAI_USE_FLOATMATH
752 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
753                 if (dx4 != 0) {
754                         float finv = FloatInv(dx4);
755                         dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
756                         dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
757                         db4 = (fixed)((db4 << FIXED_BITS) * finv);
758                 } else {
759                         dr4 = dg4 = db4 = 0;
760                 }
761 #else
762                 if (dx4 != 0) {
763                         float fdiv = dx4;
764                         dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
765                         dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
766                         db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
767                 } else {
768                         dr4 = dg4 = db4 = 0;
769                 }
770 #endif
771 #else  // Integer Division:
772 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
773                 if (dx4 != 0) {
774                         int iF, iS;
775                         xInv(dx4, iF, iS);
776                         dr4 = xInvMulx(dr4, iF, iS);
777                         dg4 = xInvMulx(dg4, iF, iS);
778                         db4 = xInvMulx(db4, iF, iS);
779                 } else {
780                         dr4 = dg4 = db4 = 0;
781                 }
782 #else
783                 if (dx4 != 0) {
784                         dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
785                         dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
786                         db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
787                 } else {
788                         dr4 = dg4 = db4 = 0;
789                 }
790 #endif
791 #endif
792                 // Setup packed Gouraud increment for inner driver
793                 gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
794
                    // The triangle is walked in two parts:
                    //   loop0 == 2 : rows y0 .. y1
                    //   loop0 == 1 : rows y1 .. y2
                    // From here on dx4 is reused as the per-row step of x4.
795                 for (s32 loop0 = 2; loop0; loop0--) {
796                         if (loop0 == 2) {
                                    // First part: both edges and the colour start
                                    // at vertex 0.
797                                 ya = y0;
798                                 yb = y1;
799                                 x3 = x4 = i2x(x0);
800                                 r3 = i2x(r0);
801                                 g3 = i2x(g0);
802                                 b3 = i2x(b0);
803                                 if (dx < 0) {
                                            // x3 and the colour follow edge v0-v2,
                                            // x4 follows edge v0-v1. A zero-height
                                            // edge gets zero steps.
804 #ifdef GPU_UNAI_USE_FLOATMATH
805 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
806                                         if ((y2 - y0) != 0) {
807                                                 float finv = FloatInv(y2 - y0);
808                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
809                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
810                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
811                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
812                                         } else {
813                                                 dx3 = dr3 = dg3 = db3 = 0;
814                                         }
815                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
816 #else
817                                         if ((y2 - y0) != 0) {
818                                                 float fdiv = y2 - y0;
819                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
820                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
821                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
822                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
823                                         } else {
824                                                 dx3 = dr3 = dg3 = db3 = 0;
825                                         }
826                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
827 #endif
828 #else  // Integer Division:
829 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
830                                         if ((y2 - y0) != 0) {
831                                                 int iF, iS;
832                                                 xInv((y2 - y0), iF, iS);
833                                                 dx3 = xInvMulx((x2 - x0), iF, iS);
834                                                 dr3 = xInvMulx((r2 - r0), iF, iS);
835                                                 dg3 = xInvMulx((g2 - g0), iF, iS);
836                                                 db3 = xInvMulx((b2 - b0), iF, iS);
837                                         } else {
838                                                 dx3 = dr3 = dg3 = db3 = 0;
839                                         }
840                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
841 #else
842                                         if ((y2 - y0) != 0) {
843                                                 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
844                                                 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
845                                                 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
846                                                 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
847                                         } else {
848                                                 dx3 = dr3 = dg3 = db3 = 0;
849                                         }
850                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
851 #endif
852 #endif
853                                 } else {
                                            // x3 and the colour follow edge v0-v1,
                                            // x4 follows edge v0-v2.
854 #ifdef GPU_UNAI_USE_FLOATMATH
855 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
856                                         if ((y1 - y0) != 0) {
857                                                 float finv = FloatInv(y1 - y0);
858                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
859                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
860                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
861                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
862                                         } else {
863                                                 dx3 = dr3 = dg3 = db3 = 0;
864                                         }
865                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
866 #else
867                                         if ((y1 - y0) != 0) {
868                                                 float fdiv = y1 - y0;
869                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
870                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
871                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
872                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
873                                         } else {
874                                                 dx3 = dr3 = dg3 = db3 = 0;
875                                         }
876                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
877 #endif
878 #else  // Integer Division:
879 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
880                                         if ((y1 - y0) != 0) {
881                                                 int iF, iS;
882                                                 xInv((y1 - y0), iF, iS);
883                                                 dx3 = xInvMulx((x1 - x0), iF, iS);
884                                                 dr3 = xInvMulx((r1 - r0), iF, iS);
885                                                 dg3 = xInvMulx((g1 - g0), iF, iS);
886                                                 db3 = xInvMulx((b1 - b0), iF, iS);
887                                         } else {
888                                                 dx3 = dr3 = dg3 = db3 = 0;
889                                         }
890                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
891 #else
892                                         if ((y1 - y0) != 0) {
893                                                 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
894                                                 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
895                                                 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
896                                                 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
897                                         } else {
898                                                 dx3 = dr3 = dg3 = db3 = 0;
899                                         }
900                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
901 #endif
902 #endif
903                                 }
904                         } else {
905                                 //senquack - break out of final loop if nothing to be drawn (1st loop
906                                 //           must always be taken to setup dx3/dx4)
907                                 if (y1 == y2) break;
908
909                                 ya = y1;  yb = y2;
910
911                                 if (dx < 0) {
                                            // x3 and the colour stay on edge v0-v2:
                                            // their values at row y1 are rebuilt
                                            // from vertex 0 using the steps from
                                            // the first part (dx3/dr3/dg3/db3 are
                                            // kept). x4 restarts at vertex 1 and
                                            // gets the slope of edge v1-v2.
912                                         x3 = i2x(x0);  x4 = i2x(x1);
913                                         r3 = i2x(r0);  g3 = i2x(g0);  b3 = i2x(b0);
914
915                                         if ((y1 - y0) != 0) {
916                                                 x3 += (dx3 * (y1 - y0));
917                                                 r3 += (dr3 * (y1 - y0));
918                                                 g3 += (dg3 * (y1 - y0));
919                                                 b3 += (db3 * (y1 - y0));
920                                         }
921
922 #ifdef GPU_UNAI_USE_FLOATMATH
923 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
924                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
925 #else
926                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
927 #endif
928 #else  // Integer Division:
929 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
930                                         dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
931 #else
932                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
933 #endif
934 #endif
935                                 } else {
                                            // x3 and the colour restart at vertex 1
                                            // and follow edge v1-v2. x4 stays on
                                            // edge v0-v2: its value at row y1 is
                                            // rebuilt from vertex 0 with the dx4
                                            // computed in the first part.
936                                         x3 = i2x(x1);
937                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
938
939                                         r3 = i2x(r1);  g3 = i2x(g1);  b3 = i2x(b1);
940
941 #ifdef GPU_UNAI_USE_FLOATMATH
942 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
943                                         if ((y2 - y1) != 0) {
944                                                 float finv = FloatInv(y2 - y1);
945                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
946                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
947                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
948                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
949                                         } else {
950                                                 dx3 = dr3 = dg3 = db3 = 0;
951                                         }
952 #else
953                                         if ((y2 - y1) != 0) {
954                                                 float fdiv = y2 - y1;
955                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
956                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
957                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
958                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
959                                         } else {
960                                                 dx3 = dr3 = dg3 = db3 = 0;
961                                         }
962 #endif
963 #else  // Integer Division:
964 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
965                                         if ((y2 - y1) != 0) {
966                                                 int iF, iS;
967                                                 xInv((y2 - y1), iF, iS);
968                                                 dx3 = xInvMulx((x2 - x1), iF, iS);
969                                                 dr3 = xInvMulx((r2 - r1), iF, iS);
970                                                 dg3 = xInvMulx((g2 - g1), iF, iS);
971                                                 db3 = xInvMulx((b2 - b1), iF, iS);
972                                         } else {
973                                                 dx3 = dr3 = dg3 = db3 = 0;
974                                         }
975 #else
976                                         if ((y2 - y1) != 0) {
977                                                 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
978                                                 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
979                                                 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
980                                                 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
981                                         } else {
982                                                 dx3 = dr3 = dg3 = db3 = 0;
983                                         }
984 #endif
985 #endif
986                                 }
987                         }
988
                            // Clip limits, read from gpu_unai.DrawingArea in the
                            // order [0]=xmin, [1]=ymin, [2]=xmax, [3]=ymax. The
                            // max values are used as exclusive bounds below.
989                         s32 xmin, xmax, ymin, ymax;
990                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
991                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
992
                            // Part starts above ymin: advance both edges and the
                            // colour by the number of skipped rows.
993                         if ((ymin - ya) > 0) {
994                                 x3 += (dx3 * (ymin - ya));
995                                 x4 += (dx4 * (ymin - ya));
996                                 r3 += (dr3 * (ymin - ya));
997                                 g3 += (dg3 * (ymin - ya));
998                                 b3 += (db3 * (ymin - ya));
999                                 ya = ymin;
1000                         }
1001
1002                         if (yb > ymax) yb = ymax;
1003
                             // Rows ya .. yb-1 remain. If none are left, move on
                             // to the next part (the for-loop decrement still runs).
1004                         int loop1 = yb - ya;
1005                         if (loop1 <= 0)
1006                                 continue;
1007
                             // PixelBase is advanced by FRAME_WIDTH per row below;
                             // presumably it points at column 0 of row ya -- confirm
                             // against FRAME_OFFSET.
                             // li      : rows with any of these bits set are skipped.
                             // pi, pif : rows where (ya & pi) == pif are skipped. With
                             //           progressive interlace disabled pi is 0 and
                             //           pif is 1, so that test never matches.
1008                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
1009                         int li=gpu_unai.ilace_mask;
1010                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
1011                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
1012
                             // One iteration per row. The increment clause also runs
                             // for rows skipped via continue, so edges and colour
                             // stay in step with ya.
1013                         for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1014                                         x3 += dx3, x4 += dx4,
1015                                         r3 += dr3, g3 += dg3, b3 += db3 )
1016                         {
1017                                 if (ya&li) continue;
1018                                 if ((ya&pi)==pif) continue;
1019
1020                                 u32 r4, g4, b4;
1021
                                     // Span is [xa, xb): both ends come from
                                     // FixedCeilToInt() of the edge positions.
1022                                 xa = FixedCeilToInt(x3);
1023                                 xb = FixedCeilToInt(x4);
1024                                 r4 = r3;  g4 = g3;  b4 = b3;
1025
                                     // itmp is the fixed-point distance from the
                                     // edge position x3 to the first pixel xa; the
                                     // colour is pre-stepped by that fraction of the
                                     // per-pixel increment.
1026                                 fixed itmp = i2x(xa) - x3;
1027                                 if (itmp != 0) {
1028                                         r4 += (dr4 * itmp) >> FIXED_BITS;
1029                                         g4 += (dg4 * itmp) >> FIXED_BITS;
1030                                         b4 += (db4 * itmp) >> FIXED_BITS;
1031                                 }
1032
                                     // NOTE(review): presumably a rounding bias for
                                     // the later conversion to packed colour -- confirm.
1033                                 r4 += fixed_HALF;
1034                                 g4 += fixed_HALF;
1035                                 b4 += fixed_HALF;
1036
                                     // Span starts before xmin: advance the colour by
                                     // the skipped pixels and start at xmin.
1037                                 if ((xmin - xa) > 0) {
1038                                         r4 += (dr4 * (xmin - xa));
1039                                         g4 += (dg4 * (xmin - xa));
1040                                         b4 += (db4 * (xmin - xa));
1041                                         xa = xmin;
1042                                 }
1043
1044                                 // Setup packed Gouraud color for inner driver
1045                                 gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4);
1046
                                     // Clamp the span end and draw only non-empty spans.
1047                                 if (xb > xmax) xb = xmax;
1048                                 if ((xb - xa) > 0)
1049                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
1050                         }
1051                 }
1052         } while (++cur_pass < total_passes);
1053 }
1054
1055 /*----------------------------------------------------------------------
1056 gpuDrawPolyGT - Gouraud-shaded, textured poly
1057 ----------------------------------------------------------------------*/
1058 void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
1059 {
1060         PolyVertex vbuf[4];
1061         polyInitVertexBuffer(vbuf, packet, POLYTYPE_GT, is_quad);
1062
1063         int total_passes = is_quad ? 2 : 1;
1064         int cur_pass = 0;
1065         do
1066         {
1067                 const PolyVertex* vptrs[3];
1068                 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
1069                         continue;
1070
1071                 s32 xa, xb, ya, yb;
1072                 s32 x3, dx3, x4, dx4, dx;
1073                 s32 u3, du3, v3, dv3;
1074                 s32 r3, dr3, g3, dg3, b3, db3;
1075                 s32 x0, x1, x2, y0, y1, y2;
1076                 s32 u0, u1, u2, v0, v1, v2;
1077                 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
1078                 s32 du4, dv4;
1079                 s32 dr4, dg4, db4;
1080
1081                 x0 = vptrs[0]->x;      y0 = vptrs[0]->y;
1082                 u0 = vptrs[0]->tex.u;  v0 = vptrs[0]->tex.v;
1083                 r0 = vptrs[0]->col.r;  g0 = vptrs[0]->col.g;  b0 = vptrs[0]->col.b;
1084                 x1 = vptrs[1]->x;      y1 = vptrs[1]->y;
1085                 u1 = vptrs[1]->tex.u;  v1 = vptrs[1]->tex.v;
1086                 r1 = vptrs[1]->col.r;  g1 = vptrs[1]->col.g;  b1 = vptrs[1]->col.b;
1087                 x2 = vptrs[2]->x;      y2 = vptrs[2]->y;
1088                 u2 = vptrs[2]->tex.u;  v2 = vptrs[2]->tex.v;
1089                 r2 = vptrs[2]->col.r;  g2 = vptrs[2]->col.g;  b2 = vptrs[2]->col.b;
1090
1091                 ya = y2 - y0;
1092                 yb = y2 - y1;
1093                 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
1094                 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
1095                 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
1096                 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
1097                 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
1098                 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
1099                 dx = dx4;
1100                 if (dx4 < 0) {
1101                         dx4 = -dx4;
1102                         du4 = -du4;
1103                         dv4 = -dv4;
1104                         dr4 = -dr4;
1105                         dg4 = -dg4;
1106                         db4 = -db4;
1107                 }
1108
1109 #ifdef GPU_UNAI_USE_FLOATMATH
1110 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1111                 if (dx4 != 0) {
1112                         float finv = FloatInv(dx4);
1113                         du4 = (fixed)((du4 << FIXED_BITS) * finv);
1114                         dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
1115                         dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
1116                         dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
1117                         db4 = (fixed)((db4 << FIXED_BITS) * finv);
1118                 } else {
1119                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1120                 }
1121 #else
1122                 if (dx4 != 0) {
1123                         float fdiv = dx4;
1124                         du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
1125                         dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
1126                         dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
1127                         dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
1128                         db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
1129                 } else {
1130                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1131                 }
1132 #endif
1133 #else  // Integer Division:
1134 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1135                 if (dx4 != 0) {
1136                         int iF, iS;
1137                         xInv(dx4, iF, iS);
1138                         du4 = xInvMulx(du4, iF, iS);
1139                         dv4 = xInvMulx(dv4, iF, iS);
1140                         dr4 = xInvMulx(dr4, iF, iS);
1141                         dg4 = xInvMulx(dg4, iF, iS);
1142                         db4 = xInvMulx(db4, iF, iS);
1143                 } else {
1144                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1145                 }
1146 #else
1147                 if (dx4 != 0) {
1148                         du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
1149                         dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
1150                         dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
1151                         dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
1152                         db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
1153                 } else {
1154                         du4 = dv4 = dr4 = dg4 = db4 = 0;
1155                 }
1156 #endif
1157 #endif
1158                 // Set u,v increments and packed Gouraud increment for inner driver
1159                 gpu_unai.u_inc = du4;
1160                 gpu_unai.v_inc = dv4;
1161                 gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
1162
1163                 for (s32 loop0 = 2; loop0; loop0--) {
1164                         if (loop0 == 2) {
1165                                 ya = y0;  yb = y1;
1166                                 x3 = x4 = i2x(x0);
1167                                 u3 = i2x(u0);  v3 = i2x(v0);
1168                                 r3 = i2x(r0);  g3 = i2x(g0);  b3 = i2x(b0);
1169                                 if (dx < 0) {
1170 #ifdef GPU_UNAI_USE_FLOATMATH
1171 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1172                                         if ((y2 - y0) != 0) {
1173                                                 float finv = FloatInv(y2 - y0);
1174                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
1175                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
1176                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
1177                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
1178                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
1179                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
1180                                         } else {
1181                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1182                                         }
1183                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
1184 #else
1185                                         if ((y2 - y0) != 0) {
1186                                                 float fdiv = y2 - y0;
1187                                                 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
1188                                                 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
1189                                                 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
1190                                                 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
1191                                                 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
1192                                                 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
1193                                         } else {
1194                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1195                                         }
1196                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
1197 #endif
1198 #else  // Integer Division:
1199 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1200                                         if ((y2 - y0) != 0) {
1201                                                 int iF, iS;
1202                                                 xInv((y2 - y0), iF, iS);
1203                                                 dx3 = xInvMulx((x2 - x0), iF, iS);
1204                                                 du3 = xInvMulx((u2 - u0), iF, iS);
1205                                                 dv3 = xInvMulx((v2 - v0), iF, iS);
1206                                                 dr3 = xInvMulx((r2 - r0), iF, iS);
1207                                                 dg3 = xInvMulx((g2 - g0), iF, iS);
1208                                                 db3 = xInvMulx((b2 - b0), iF, iS);
1209                                         } else {
1210                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1211                                         }
1212                                         dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
1213 #else
1214                                         if ((y2 - y0) != 0) {
1215                                                 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
1216                                                 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
1217                                                 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
1218                                                 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
1219                                                 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
1220                                                 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
1221                                         } else {
1222                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1223                                         }
1224                                         dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
1225 #endif
1226 #endif
1227                                 } else {
1228 #ifdef GPU_UNAI_USE_FLOATMATH
1229 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1230                                         if ((y1 - y0) != 0) {
1231                                                 float finv = FloatInv(y1 - y0);
1232                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
1233                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
1234                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
1235                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
1236                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
1237                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
1238                                         } else {
1239                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1240                                         }
1241                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
1242 #else
1243                                         if ((y1 - y0) != 0) {
1244                                                 float fdiv = y1 - y0;
1245                                                 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
1246                                                 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
1247                                                 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
1248                                                 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
1249                                                 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
1250                                                 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
1251                                         } else {
1252                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1253                                         }
1254                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / float(y2 - y0)) : 0;
1255 #endif
1256 #else  // Integer Division:
1257 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1258                                         if ((y1 - y0) != 0) {
1259                                                 int iF, iS;
1260                                                 xInv((y1 - y0), iF, iS);
1261                                                 dx3 = xInvMulx((x1 - x0), iF, iS);
1262                                                 du3 = xInvMulx((u1 - u0), iF, iS);
1263                                                 dv3 = xInvMulx((v1 - v0), iF, iS);
1264                                                 dr3 = xInvMulx((r1 - r0), iF, iS);
1265                                                 dg3 = xInvMulx((g1 - g0), iF, iS);
1266                                                 db3 = xInvMulx((b1 - b0), iF, iS);
1267                                         } else {
1268                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1269                                         }
1270                                         dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
1271 #else
1272                                         if ((y1 - y0) != 0) {
1273                                                 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
1274                                                 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
1275                                                 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
1276                                                 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
1277                                                 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
1278                                                 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
1279                                         } else {
1280                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1281                                         }
1282                                         dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
1283 #endif
1284 #endif
1285                                 }
1286                         } else {
1287                                 //senquack - break out of final loop if nothing to be drawn (1st loop
1288                                 //           must always be taken to setup dx3/dx4)
1289                                 if (y1 == y2) break;
1290
1291                                 ya = y1;  yb = y2;
1292
1293                                 if (dx < 0) {
1294                                         x3 = i2x(x0);  x4 = i2x(x1);
1295                                         u3 = i2x(u0);  v3 = i2x(v0);
1296                                         r3 = i2x(r0);  g3 = i2x(g0);  b3 = i2x(b0);
1297
1298                                         if ((y1 - y0) != 0) {
1299                                                 x3 += (dx3 * (y1 - y0));
1300                                                 u3 += (du3 * (y1 - y0));
1301                                                 v3 += (dv3 * (y1 - y0));
1302                                                 r3 += (dr3 * (y1 - y0));
1303                                                 g3 += (dg3 * (y1 - y0));
1304                                                 b3 += (db3 * (y1 - y0));
1305                                         }
1306
1307 #ifdef GPU_UNAI_USE_FLOATMATH
1308 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1309                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
1310 #else
1311                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
1312 #endif
1313 #else  // Integer Division:
1314 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1315                                         dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
1316 #else
1317                                         dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
1318 #endif
1319 #endif
1320                                 } else {
1321                                         x3 = i2x(x1);
1322                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
1323
1324                                         u3 = i2x(u1);  v3 = i2x(v1);
1325                                         r3 = i2x(r1);  g3 = i2x(g1);  b3 = i2x(b1);
1326 #ifdef GPU_UNAI_USE_FLOATMATH
1327 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1328                                         if ((y2 - y1) != 0) {
1329                                                 float finv = FloatInv(y2 - y1);
1330                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
1331                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
1332                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
1333                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
1334                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
1335                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
1336                                         } else {
1337                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1338                                         }
1339 #else
1340                                         if ((y2 - y1) != 0) {
1341                                                 float fdiv = y2 - y1;
1342                                                 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
1343                                                 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
1344                                                 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
1345                                                 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
1346                                                 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
1347                                                 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
1348                                         } else {
1349                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1350                                         }
1351 #endif
1352 #else  // Integer Division:
1353 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1354                                         if ((y2 - y1) != 0) {
1355                                                 int iF, iS;
1356                                                 xInv((y2 - y1), iF, iS);
1357                                                 dx3 = xInvMulx((x2 - x1), iF, iS);
1358                                                 du3 = xInvMulx((u2 - u1), iF, iS);
1359                                                 dv3 = xInvMulx((v2 - v1), iF, iS);
1360                                                 dr3 = xInvMulx((r2 - r1), iF, iS);
1361                                                 dg3 = xInvMulx((g2 - g1), iF, iS);
1362                                                 db3 = xInvMulx((b2 - b1), iF, iS);
1363                                         } else {
1364                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1365                                         }
1366 #else
1367                                         if ((y2 - y1) != 0) {
1368                                                 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
1369                                                 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
1370                                                 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
1371                                                 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
1372                                                 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
1373                                                 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
1374                                         } else {
1375                                                 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1376                                         }
1377 #endif
1378 #endif
1379                                 }
1380                         }
1381
1382                         s32 xmin, xmax, ymin, ymax;
1383                         xmin = gpu_unai.DrawingArea[0];  xmax = gpu_unai.DrawingArea[2];
1384                         ymin = gpu_unai.DrawingArea[1];  ymax = gpu_unai.DrawingArea[3];
1385
1386                         if ((ymin - ya) > 0) {
1387                                 x3 += (dx3 * (ymin - ya));
1388                                 x4 += (dx4 * (ymin - ya));
1389                                 u3 += (du3 * (ymin - ya));
1390                                 v3 += (dv3 * (ymin - ya));
1391                                 r3 += (dr3 * (ymin - ya));
1392                                 g3 += (dg3 * (ymin - ya));
1393                                 b3 += (db3 * (ymin - ya));
1394                                 ya = ymin;
1395                         }
1396
1397                         if (yb > ymax) yb = ymax;
1398
1399                         int loop1 = yb - ya;
1400                         if (loop1 <= 0)
1401                                 continue;
1402
1403                         le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
1404                         int li=gpu_unai.ilace_mask;
1405                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
1406                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
1407
1408                         for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1409                                         x3 += dx3, x4 += dx4,
1410                                         u3 += du3, v3 += dv3,
1411                                         r3 += dr3, g3 += dg3, b3 += db3 )
1412                         {
1413                                 if (ya&li) continue;
1414                                 if ((ya&pi)==pif) continue;
1415
1416                                 u32 u4, v4;
1417                                 u32 r4, g4, b4;
1418
1419                                 xa = FixedCeilToInt(x3);
1420                                 xb = FixedCeilToInt(x4);
1421                                 u4 = u3;  v4 = v3;
1422                                 r4 = r3;  g4 = g3;  b4 = b3;
1423
1424                                 fixed itmp = i2x(xa) - x3;
1425                                 if (itmp != 0) {
1426                                         u4 += (du4 * itmp) >> FIXED_BITS;
1427                                         v4 += (dv4 * itmp) >> FIXED_BITS;
1428                                         r4 += (dr4 * itmp) >> FIXED_BITS;
1429                                         g4 += (dg4 * itmp) >> FIXED_BITS;
1430                                         b4 += (db4 * itmp) >> FIXED_BITS;
1431                                 }
1432
1433                                 u4 += fixed_HALF;
1434                                 v4 += fixed_HALF;
1435                                 r4 += fixed_HALF;
1436                                 g4 += fixed_HALF;
1437                                 b4 += fixed_HALF;
1438
1439                                 if ((xmin - xa) > 0) {
1440                                         u4 += du4 * (xmin - xa);
1441                                         v4 += dv4 * (xmin - xa);
1442                                         r4 += dr4 * (xmin - xa);
1443                                         g4 += dg4 * (xmin - xa);
1444                                         b4 += db4 * (xmin - xa);
1445                                         xa = xmin;
1446                                 }
1447
1448                                 // Set packed Gouraud color and u,v coords for inner driver
1449                                 gpu_unai.u = u4;
1450                                 gpu_unai.v = v4;
1451                                 gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4);
1452
1453                                 if (xb > xmax) xb = xmax;
1454                                 if ((xb - xa) > 0)
1455                                         gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
1456                         }
1457                 }
1458         } while (++cur_pass < total_passes);
1459 }
1460
1461 #endif /* __GPU_UNAI_GPU_RASTER_POLYGON_H__ */