1 /***************************************************************************
2 * Copyright (C) 2010 PCSX4ALL Team *
3 * Copyright (C) 2010 Unai *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
19 ***************************************************************************/
21 //senquack - NOTE: GPU Unai poly routines have been rewritten/adapted
22 // from DrHell routines to fix multiple issues. See README_senquack.txt
24 ///////////////////////////////////////////////////////////////////////////////
25 // Shared poly vertex buffer, able to handle 3 or 4-pt polys of any type.
26 ///////////////////////////////////////////////////////////////////////////////
// One polygon vertex: screen position plus optional texture coordinates and
// color. polyInitVertexBuffer() writes x/y and the whole-word members
// tex_word / col_word; the gpuDrawPoly*() routines read the data back
// through the .tex / .col byte views declared here.
// NOTE(review): tex_word / col_word presumably overlay these byte views -- confirm.
29 s32 x, y; // Sign-extended 11-bit X,Y coords
// Byte view of the texture coordinates; pad[2] rounds the struct up to 4 bytes.
31 struct { u8 u, v, pad[2]; } tex; // Texture coords (if used)
// Byte view of the color; pad rounds the struct up to 4 bytes.
35 struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used)
// Single-bit attribute flags. They are OR-ed together to form the POLYTYPE_*
// values and bit-tested individually in polyInitVertexBuffer().
41 POLYATTR_TEXTURE = (1 << 0),
42 POLYATTR_GOURAUD = (1 << 1)
// Poly types expressed as combinations of the POLYATTR_* flags, so a type
// value can be bit-tested for texturing and/or Gouraud shading.
// NOTE(review): POLYTYPE_F (used by gpuDrawPolyF) is presumably the value with
// no attribute bits set -- confirm.
47 POLYTYPE_FT = (POLYATTR_TEXTURE),
48 POLYTYPE_G = (POLYATTR_GOURAUD),
49 POLYTYPE_GT = (POLYATTR_TEXTURE | POLYATTR_GOURAUD)
52 ///////////////////////////////////////////////////////////////////////////////
53 // polyInitVertexBuffer()
54 // Fills vbuf[] array with data from any type of poly draw-command packet.
55 ///////////////////////////////////////////////////////////////////////////////
// Unpacks the vertices of a poly draw-command packet into vbuf[].
//   vbuf    - destination array; entries 0..2 are written for a triangle,
//             entries 0..3 when is_quad is non-zero
//   packet  - draw-command packet the vertex data is read from
//   ptype   - POLYTYPE_* value; its POLYATTR_TEXTURE / POLYATTR_GOURAUD bits
//             are extracted into the 'texturing' / 'gouraud' flags
//   is_quad - non-zero for a 4-vertex poly, zero for a 3-vertex poly
56 static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyType ptype, u32 is_quad)
58 bool texturing = ptype & POLYATTR_TEXTURE;
59 bool gouraud = ptype & POLYATTR_GOURAUD;
// NOTE(review): presumably enlarged when texture and/or color words are
// interleaved with the coordinate words in the packet -- confirm.
61 int vert_stride = 1; // Stride of vertices in cmd packet, in 32-bit words
67 int num_verts = (is_quad) ? 4 : 3;
70 // X,Y coords, adjusted by draw offsets
71 s32 x_off = gpu_unai.DrawingOffset[0];
72 s32 y_off = gpu_unai.DrawingOffset[1];
// Each coordinate word is read as two 16-bit halves: X first, then Y.
74 for (int i=0; i < num_verts; ++i, ptr += vert_stride) {
75 s16* coord_ptr = (s16*)ptr;
// NOTE(review): GPU_EXPANDSIGN presumably sign-extends the 11-bit hardware
// coordinate -- confirm against its definition.
76 vbuf[i].x = GPU_EXPANDSIGN(coord_ptr[0]) + x_off;
77 vbuf[i].y = GPU_EXPANDSIGN(coord_ptr[1]) + y_off;
80 // U,V texture coords (if applicable)
// The whole packet word is stored as-is in tex_word.
83 for (int i=0; i < num_verts; ++i, ptr += vert_stride)
84 vbuf[i].tex_word = *ptr;
87 // Colors (if applicable)
// The whole packet word is stored as-is in col_word.
90 for (int i=0; i < num_verts; ++i, ptr += vert_stride)
91 vbuf[i].col_word = *ptr;
95 ///////////////////////////////////////////////////////////////////////////////
96 // Helper functions to determine which vertex in a 2 or 3 vertex array
97 // has the highest/lowest X/Y coordinate.
98 // Note: ties are deliberately broken in opposite directions. Given a set of
99 // vertices with identical values for a given coordinate, vertIdxOfLeast..()
100 // returns a different index than vertIdxOfHighest..() does.
101 // This ensures that, during the vertex-ordering phase of rasterization,
102 // all three vertices remain unique.
103 ///////////////////////////////////////////////////////////////////////////////
// Returns the index (0 or 1) of the element of Tptr[0..1] with the smaller
// x value. On a tie the lower index, 0, is returned.
106 static inline int vertIdxOfLeastXCoord2(const T *Tptr)
108 return (Tptr[0].x <= Tptr[1].x) ? 0 : 1;
// Returns the index (0..2) of the element of Tptr[0..2] with the smallest
// x value. Index 2 is chosen only when it is strictly smaller than the best
// of the first two, so ties resolve to the lowest index.
112 static inline int vertIdxOfLeastXCoord3(const T *Tptr)
114 int least_of_v0_v1 = vertIdxOfLeastXCoord2(Tptr);
115 return (Tptr[least_of_v0_v1].x <= Tptr[2].x) ? least_of_v0_v1 : 2;
// Returns the index (0 or 1) of the element of Tptr[0..1] with the smaller
// y value. On a tie the lower index, 0, is returned.
119 static inline int vertIdxOfLeastYCoord2(const T *Tptr)
121 return (Tptr[0].y <= Tptr[1].y) ? 0 : 1;
// Returns the index (0..2) of the element of Tptr[0..2] with the smallest
// y value. Index 2 is chosen only when it is strictly smaller than the best
// of the first two, so ties resolve to the lowest index.
125 static inline int vertIdxOfLeastYCoord3(const T *Tptr)
127 int least_of_v0_v1 = vertIdxOfLeastYCoord2(Tptr);
128 return (Tptr[least_of_v0_v1].y <= Tptr[2].y) ? least_of_v0_v1 : 2;
// Returns the index (0 or 1) of the element of Tptr[0..1] with the larger
// x value. On a tie the higher index, 1, is returned (the opposite of the
// vertIdxOfLeast*() tie rule).
132 static inline int vertIdxOfHighestXCoord2(const T *Tptr)
134 return (Tptr[1].x >= Tptr[0].x) ? 1 : 0;
// Returns the index (0..2) of the element of Tptr[0..2] with the largest
// x value. Index 2 wins whenever it is greater than or equal to the best of
// the first two, so ties resolve to the highest index.
138 static inline int vertIdxOfHighestXCoord3(const T *Tptr)
140 int highest_of_v0_v1 = vertIdxOfHighestXCoord2(Tptr);
141 return (Tptr[2].x >= Tptr[highest_of_v0_v1].x) ? 2 : highest_of_v0_v1;
// Returns the index (0 or 1) of the element of Tptr[0..1] with the larger
// y value. On a tie the higher index, 1, is returned (the opposite of the
// vertIdxOfLeast*() tie rule).
145 static inline int vertIdxOfHighestYCoord2(const T *Tptr)
147 return (Tptr[1].y >= Tptr[0].y) ? 1 : 0;
// Returns the index (0..2) of the element of Tptr[0..2] with the largest
// y value. Index 2 wins whenever it is greater than or equal to the best of
// the first two, so ties resolve to the highest index.
151 static inline int vertIdxOfHighestYCoord3(const T *Tptr)
153 int highest_of_v0_v1 = vertIdxOfHighestYCoord2(Tptr);
154 return (Tptr[2].y >= Tptr[highest_of_v0_v1].y) ? 2 : highest_of_v0_v1;
157 ///////////////////////////////////////////////////////////////////////////////
159 // Determines if the specified triangle should be rendered. If so, it
160 // fills the given array of vertex pointers, vert_ptrs, in order of
161 // increasing Y coordinate values, as required by rasterization algorithm.
162 // Parameter 'tri_num' is 0 for first triangle (idx 0,1,2 of vbuf[]),
163 // or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]).
164 // Returns true if triangle should be rendered, false if not.
165 ///////////////////////////////////////////////////////////////////////////////
// Decides whether one triangle of a poly is drawable and, if so, sorts its
// vertices by y.
//   vbuf      - vertex buffer filled by polyInitVertexBuffer()
//   tri_num   - 0 selects vbuf[0..2]; any other value selects vbuf[1..3]
//   vert_ptrs - output: pointers to the three vertices, lowest y first
// Two tests are applied before the ordering: a size test against
// CHKMAX_X / CHKMAX_Y and an overlap test against gpu_unai.DrawingArea.
166 static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs)
168 // Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)?
169 const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 0 : 1];
171 // Get indices of highest/lowest X,Y coords within triangle
// All four indices are relative to tri_ptr, i.e. in the range 0..2.
172 int idx_lowest_x = vertIdxOfLeastXCoord3(tri_ptr);
173 int idx_highest_x = vertIdxOfHighestXCoord3(tri_ptr);
174 int idx_lowest_y = vertIdxOfLeastYCoord3(tri_ptr);
175 int idx_highest_y = vertIdxOfHighestYCoord3(tri_ptr);
177 // Maximum absolute distance between any two X coordinates is 1023,
178 // and for Y coordinates is 511 (PS1 hardware limitation)
179 int lowest_x = tri_ptr[idx_lowest_x].x;
180 int highest_x = tri_ptr[idx_highest_x].x;
181 int lowest_y = tri_ptr[idx_lowest_y].y;
182 int highest_y = tri_ptr[idx_highest_y].y;
// Size test: the triangle's bounding box must stay below CHKMAX_X wide and
// CHKMAX_Y tall.
// NOTE(review): a failing triangle is presumably rejected (return false) -- confirm.
183 if ((highest_x - lowest_x) >= CHKMAX_X ||
184 (highest_y - lowest_y) >= CHKMAX_Y)
187 // Determine if triangle is completely outside clipping range
188 int xmin, xmax, ymin, ymax;
189 xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
190 ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
// Intersect the bounding box with the drawing area.
191 int clipped_lowest_x = Max2(xmin,lowest_x);
192 int clipped_lowest_y = Max2(ymin,lowest_y);
193 int clipped_highest_x = Min2(xmax,highest_x);
194 int clipped_highest_y = Min2(ymax,highest_y);
// An intersection with zero or negative width or height fails this test.
195 if (clipped_lowest_x >= clipped_highest_x ||
196 clipped_lowest_y >= clipped_highest_y)
199 // Order vertex ptrs by increasing y value (draw routines need this).
200 // The middle index is deduced by a binary math trick that depends
201 // on index range always being between 0..2
// The two extreme indices are distinct members of {0,1,2}, so their sum is
// 1, 2 or 3; XOR-ing that sum with 3 gives 2, 1 or 0 respectively, which is
// the one index that is neither the lowest nor the highest.
202 vert_ptrs[0] = tri_ptr + idx_lowest_y;
203 vert_ptrs[1] = tri_ptr + ((idx_lowest_y + idx_highest_y) ^ 3);
204 vert_ptrs[2] = tri_ptr + idx_highest_y;
208 ///////////////////////////////////////////////////////////////////////////////
209 // GPU internal polygon drawing functions
210 ///////////////////////////////////////////////////////////////////////////////
212 /*----------------------------------------------------------------------
213 gpuDrawPolyF - Flat-shaded, untextured poly
214 ----------------------------------------------------------------------*/
// Rasterizes a flat-shaded, untextured triangle or quad.
//   packet            - draw-command packet; packet.U4[0] supplies the color,
//                       and the vertices are unpacked by polyInitVertexBuffer()
//   gpuPolySpanDriver - span routine, called per emitted scanline span as
//                       (gpu_unai, pointer to first pixel, xb - xa)
//   is_quad           - non-zero: two triangle passes; zero: one pass
215 void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
217 // Set up bgr555 color to be used across calls in inner driver
218 gpu_unai.PixelData = GPU_RGB16(packet.U4[0]);
221 polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad);
// A quad is drawn as two triangles; polyUseTriangle() picks the vertices
// belonging to each pass.
223 int total_passes = is_quad ? 2 : 1;
// vptrs[] receives this pass's vertices ordered by increasing y.
227 const PolyVertex* vptrs[3];
228 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
232 s32 x3, dx3, x4, dx4, dx;
233 s32 x0, x1, x2, y0, y1, y2;
// Vertex 0 has the lowest y, vertex 1 the middle y, vertex 2 the highest y.
235 x0 = vptrs[0]->x; y0 = vptrs[0]->y;
236 x1 = vptrs[1]->x; y1 = vptrs[1]->y;
237 x2 = vptrs[2]->x; y2 = vptrs[2]->y;
// dx combines the x extents of edges v1->v2 and v0->v2 with the heights ya/yb.
// NOTE(review): its sign presumably tells on which side of the long edge the
// middle vertex lies, selecting between the mirrored setups below -- confirm.
241 dx = (x2 - x1) * ya - (x2 - x0) * yb;
// Two iterations per triangle.
// NOTE(review): presumably one per half of the triangle, split at y1 -- confirm.
243 for (int loop0 = 2; loop0; loop0--) {
// Edge slopes in fixed point: dx3 / dx4 are the per-scanline steps added to
// the edge positions x3 / x4 in the span loop. The nested #ifdefs only select
// how the division is performed; a zero height yields slope 0 instead of a
// division by zero. Here x3 follows edge v0->v2 and x4 follows edge v0->v1.
248 #ifdef GPU_UNAI_USE_FLOATMATH
249 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
250 dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
251 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
253 dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
254 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
256 #else // Integer Division:
257 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
258 dx3 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
259 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
261 dx3 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
262 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
// Mirrored setup: x3 follows edge v0->v1 and x4 follows edge v0->v2.
266 #ifdef GPU_UNAI_USE_FLOATMATH
267 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
268 dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
269 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
271 dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
272 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
274 #else // Integer Division:
275 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
276 dx3 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
277 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
279 dx3 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
280 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
285 //senquack - break out of final loop if nothing to be drawn (1st loop
286 // must always be taken to setup dx3/dx4)
// x3 is re-derived at row y1 by walking dx3 down from vertex 0, while x4
// switches to the slope of edge v1->v2.
292 x3 = i2x(x0) + (dx3 * (y1 - y0));
294 #ifdef GPU_UNAI_USE_FLOATMATH
295 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
296 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
298 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
300 #else // Integer Division:
301 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
302 dx4 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
304 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
// Mirrored case: x4 is re-derived at row y1 and x3 switches to the slope of
// edge v1->v2.
309 x4 = i2x(x0) + (dx4 * (y1 - y0));
310 #ifdef GPU_UNAI_USE_FLOATMATH
311 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
312 dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
314 dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
316 #else // Integer Division:
317 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
318 dx3 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
320 dx3 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
// Clip against the drawing area.
326 s32 xmin, xmax, ymin, ymax;
327 xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
328 ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
// Rows above ymin are skipped: advance both edges by the number of skipped rows.
330 if ((ymin - ya) > 0) {
331 x3 += (dx3 * (ymin - ya));
332 x4 += (dx4 * (ymin - ya));
336 if (yb > ymax) yb = ymax;
// PixelBase addresses column 0 of row ya in VRAM and advances by FRAME_WIDTH
// per row in the loop below.
342 u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
343 int li=gpu_unai.ilace_mask;
// With progressive interlace disabled, pi is 0 and pif is 1, so the
// (ya&pi)==pif test below can never match and no row is skipped by it.
344 int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
345 int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
// One iteration per scanline; both edge positions step by their slopes.
347 for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH,
348 x3 += dx3, x4 += dx4 )
351 if ((ya&pi)==pif) continue;
// Span runs from xa to xb, both clamped to the drawing area.
353 xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4);
354 if ((xmin - xa) > 0) xa = xmin;
355 if (xb > xmax) xb = xmax;
357 gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
360 } while (++cur_pass < total_passes);
363 /*----------------------------------------------------------------------
364 gpuDrawPolyFT - Flat-shaded, textured poly
365 ----------------------------------------------------------------------*/
// Rasterizes a flat-shaded, textured triangle or quad.
//   packet            - draw-command packet; bytes packet.U1[0..2] supply the
//                       light color, and the vertices (with texture coords)
//                       are unpacked by polyInitVertexBuffer()
//   gpuPolySpanDriver - span routine, called per emitted scanline span as
//                       (gpu_unai, pointer to first pixel, xb - xa); it picks
//                       up u_inc / v_inc from gpu_unai
//   is_quad           - non-zero: two triangle passes; zero: one pass
366 void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
368 // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light)
369 gpu_unai.r8 = packet.U1[0];
370 gpu_unai.g8 = packet.U1[1];
371 gpu_unai.b8 = packet.U1[2];
372 // r5/g5/b5 used if just texture-blending is applied (15-bit light)
373 gpu_unai.r5 = packet.U1[0] >> 3;
374 gpu_unai.g5 = packet.U1[1] >> 3;
375 gpu_unai.b5 = packet.U1[2] >> 3;
378 polyInitVertexBuffer(vbuf, packet, POLYTYPE_FT, is_quad);
// A quad is drawn as two triangles; polyUseTriangle() picks the vertices
// belonging to each pass.
380 int total_passes = is_quad ? 2 : 1;
// vptrs[] receives this pass's vertices ordered by increasing y.
384 const PolyVertex* vptrs[3];
385 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
389 s32 x3, dx3, x4, dx4, dx;
390 s32 u3, du3, v3, dv3;
391 s32 x0, x1, x2, y0, y1, y2;
392 s32 u0, u1, u2, v0, v1, v2;
// Vertex 0 has the lowest y, vertex 1 the middle y, vertex 2 the highest y.
395 x0 = vptrs[0]->x; y0 = vptrs[0]->y;
396 u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
397 x1 = vptrs[1]->x; y1 = vptrs[1]->y;
398 u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
399 x2 = vptrs[2]->x; y2 = vptrs[2]->y;
400 u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
// At this point dx4 is the common divisor and du4 / dv4 are the numerators
// of the per-pixel texture steps; the divisions below turn du4 / dv4 into
// fixed-point increments.
404 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
405 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
406 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
// The nested #ifdefs only select how the division by dx4 is performed.
414 #ifdef GPU_UNAI_USE_FLOATMATH
415 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
417 float finv = FloatInv(dx4);
418 du4 = (fixed)((du4 << FIXED_BITS) * finv);
419 dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
426 du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
427 dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
432 #else // Integer Division:
433 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
437 du4 = xInvMulx(du4, iF, iS);
438 dv4 = xInvMulx(dv4, iF, iS);
444 du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
445 dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
451 // Set u,v increments for inner driver
452 gpu_unai.u_inc = du4;
453 gpu_unai.v_inc = dv4;
455 //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here?
456 // (SAME ISSUE ELSEWHERE)
457 for (s32 loop0 = 2; loop0; loop0--) {
// Texture coordinates start at vertex 0 and are walked along the x3 edge.
461 u3 = i2x(u0); v3 = i2x(v0);
// Per-scanline steps. In this setup the x3 edge is v0->v2 (carrying u/v via
// du3 / dv3) and the x4 edge is v0->v1. A zero height is tested before every
// division.
463 #ifdef GPU_UNAI_USE_FLOATMATH
464 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
465 if ((y2 - y0) != 0) {
466 float finv = FloatInv(y2 - y0);
467 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
468 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
469 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
473 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
475 if ((y2 - y0) != 0) {
476 float fdiv = y2 - y0;
477 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
478 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
479 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
483 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
485 #else // Integer Division:
486 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
487 if ((y2 - y0) != 0) {
489 xInv((y2 - y0), iF, iS);
490 dx3 = xInvMulx((x2 - x0), iF, iS);
491 du3 = xInvMulx((u2 - u0), iF, iS);
492 dv3 = xInvMulx((v2 - v0), iF, iS);
496 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
498 if ((y2 - y0) != 0) {
499 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
500 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
501 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
505 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
// Mirrored setup: the x3 edge is v0->v1 (carrying u/v) and the x4 edge is
// v0->v2.
509 #ifdef GPU_UNAI_USE_FLOATMATH
510 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
511 if ((y1 - y0) != 0) {
512 float finv = FloatInv(y1 - y0);
513 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
514 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
515 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
519 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
521 if ((y1 - y0) != 0) {
522 float fdiv = y1 - y0;
523 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
524 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
525 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
529 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
531 #else // Integer Division:
532 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
533 if ((y1 - y0) != 0) {
535 xInv((y1 - y0), iF, iS);
536 dx3 = xInvMulx((x1 - x0), iF, iS);
537 du3 = xInvMulx((u1 - u0), iF, iS);
538 dv3 = xInvMulx((v1 - v0), iF, iS);
542 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
544 if ((y1 - y0) != 0) {
545 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
546 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
547 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
551 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
556 //senquack - break out of final loop if nothing to be drawn (1st loop
557 // must always be taken to setup dx3/dx4)
// Advance the x3 edge and its texture coordinates down to row y1, then give
// the x4 edge the slope of v1->v2.
567 if ((y1 - y0) != 0) {
568 x3 += (dx3 * (y1 - y0));
569 u3 += (du3 * (y1 - y0));
570 v3 += (dv3 * (y1 - y0));
572 #ifdef GPU_UNAI_USE_FLOATMATH
573 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
574 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
576 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
578 #else // Integer Division:
579 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
580 dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
582 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
// Mirrored case: x4 is re-derived at row y1 and the x3 edge (with its u/v
// steps) switches to v1->v2.
587 x4 = i2x(x0) + (dx4 * (y1 - y0));
590 #ifdef GPU_UNAI_USE_FLOATMATH
591 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
592 if ((y2 - y1) != 0) {
593 float finv = FloatInv(y2 - y1);
594 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
595 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
596 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
601 if ((y2 - y1) != 0) {
602 float fdiv = y2 - y1;
603 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
604 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
605 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
610 #else // Integer Division:
611 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
612 if ((y2 - y1) != 0) {
614 xInv((y2 - y1), iF, iS);
615 dx3 = xInvMulx((x2 - x1), iF, iS);
616 du3 = xInvMulx((u2 - u1), iF, iS);
617 dv3 = xInvMulx((v2 - v1), iF, iS);
622 if ((y2 - y1) != 0) {
623 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
624 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
625 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
// Clip against the drawing area.
634 s32 xmin, xmax, ymin, ymax;
635 xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
636 ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
// Rows above ymin are skipped: advance edges and texture coordinates by the
// number of skipped rows.
638 if ((ymin - ya) > 0) {
639 x3 += dx3 * (ymin - ya);
640 x4 += dx4 * (ymin - ya);
641 u3 += du3 * (ymin - ya);
642 v3 += dv3 * (ymin - ya);
646 if (yb > ymax) yb = ymax;
// PixelBase addresses column 0 of row ya in VRAM and advances by FRAME_WIDTH
// per row in the loop below.
652 u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
653 int li=gpu_unai.ilace_mask;
// With progressive interlace disabled, pi is 0 and pif is 1, so the
// (ya&pi)==pif test below can never match and no row is skipped by it.
654 int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
655 int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
// One iteration per scanline; edges and edge texture coordinates step together.
657 for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
658 x3 += dx3, x4 += dx4,
659 u3 += du3, v3 += dv3 )
662 if ((ya&pi)==pif) continue;
666 xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4);
// itmp is the fixed-point distance from the exact edge position x3 to the
// first whole pixel xa; the span's starting u/v are corrected by that amount.
669 fixed itmp = i2x(xa) - x3;
671 u4 += (du4 * itmp) >> FIXED_BITS;
672 v4 += (dv4 * itmp) >> FIXED_BITS;
// Pixels left of xmin are skipped: step u/v across the skipped pixels.
678 if ((xmin - xa) > 0) {
679 u4 += du4 * (xmin - xa);
680 v4 += dv4 * (xmin - xa);
684 // Set u,v coords for inner driver
688 if (xb > xmax) xb = xmax;
690 gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
693 } while (++cur_pass < total_passes);
696 /*----------------------------------------------------------------------
697 gpuDrawPolyG - Gouraud-shaded, untextured poly
698 ----------------------------------------------------------------------*/
// Rasterizes a Gouraud-shaded, untextured triangle or quad.
//   packet            - draw-command packet; vertices and per-vertex colors
//                       are unpacked by polyInitVertexBuffer()
//   gpuPolySpanDriver - span routine, called per emitted scanline span as
//                       (gpu_unai, pointer to first pixel, xb - xa); the
//                       span's start color and per-pixel color step are
//                       handed over in gpu_unai.gCol / gpu_unai.gInc
//   is_quad           - non-zero: two triangle passes; zero: one pass
699 void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
702 polyInitVertexBuffer(vbuf, packet, POLYTYPE_G, is_quad);
// A quad is drawn as two triangles; polyUseTriangle() picks the vertices
// belonging to each pass.
704 int total_passes = is_quad ? 2 : 1;
// vptrs[] receives this pass's vertices ordered by increasing y.
708 const PolyVertex* vptrs[3];
709 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
713 s32 x3, dx3, x4, dx4, dx;
714 s32 r3, dr3, g3, dg3, b3, db3;
715 s32 x0, x1, x2, y0, y1, y2;
716 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
// Vertex 0 has the lowest y, vertex 1 the middle y, vertex 2 the highest y.
719 x0 = vptrs[0]->x; y0 = vptrs[0]->y;
720 r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
721 x1 = vptrs[1]->x; y1 = vptrs[1]->y;
722 r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
723 x2 = vptrs[2]->x; y2 = vptrs[2]->y;
724 r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
// At this point dx4 is the common divisor and dr4 / dg4 / db4 are the
// numerators of the per-pixel color steps; the divisions below turn them
// into fixed-point increments.
728 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
729 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
730 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
731 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
// The nested #ifdefs only select how the division by dx4 is performed.
740 #ifdef GPU_UNAI_USE_FLOATMATH
741 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
743 float finv = FloatInv(dx4);
744 dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
745 dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
746 db4 = (fixed)((db4 << FIXED_BITS) * finv);
753 dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
754 dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
755 db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
760 #else // Integer Division:
761 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
765 dr4 = xInvMulx(dr4, iF, iS);
766 dg4 = xInvMulx(dg4, iF, iS);
767 db4 = xInvMulx(db4, iF, iS);
773 dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
774 dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
775 db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
781 // Setup packed Gouraud increment for inner driver
782 gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
// Two iterations per triangle.
// NOTE(review): presumably one per half of the triangle, split at y1 -- confirm.
784 for (s32 loop0 = 2; loop0; loop0--) {
// Per-scanline steps. In this setup the x3 edge is v0->v2 (carrying the
// color via dr3 / dg3 / db3) and the x4 edge is v0->v1. A zero height zeroes
// the steps instead of dividing.
793 #ifdef GPU_UNAI_USE_FLOATMATH
794 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
795 if ((y2 - y0) != 0) {
796 float finv = FloatInv(y2 - y0);
797 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
798 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
799 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
800 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
802 dx3 = dr3 = dg3 = db3 = 0;
804 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
806 if ((y2 - y0) != 0) {
807 float fdiv = y2 - y0;
808 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
809 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
810 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
811 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
813 dx3 = dr3 = dg3 = db3 = 0;
815 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
817 #else // Integer Division:
818 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
819 if ((y2 - y0) != 0) {
821 xInv((y2 - y0), iF, iS);
822 dx3 = xInvMulx((x2 - x0), iF, iS);
823 dr3 = xInvMulx((r2 - r0), iF, iS);
824 dg3 = xInvMulx((g2 - g0), iF, iS);
825 db3 = xInvMulx((b2 - b0), iF, iS);
827 dx3 = dr3 = dg3 = db3 = 0;
829 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
831 if ((y2 - y0) != 0) {
832 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
833 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
834 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
835 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
837 dx3 = dr3 = dg3 = db3 = 0;
839 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
// Mirrored setup: the x3 edge is v0->v1 (carrying the color) and the x4 edge
// is v0->v2.
843 #ifdef GPU_UNAI_USE_FLOATMATH
844 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
845 if ((y1 - y0) != 0) {
846 float finv = FloatInv(y1 - y0);
847 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
848 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
849 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
850 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
852 dx3 = dr3 = dg3 = db3 = 0;
854 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
856 if ((y1 - y0) != 0) {
857 float fdiv = y1 - y0;
858 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
859 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
860 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
861 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
863 dx3 = dr3 = dg3 = db3 = 0;
865 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
867 #else // Integer Division:
868 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
869 if ((y1 - y0) != 0) {
871 xInv((y1 - y0), iF, iS);
872 dx3 = xInvMulx((x1 - x0), iF, iS);
873 dr3 = xInvMulx((r1 - r0), iF, iS);
874 dg3 = xInvMulx((g1 - g0), iF, iS);
875 db3 = xInvMulx((b1 - b0), iF, iS);
877 dx3 = dr3 = dg3 = db3 = 0;
879 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
881 if ((y1 - y0) != 0) {
882 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
883 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
884 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
885 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
887 dx3 = dr3 = dg3 = db3 = 0;
889 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
894 //senquack - break out of final loop if nothing to be drawn (1st loop
895 // must always be taken to setup dx3/dx4)
// Restart from the vertices: x3 and the color from vertex 0, x4 from vertex 1.
// x3 and the color are then advanced down to row y1 using the steps above.
901 x3 = i2x(x0); x4 = i2x(x1);
902 r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
904 if ((y1 - y0) != 0) {
905 x3 += (dx3 * (y1 - y0));
906 r3 += (dr3 * (y1 - y0));
907 g3 += (dg3 * (y1 - y0));
908 b3 += (db3 * (y1 - y0));
// The x4 edge takes the slope of v1->v2.
911 #ifdef GPU_UNAI_USE_FLOATMATH
912 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
913 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
915 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
917 #else // Integer Division:
918 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
919 dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
921 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
// Mirrored case: x4 is re-derived at row y1, the color restarts from vertex 1,
// and the x3 edge (with its color steps) switches to v1->v2.
926 x4 = i2x(x0) + (dx4 * (y1 - y0));
928 r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1);
930 #ifdef GPU_UNAI_USE_FLOATMATH
931 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
932 if ((y2 - y1) != 0) {
933 float finv = FloatInv(y2 - y1);
934 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
935 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
936 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
937 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
939 dx3 = dr3 = dg3 = db3 = 0;
942 if ((y2 - y1) != 0) {
943 float fdiv = y2 - y1;
944 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
945 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
946 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
947 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
949 dx3 = dr3 = dg3 = db3 = 0;
952 #else // Integer Division:
953 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
954 if ((y2 - y1) != 0) {
956 xInv((y2 - y1), iF, iS);
957 dx3 = xInvMulx((x2 - x1), iF, iS);
958 dr3 = xInvMulx((r2 - r1), iF, iS);
959 dg3 = xInvMulx((g2 - g1), iF, iS);
960 db3 = xInvMulx((b2 - b1), iF, iS);
962 dx3 = dr3 = dg3 = db3 = 0;
965 if ((y2 - y1) != 0) {
966 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
967 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
968 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
969 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
971 dx3 = dr3 = dg3 = db3 = 0;
// Clip against the drawing area.
978 s32 xmin, xmax, ymin, ymax;
979 xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
980 ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
// Rows above ymin are skipped: advance edges and edge color by the number of
// skipped rows.
982 if ((ymin - ya) > 0) {
983 x3 += (dx3 * (ymin - ya));
984 x4 += (dx4 * (ymin - ya));
985 r3 += (dr3 * (ymin - ya));
986 g3 += (dg3 * (ymin - ya));
987 b3 += (db3 * (ymin - ya));
991 if (yb > ymax) yb = ymax;
// PixelBase addresses column 0 of row ya in VRAM and advances by FRAME_WIDTH
// per row in the loop below.
997 u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
998 int li=gpu_unai.ilace_mask;
// With progressive interlace disabled, pi is 0 and pif is 1, so the
// (ya&pi)==pif test below can never match and no row is skipped by it.
999 int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
1000 int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
// One iteration per scanline; edges and edge color step together.
1002 for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1003 x3 += dx3, x4 += dx4,
1004 r3 += dr3, g3 += dg3, b3 += db3 )
// Rows whose number has a bit in common with ilace_mask are skipped.
1006 if (ya&li) continue;
1007 if ((ya&pi)==pif) continue;
// Span runs from xa to xb; its color starts from the edge color.
1011 xa = FixedCeilToInt(x3);
1012 xb = FixedCeilToInt(x4);
1013 r4 = r3; g4 = g3; b4 = b3;
// itmp is the fixed-point distance from the exact edge position x3 to the
// first whole pixel xa; the span's start color is corrected by that amount.
1015 fixed itmp = i2x(xa) - x3;
1017 r4 += (dr4 * itmp) >> FIXED_BITS;
1018 g4 += (dg4 * itmp) >> FIXED_BITS;
1019 b4 += (db4 * itmp) >> FIXED_BITS;
// Pixels left of xmin are skipped: step the color across the skipped pixels.
1026 if ((xmin - xa) > 0) {
1027 r4 += (dr4 * (xmin - xa));
1028 g4 += (dg4 * (xmin - xa));
1029 b4 += (db4 * (xmin - xa));
1033 // Setup packed Gouraud color for inner driver
1034 gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4);
1036 if (xb > xmax) xb = xmax;
1038 gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
1041 } while (++cur_pass < total_passes);
1044 /*----------------------------------------------------------------------
1045 gpuDrawPolyGT - Gouraud-shaded, textured poly
1046 ----------------------------------------------------------------------*/
1047 void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
1050 polyInitVertexBuffer(vbuf, packet, POLYTYPE_GT, is_quad);
1052 int total_passes = is_quad ? 2 : 1;
1056 const PolyVertex* vptrs[3];
1057 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
1061 s32 x3, dx3, x4, dx4, dx;
1062 s32 u3, du3, v3, dv3;
1063 s32 r3, dr3, g3, dg3, b3, db3;
1064 s32 x0, x1, x2, y0, y1, y2;
1065 s32 u0, u1, u2, v0, v1, v2;
1066 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
1070 x0 = vptrs[0]->x; y0 = vptrs[0]->y;
1071 u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
1072 r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
1073 x1 = vptrs[1]->x; y1 = vptrs[1]->y;
1074 u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
1075 r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
1076 x2 = vptrs[2]->x; y2 = vptrs[2]->y;
1077 u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
1078 r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
1082 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
1083 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
1084 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
1085 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
1086 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
1087 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
1098 #ifdef GPU_UNAI_USE_FLOATMATH
1099 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1101 float finv = FloatInv(dx4);
1102 du4 = (fixed)((du4 << FIXED_BITS) * finv);
1103 dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
1104 dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
1105 dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
1106 db4 = (fixed)((db4 << FIXED_BITS) * finv);
1108 du4 = dv4 = dr4 = dg4 = db4 = 0;
1113 du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
1114 dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
1115 dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
1116 dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
1117 db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
1119 du4 = dv4 = dr4 = dg4 = db4 = 0;
1122 #else // Integer Division:
1123 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1127 du4 = xInvMulx(du4, iF, iS);
1128 dv4 = xInvMulx(dv4, iF, iS);
1129 dr4 = xInvMulx(dr4, iF, iS);
1130 dg4 = xInvMulx(dg4, iF, iS);
1131 db4 = xInvMulx(db4, iF, iS);
1133 du4 = dv4 = dr4 = dg4 = db4 = 0;
1137 du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
1138 dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
1139 dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
1140 dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
1141 db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
1143 du4 = dv4 = dr4 = dg4 = db4 = 0;
1147 // Set u,v increments and packed Gouraud increment for inner driver
1148 gpu_unai.u_inc = du4;
1149 gpu_unai.v_inc = dv4;
1150 gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
1152 for (s32 loop0 = 2; loop0; loop0--) {
1156 u3 = i2x(u0); v3 = i2x(v0);
1157 r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
1159 #ifdef GPU_UNAI_USE_FLOATMATH
1160 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1161 if ((y2 - y0) != 0) {
1162 float finv = FloatInv(y2 - y0);
1163 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
1164 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
1165 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
1166 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
1167 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
1168 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
1170 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1172 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
1174 if ((y2 - y0) != 0) {
1175 float fdiv = y2 - y0;
1176 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
1177 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
1178 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
1179 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
1180 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
1181 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
1183 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1185 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
1187 #else // Integer Division:
1188 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1189 if ((y2 - y0) != 0) {
1191 xInv((y2 - y0), iF, iS);
1192 dx3 = xInvMulx((x2 - x0), iF, iS);
1193 du3 = xInvMulx((u2 - u0), iF, iS);
1194 dv3 = xInvMulx((v2 - v0), iF, iS);
1195 dr3 = xInvMulx((r2 - r0), iF, iS);
1196 dg3 = xInvMulx((g2 - g0), iF, iS);
1197 db3 = xInvMulx((b2 - b0), iF, iS);
1199 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1201 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
1203 if ((y2 - y0) != 0) {
1204 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
1205 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
1206 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
1207 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
1208 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
1209 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
1211 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1213 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
1217 #ifdef GPU_UNAI_USE_FLOATMATH
1218 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1219 if ((y1 - y0) != 0) {
1220 float finv = FloatInv(y1 - y0);
1221 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
1222 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
1223 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
1224 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
1225 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
1226 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
1228 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1230 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
1232 if ((y1 - y0) != 0) {
1233 float fdiv = y1 - y0;
1234 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
1235 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
1236 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
1237 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
1238 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
1239 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
1241 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1243 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / float(y2 - y0)) : 0;
1245 #else // Integer Division:
1246 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1247 if ((y1 - y0) != 0) {
1249 xInv((y1 - y0), iF, iS);
1250 dx3 = xInvMulx((x1 - x0), iF, iS);
1251 du3 = xInvMulx((u1 - u0), iF, iS);
1252 dv3 = xInvMulx((v1 - v0), iF, iS);
1253 dr3 = xInvMulx((r1 - r0), iF, iS);
1254 dg3 = xInvMulx((g1 - g0), iF, iS);
1255 db3 = xInvMulx((b1 - b0), iF, iS);
1257 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1259 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
1261 if ((y1 - y0) != 0) {
1262 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
1263 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
1264 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
1265 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
1266 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
1267 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
1269 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1271 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
1276 //senquack - break out of final loop if nothing to be drawn (1st loop
1277 // must always be taken to set up dx3/dx4)
1278 if (y1 == y2) break;
1283 x3 = i2x(x0); x4 = i2x(x1);
1284 u3 = i2x(u0); v3 = i2x(v0);
1285 r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
1287 if ((y1 - y0) != 0) {
1288 x3 += (dx3 * (y1 - y0));
1289 u3 += (du3 * (y1 - y0));
1290 v3 += (dv3 * (y1 - y0));
1291 r3 += (dr3 * (y1 - y0));
1292 g3 += (dg3 * (y1 - y0));
1293 b3 += (db3 * (y1 - y0));
1296 #ifdef GPU_UNAI_USE_FLOATMATH
1297 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1298 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
1300 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
1302 #else // Integer Division:
1303 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1304 dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
1306 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
1311 x4 = i2x(x0) + (dx4 * (y1 - y0));
1313 u3 = i2x(u1); v3 = i2x(v1);
1314 r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1);
1315 #ifdef GPU_UNAI_USE_FLOATMATH
1316 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1317 if ((y2 - y1) != 0) {
1318 float finv = FloatInv(y2 - y1);
1319 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
1320 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
1321 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
1322 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
1323 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
1324 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
1326 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1329 if ((y2 - y1) != 0) {
1330 float fdiv = y2 - y1;
1331 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
1332 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
1333 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
1334 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
1335 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
1336 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
1338 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1341 #else // Integer Division:
1342 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1343 if ((y2 - y1) != 0) {
1345 xInv((y2 - y1), iF, iS);
1346 dx3 = xInvMulx((x2 - x1), iF, iS);
1347 du3 = xInvMulx((u2 - u1), iF, iS);
1348 dv3 = xInvMulx((v2 - v1), iF, iS);
1349 dr3 = xInvMulx((r2 - r1), iF, iS);
1350 dg3 = xInvMulx((g2 - g1), iF, iS);
1351 db3 = xInvMulx((b2 - b1), iF, iS);
1353 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1356 if ((y2 - y1) != 0) {
1357 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
1358 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
1359 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
1360 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
1361 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
1362 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
1364 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1371 s32 xmin, xmax, ymin, ymax;
1372 xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
1373 ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
1375 if ((ymin - ya) > 0) {
1376 x3 += (dx3 * (ymin - ya));
1377 x4 += (dx4 * (ymin - ya));
1378 u3 += (du3 * (ymin - ya));
1379 v3 += (dv3 * (ymin - ya));
1380 r3 += (dr3 * (ymin - ya));
1381 g3 += (dg3 * (ymin - ya));
1382 b3 += (db3 * (ymin - ya));
1386 if (yb > ymax) yb = ymax;
1388 int loop1 = yb - ya;
1392 u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
1393 int li=gpu_unai.ilace_mask;
1394 int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
1395 int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
1397 for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1398 x3 += dx3, x4 += dx4,
1399 u3 += du3, v3 += dv3,
1400 r3 += dr3, g3 += dg3, b3 += db3 )
1402 if (ya&li) continue;
1403 if ((ya&pi)==pif) continue;
1408 xa = FixedCeilToInt(x3);
1409 xb = FixedCeilToInt(x4);
1411 r4 = r3; g4 = g3; b4 = b3;
1413 fixed itmp = i2x(xa) - x3;
1415 u4 += (du4 * itmp) >> FIXED_BITS;
1416 v4 += (dv4 * itmp) >> FIXED_BITS;
1417 r4 += (dr4 * itmp) >> FIXED_BITS;
1418 g4 += (dg4 * itmp) >> FIXED_BITS;
1419 b4 += (db4 * itmp) >> FIXED_BITS;
1428 if ((xmin - xa) > 0) {
1429 u4 += du4 * (xmin - xa);
1430 v4 += dv4 * (xmin - xa);
1431 r4 += dr4 * (xmin - xa);
1432 g4 += dg4 * (xmin - xa);
1433 b4 += db4 * (xmin - xa);
1437 // Set packed Gouraud color and u,v coords for inner driver
1440 gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4);
1442 if (xb > xmax) xb = xmax;
1444 gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
1447 } while (++cur_pass < total_passes);