1 /***************************************************************************
2 * Copyright (C) 2010 PCSX4ALL Team *
3 * Copyright (C) 2010 Unai *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
19 ***************************************************************************/
21 #ifndef __GPU_UNAI_GPU_RASTER_POLYGON_H__
22 #define __GPU_UNAI_GPU_RASTER_POLYGON_H__
24 //senquack - NOTE: GPU Unai poly routines have been rewritten/adapted
25 // from DrHell routines to fix multiple issues. See README_senquack.txt
27 ///////////////////////////////////////////////////////////////////////////////
28 // Shared poly vertex buffer, able to handle 3 or 4-pt polys of any type.
29 ///////////////////////////////////////////////////////////////////////////////
// Per-vertex fields of the shared poly vertex buffer.
// NOTE(review): the enclosing struct/union declarations and the matching
// preprocessor else/endif lines are not visible in this listing. The
// tex_word / col_word members written by polyInitVertexBuffer() are
// presumably 32-bit overlays of 'tex' / 'col' -- confirm against the header.
32 s32 x, y; // Sign-extended 11-bit X,Y coords
// Byte order of the 'tex' sub-struct is mirrored on big-endian builds
// (pad, v, u instead of u, v, pad).
34 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
35 struct { u8 pad[2], v, u; } tex; // Texture coords (if used)
37 struct { u8 u, v, pad[2]; } tex; // Texture coords (if used)
// Same mirroring for the colour bytes (pad, b, g, r instead of r, g, b, pad).
42 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
43 struct { u8 pad, b, g, r; } col; // 24-bit RGB color (if used)
45 struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used)
// Polygon attribute bit flags: bit 0 marks a textured polygon, bit 1 a
// Gouraud-shaded one. They are tested with '&' in polyInitVertexBuffer().
52 POLYATTR_TEXTURE = (1 << 0),
53 POLYATTR_GOURAUD = (1 << 1)
// Polygon types, expressed as combinations of the POLYATTR_* bits.
// NOTE(review): POLYTYPE_F (used as the default 'ptype' of gpuDrawPolyF())
// is declared on a line not visible in this listing.
58 POLYTYPE_FT = (POLYATTR_TEXTURE), // flat-shaded, textured
59 POLYTYPE_G = (POLYATTR_GOURAUD), // Gouraud-shaded, untextured
60 POLYTYPE_GT = (POLYATTR_TEXTURE | POLYATTR_GOURAUD) // Gouraud-shaded, textured
63 ///////////////////////////////////////////////////////////////////////////////
64 // polyInitVertexBuffer()
65 // Fills vbuf[] array with data from any type of poly draw-command packet.
66 ///////////////////////////////////////////////////////////////////////////////
// Parameters:
//   vbuf    - output array; entries [0..2] (triangle) or [0..3] (quad) are written
//   packet  - draw-command packet, read as little-endian 32-bit words
//   ptype   - POLYATTR_* bit combination selecting texture / Gouraud data
//   is_quad - non-zero for a 4-vertex poly, zero for a 3-vertex poly
// NOTE(review): the declaration of 'ptr', its starting offset for each of the
// three passes, the adjustment of vert_stride and the guards around the
// texture/colour loops are on lines not visible in this listing.
67 static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyType ptype, u32 is_quad)
69 bool texturing = ptype & POLYATTR_TEXTURE;
70 bool gouraud = ptype & POLYATTR_GOURAUD;
72 int vert_stride = 1; // Stride of vertices in cmd packet, in 32-bit words
78 int num_verts = (is_quad) ? 4 : 3;
// Coordinates: one 32-bit word per vertex, X in the low half and Y in the
// high half, each passed through GPU_EXPANDSIGN.
83 for (int i=0; i < num_verts; ++i, ptr += vert_stride) {
84 u32 coords = le32_to_u32(*ptr);
85 vbuf[i].x = GPU_EXPANDSIGN(coords);
86 vbuf[i].y = GPU_EXPANDSIGN(coords >> 16);
89 // U,V texture coords (if applicable)
// The whole packet word is stored; callers later read tex.u / tex.v.
92 for (int i=0; i < num_verts; ++i, ptr += vert_stride)
93 vbuf[i].tex_word = le32_to_u32(*ptr);
96 // Colors (if applicable)
// The whole packet word is stored; callers later read col.r / col.g / col.b.
99 for (int i=0; i < num_verts; ++i, ptr += vert_stride)
100 vbuf[i].col_word = le32_to_u32(*ptr);
104 ///////////////////////////////////////////////////////////////////////////////
105 // Helper functions to determine which vertex in a 2 or 3 vertex array
106 // has the highest/lowest X/Y coordinate.
107 // Note: the comparison logic is such that, given a set of vertices with
108 // identical values for a given coordinate, a different index will be
109 // returned from vertIdxOfLeast..() than a call to vertIdxOfHighest..().
110 // This ensures that, during the vertex-ordering phase of rasterization,
111 // all three vertices remain unique.
112 ///////////////////////////////////////////////////////////////////////////////
// Returns the index (0 or 1) of the element of Tptr[0..1] with the smaller
// .x value; on a tie index 0 is returned.
// NOTE(review): 'T' is presumably a template parameter declared on a line not
// visible in this listing.
115 static inline int vertIdxOfLeastXCoord2(const T *Tptr)
117 return (Tptr[0].x <= Tptr[1].x) ? 0 : 1;
// Returns the index (0..2) of the element of Tptr[0..2] with the smallest
// .x value; on ties the lowest index wins.
121 static inline int vertIdxOfLeastXCoord3(const T *Tptr)
123 int least_of_v0_v1 = vertIdxOfLeastXCoord2(Tptr);
124 return (Tptr[least_of_v0_v1].x <= Tptr[2].x) ? least_of_v0_v1 : 2;
// Returns the index (0 or 1) of the element of Tptr[0..1] with the smaller
// .y value; on a tie index 0 is returned.
128 static inline int vertIdxOfLeastYCoord2(const T *Tptr)
130 return (Tptr[0].y <= Tptr[1].y) ? 0 : 1;
// Returns the index (0..2) of the element of Tptr[0..2] with the smallest
// .y value; on ties the lowest index wins.
134 static inline int vertIdxOfLeastYCoord3(const T *Tptr)
136 int least_of_v0_v1 = vertIdxOfLeastYCoord2(Tptr);
137 return (Tptr[least_of_v0_v1].y <= Tptr[2].y) ? least_of_v0_v1 : 2;
// Returns the index (0 or 1) of the element of Tptr[0..1] with the larger
// .x value; on a tie index 1 is returned (the opposite of the Least variant).
141 static inline int vertIdxOfHighestXCoord2(const T *Tptr)
143 return (Tptr[1].x >= Tptr[0].x) ? 1 : 0;
// Returns the index (0..2) of the element of Tptr[0..2] with the largest
// .x value; on ties the highest index wins.
147 static inline int vertIdxOfHighestXCoord3(const T *Tptr)
149 int highest_of_v0_v1 = vertIdxOfHighestXCoord2(Tptr);
150 return (Tptr[2].x >= Tptr[highest_of_v0_v1].x) ? 2 : highest_of_v0_v1;
// Returns the index (0 or 1) of the element of Tptr[0..1] with the larger
// .y value; on a tie index 1 is returned (the opposite of the Least variant).
154 static inline int vertIdxOfHighestYCoord2(const T *Tptr)
156 return (Tptr[1].y >= Tptr[0].y) ? 1 : 0;
// Returns the index (0..2) of the element of Tptr[0..2] with the largest
// .y value; on ties the highest index wins.
160 static inline int vertIdxOfHighestYCoord3(const T *Tptr)
162 int highest_of_v0_v1 = vertIdxOfHighestYCoord2(Tptr);
163 return (Tptr[2].y >= Tptr[highest_of_v0_v1].y) ? 2 : highest_of_v0_v1;
166 ///////////////////////////////////////////////////////////////////////////////
168 // Determines if the specified triangle should be rendered. If so, it
169 // fills the given array of vertex pointers, vert_ptrs, in order of
170 // increasing Y coordinate values, as required by rasterization algorithm.
171 // Parameter 'tri_num' is 0 for first triangle (idx 0,1,2 of vbuf[]),
172 // or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]).
173 // Returns true if triangle should be rendered, false if not.
174 ///////////////////////////////////////////////////////////////////////////////
// Parameters:
//   vbuf      - vertex buffer filled by polyInitVertexBuffer()
//   tri_num   - 0 selects vbuf[0..2], any other value selects vbuf[1..3]
//   vert_ptrs - output: three vertex pointers ordered by increasing Y
//   x_off     - output: X offset to add to every vertex of this triangle
//   y_off     - output: Y offset to add to every vertex of this triangle
// NOTE(review): the 'return' statements that follow the two rejection tests
// and the final success return are on lines not visible in this listing.
175 static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs, s32 &x_off, s32 &y_off)
177 // Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)?
178 const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 0 : 1];
180 // Get indices of highest/lowest X,Y coords within triangle
181 int idx_lowest_x = vertIdxOfLeastXCoord3(tri_ptr);
182 int idx_highest_x = vertIdxOfHighestXCoord3(tri_ptr);
183 int idx_lowest_y = vertIdxOfLeastYCoord3(tri_ptr);
184 int idx_highest_y = vertIdxOfHighestYCoord3(tri_ptr);
186 // Maximum absolute distance between any two X coordinates is 1023,
187 // and for Y coordinates is 511 (PS1 hardware limitation)
188 int lowest_x = tri_ptr[idx_lowest_x].x;
189 int highest_x = tri_ptr[idx_highest_x].x;
190 int lowest_y = tri_ptr[idx_lowest_y].y;
191 int highest_y = tri_ptr[idx_highest_y].y;
// Size test: the bounding-box extent is compared against CHKMAX_X / CHKMAX_Y.
192 if ((highest_x - lowest_x) >= CHKMAX_X ||
193 (highest_y - lowest_y) >= CHKMAX_Y)
// Offsets: start from the configured drawing offset, then re-derive the
// offset so that (lowest + offset) equals the sign-expanded sum
// GPU_EXPANDSIGN(lowest + drawing offset).
197 x_off = gpu_unai.DrawingOffset[0];
198 y_off = gpu_unai.DrawingOffset[1];
199 x_off = GPU_EXPANDSIGN(lowest_x + x_off) - lowest_x;
200 y_off = GPU_EXPANDSIGN(lowest_y + y_off) - lowest_y;
202 // Determine if triangle is completely outside clipping range
203 s32 xmin, xmax, ymin, ymax;
204 xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
205 ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
// Intersect the offset bounding box with the drawing area; an empty
// intersection in either axis triggers the test below.
206 int clipped_lowest_x = Max2(xmin, lowest_x + x_off);
207 int clipped_lowest_y = Max2(ymin, lowest_y + y_off);
208 int clipped_highest_x = Min2(xmax, highest_x + x_off);
209 int clipped_highest_y = Min2(ymax, highest_y + y_off);
210 if (clipped_lowest_x >= clipped_highest_x ||
211 clipped_lowest_y >= clipped_highest_y)
214 // Order vertex ptrs by increasing y value (draw routines need this).
215 // The middle index is deduced by a binary math trick that depends
216 // on index range always being between 0..2
// The two extreme indices are always distinct (ties resolve to the lowest
// index for "least" and the highest for "highest"), so their sum is 1, 2 or 3
// and XOR with 3 yields the remaining index: 1^3=2, 2^3=1, 3^3=0.
217 vert_ptrs[0] = tri_ptr + idx_lowest_y;
218 vert_ptrs[1] = tri_ptr + ((idx_lowest_y + idx_highest_y) ^ 3);
219 vert_ptrs[2] = tri_ptr + idx_highest_y;
223 ///////////////////////////////////////////////////////////////////////////////
224 // GPU internal polygon drawing functions
225 ///////////////////////////////////////////////////////////////////////////////
227 /*----------------------------------------------------------------------
228 gpuDrawPolyF - Flat-shaded, untextured poly
229 ----------------------------------------------------------------------*/
// Rasterizes a flat-shaded, untextured triangle or quad one scanline span at
// a time, handing each span to gpuPolySpanDriver.
// Parameters:
//   packet            - draw-command packet; word 0 carries the colour
//   gpuPolySpanDriver - inner driver called once per scanline span
//   is_quad           - non-zero: two triangle passes (verts 0,1,2 then 1,2,3)
//   ptype             - forwarded to polyInitVertexBuffer()
// NOTE(review): this listing has lost its short lines (braces, else/endif,
// 'continue', the assignments of ya/yb/x3/x4/xa/xb and the declarations of
// vbuf, cur_pass, x_off, y_off); comments below describe only what is visible.
// NOTE(review): expressions of the form ((a - b) << FIXED_BITS) left-shift a
// value that may be negative, which is undefined behaviour in C and in C++
// before C++20 -- confirm the toolchain's behaviour is relied on deliberately.
230 void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad,
231 PolyType ptype = POLYTYPE_F)
233 // Set up bgr555 color to be used across calls in inner driver
234 gpu_unai.inn.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0]));
237 polyInitVertexBuffer(vbuf, packet, ptype, is_quad);
239 int total_passes = is_quad ? 2 : 1;
243 const PolyVertex* vptrs[3];
// Triangles rejected by polyUseTriangle() are not drawn.
245 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
249 s32 x3, dx3, x4, dx4, dx;
250 s32 x0, x1, x2, y0, y1, y2;
// Vertices arrive sorted by increasing Y; apply the per-triangle offsets.
252 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
253 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
254 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
258 dx = (x2 - x1) * ya - (x2 - x0) * yb;
// Two iterations per triangle; the slope code below first uses the
// y0->y1 / y0->y2 edges and later the y1->y2 edge.
260 for (int loop0 = 2; loop0; loop0--) {
// Edge slopes in fixed point, 0 when the edge has no vertical extent.
// Four build variants: float multiply-by-inverse, float divide,
// integer multiply-by-inverse (xLoDivx) and GPU_FAST_DIV.
265 #ifdef GPU_UNAI_USE_FLOATMATH
266 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
267 dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
268 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
270 dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
271 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
273 #else // Integer Division:
274 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
275 dx3 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
276 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
278 dx3 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
279 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
// Same computation with the roles of dx3 and dx4 swapped.
283 #ifdef GPU_UNAI_USE_FLOATMATH
284 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
285 dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
286 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
288 dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
289 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
291 #else // Integer Division:
292 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
293 dx3 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
294 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
296 dx3 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
297 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
302 //senquack - break out of final loop if nothing to be drawn (1st loop
303 // must always be taken to setup dx3/dx4)
// Advance one edge from y0 to y1 and recompute the other edge's slope
// for the y1->y2 segment.
309 x3 = i2x(x0) + (dx3 * (y1 - y0));
311 #ifdef GPU_UNAI_USE_FLOATMATH
312 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
313 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
315 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
317 #else // Integer Division:
318 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
319 dx4 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
321 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
326 x4 = i2x(x0) + (dx4 * (y1 - y0));
327 #ifdef GPU_UNAI_USE_FLOATMATH
328 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
329 dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
331 dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
333 #else // Integer Division:
334 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
335 dx3 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
337 dx3 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
// Vertical clipping against the drawing area.
343 s32 xmin, xmax, ymin, ymax;
344 xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
345 ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
// Starting above ymin: step both edge positions forward by the number
// of skipped scanlines.
347 if ((ymin - ya) > 0) {
348 x3 += (dx3 * (ymin - ya));
349 x4 += (dx4 * (ymin - ya));
353 if (yb > ymax) yb = ymax;
359 le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
// With progressive interlace disabled pi is 0 and pif is 1, so the
// (ya&pi)==pif test below never skips a line.
360 int li=gpu_unai.inn.ilace_mask;
361 int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
362 int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
// Scanline loop: each step moves one VRAM row down and advances both
// edge positions by their slopes.
364 for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH,
365 x3 += dx3, x4 += dx4 )
368 if ((ya&pi)==pif) continue;
// Span endpoints are the ceilings of the fixed-point edge positions,
// clamped to the drawing area's X range.
370 xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4);
371 if ((xmin - xa) > 0) xa = xmin;
372 if (xb > xmax) xb = xmax;
374 gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa), ya);
377 } while (++cur_pass < total_passes);
380 /*----------------------------------------------------------------------
381 gpuDrawPolyFT - Flat-shaded, textured poly
382 ----------------------------------------------------------------------*/
// Rasterizes a flat-shaded, textured triangle or quad one scanline span at a
// time, handing each span to gpuPolySpanDriver.
// Parameters:
//   packet            - draw-command packet; bytes 0..2 carry r8/g8/b8
//   gpuPolySpanDriver - inner driver called once per scanline span
//   is_quad           - non-zero: two triangle passes (verts 0,1,2 then 1,2,3)
//   ptype             - forwarded to polyInitVertexBuffer()
// NOTE(review): this listing has lost its short lines (braces, else/endif,
// 'continue', several declarations and assignments such as ya/yb, du4/dv4,
// u4/v4, iF/iS, fdiv); comments below describe only what is visible.
// NOTE(review): expressions of the form ((a - b) << FIXED_BITS) left-shift a
// value that may be negative, which is undefined behaviour in C and in C++
// before C++20 -- confirm the toolchain's behaviour is relied on deliberately.
383 void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad,
384 PolyType ptype = POLYTYPE_FT)
386 // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light)
387 gpu_unai.inn.r8 = packet.U1[0];
388 gpu_unai.inn.g8 = packet.U1[1];
389 gpu_unai.inn.b8 = packet.U1[2];
392 polyInitVertexBuffer(vbuf, packet, ptype, is_quad);
394 int total_passes = is_quad ? 2 : 1;
398 const PolyVertex* vptrs[3];
// Triangles rejected by polyUseTriangle() are not drawn.
400 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
404 s32 x3, dx3, x4, dx4, dx;
405 s32 u3, du3, v3, dv3;
406 s32 x0, x1, x2, y0, y1, y2;
407 s32 u0, u1, u2, v0, v1, v2;
// Vertices arrive sorted by increasing Y; X/Y get the per-triangle
// offsets, U/V are taken as-is from the vertex buffer.
410 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
411 u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
412 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
413 u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
414 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
415 u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
// Numerators/denominator for the per-pixel U,V increments: du4 and dv4
// are divided by dx4 below to become fixed-point steps.
419 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
420 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
421 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
// Four build variants of the same division by dx4.
429 #ifdef GPU_UNAI_USE_FLOATMATH
430 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
432 float finv = FloatInv(dx4);
433 du4 = (fixed)((du4 << FIXED_BITS) * finv);
434 dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
441 du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
442 dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
447 #else // Integer Division:
448 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
452 du4 = xInvMulx(du4, iF, iS);
453 dv4 = xInvMulx(dv4, iF, iS);
459 du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
460 dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
466 // Set u,v increments for inner driver
467 gpu_unai.inn.u_inc = du4;
468 gpu_unai.inn.v_inc = dv4;
470 //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here?
471 // (SAME ISSUE ELSEWHERE)
472 for (s32 loop0 = 2; loop0; loop0--) {
476 u3 = i2x(u0); v3 = i2x(v0);
// Per-scanline slopes for X, U and V along the y0->y2 edge (dx3/du3/dv3)
// and for X along the y0->y1 edge (dx4), in each build variant.
478 #ifdef GPU_UNAI_USE_FLOATMATH
479 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
480 if ((y2 - y0) != 0) {
481 float finv = FloatInv(y2 - y0);
482 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
483 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
484 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
488 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
490 if ((y2 - y0) != 0) {
491 float fdiv = y2 - y0;
492 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
493 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
494 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
498 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
500 #else // Integer Division:
501 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
502 if ((y2 - y0) != 0) {
504 xInv((y2 - y0), iF, iS);
505 dx3 = xInvMulx((x2 - x0), iF, iS);
506 du3 = xInvMulx((u2 - u0), iF, iS);
507 dv3 = xInvMulx((v2 - v0), iF, iS);
511 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
513 if ((y2 - y0) != 0) {
514 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
515 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
516 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
520 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
// Mirror case: dx3/du3/dv3 follow the y0->y1 edge and dx4 the y0->y2 edge.
524 #ifdef GPU_UNAI_USE_FLOATMATH
525 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
526 if ((y1 - y0) != 0) {
527 float finv = FloatInv(y1 - y0);
528 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
529 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
530 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
534 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
536 if ((y1 - y0) != 0) {
537 float fdiv = y1 - y0;
538 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
539 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
540 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
544 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
546 #else // Integer Division:
547 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
548 if ((y1 - y0) != 0) {
550 xInv((y1 - y0), iF, iS);
551 dx3 = xInvMulx((x1 - x0), iF, iS);
552 du3 = xInvMulx((u1 - u0), iF, iS);
553 dv3 = xInvMulx((v1 - v0), iF, iS);
557 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
559 if ((y1 - y0) != 0) {
560 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
561 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
562 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
566 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
571 //senquack - break out of final loop if nothing to be drawn (1st loop
572 // must always be taken to setup dx3/dx4)
// Advance the x3/u3/v3 edge from y0 to y1, then recompute dx4 for the
// y1->y2 segment.
582 if ((y1 - y0) != 0) {
583 x3 += (dx3 * (y1 - y0));
584 u3 += (du3 * (y1 - y0));
585 v3 += (dv3 * (y1 - y0));
587 #ifdef GPU_UNAI_USE_FLOATMATH
588 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
589 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
591 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
593 #else // Integer Division:
594 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
595 dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
597 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
// Mirror case: advance x4 to y1 and recompute dx3/du3/dv3 for y1->y2.
602 x4 = i2x(x0) + (dx4 * (y1 - y0));
605 #ifdef GPU_UNAI_USE_FLOATMATH
606 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
607 if ((y2 - y1) != 0) {
608 float finv = FloatInv(y2 - y1);
609 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
610 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
611 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
616 if ((y2 - y1) != 0) {
617 float fdiv = y2 - y1;
618 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
619 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
620 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
625 #else // Integer Division:
626 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
627 if ((y2 - y1) != 0) {
629 xInv((y2 - y1), iF, iS);
630 dx3 = xInvMulx((x2 - x1), iF, iS);
631 du3 = xInvMulx((u2 - u1), iF, iS);
632 dv3 = xInvMulx((v2 - v1), iF, iS);
637 if ((y2 - y1) != 0) {
638 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
639 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
640 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
// Vertical clipping against the drawing area.
649 s32 xmin, xmax, ymin, ymax;
650 xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
651 ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
// Starting above ymin: step edge positions and U,V forward by the
// number of skipped scanlines.
653 if ((ymin - ya) > 0) {
654 x3 += dx3 * (ymin - ya);
655 x4 += dx4 * (ymin - ya);
656 u3 += du3 * (ymin - ya);
657 v3 += dv3 * (ymin - ya);
661 if (yb > ymax) yb = ymax;
667 le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
// With progressive interlace disabled pi is 0 and pif is 1, so the
// (ya&pi)==pif test below never skips a line.
668 int li=gpu_unai.inn.ilace_mask;
669 int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
670 int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
// Scanline loop: each step moves one VRAM row down and advances the
// edge positions and the left-edge U,V by their slopes.
672 for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
673 x3 += dx3, x4 += dx4,
674 u3 += du3, v3 += dv3 )
677 if ((ya&pi)==pif) continue;
681 xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4);
// Sub-pixel correction: itmp is the fixed-point distance from the
// true left edge x3 to the first pixel xa.
684 fixed itmp = i2x(xa) - x3;
686 u4 += (du4 * itmp) >> FIXED_BITS;
687 v4 += (dv4 * itmp) >> FIXED_BITS;
// Left clipping: skip U,V ahead by the number of clipped pixels.
693 if ((xmin - xa) > 0) {
694 u4 += du4 * (xmin - xa);
695 v4 += dv4 * (xmin - xa);
699 // Set u,v coords for inner driver
703 if (xb > xmax) xb = xmax;
705 gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa), ya);
708 } while (++cur_pass < total_passes);
711 /*----------------------------------------------------------------------
712 gpuDrawPolyG - Gouraud-shaded, untextured poly
713 ----------------------------------------------------------------------*/
// Rasterizes a Gouraud-shaded, untextured triangle or quad one scanline span
// at a time, handing each span to gpuPolySpanDriver.
// Parameters:
//   packet            - draw-command packet holding per-vertex colours/coords
//   gpuPolySpanDriver - inner driver called once per scanline span
//   is_quad           - non-zero: two triangle passes (verts 0,1,2 then 1,2,3)
// NOTE(review): this listing has lost its short lines (braces, else/endif,
// 'continue', several declarations and assignments such as ya/yb,
// dr4/dg4/db4, r4/g4/b4 declarations, iF/iS, fdiv); comments below describe
// only what is visible.
// NOTE(review): expressions of the form ((a - b) << FIXED_BITS) left-shift a
// value that may be negative, which is undefined behaviour in C and in C++
// before C++20 -- confirm the toolchain's behaviour is relied on deliberately.
714 void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
717 polyInitVertexBuffer(vbuf, packet, POLYTYPE_G, is_quad);
719 int total_passes = is_quad ? 2 : 1;
723 const PolyVertex* vptrs[3];
// Triangles rejected by polyUseTriangle() are not drawn.
725 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
729 s32 x3, dx3, x4, dx4, dx;
730 s32 r3, dr3, g3, dg3, b3, db3;
731 s32 x0, x1, x2, y0, y1, y2;
732 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
// Vertices arrive sorted by increasing Y; X/Y get the per-triangle
// offsets, colour components are taken as-is from the vertex buffer.
735 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
736 r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
737 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
738 r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
739 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
740 r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
// Numerators/denominator for the per-pixel colour increments: dr4, dg4
// and db4 are divided by dx4 below to become fixed-point steps.
744 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
745 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
746 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
747 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
// Four build variants of the same division by dx4.
756 #ifdef GPU_UNAI_USE_FLOATMATH
757 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
759 float finv = FloatInv(dx4);
760 dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
761 dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
762 db4 = (fixed)((db4 << FIXED_BITS) * finv);
769 dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
770 dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
771 db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
776 #else // Integer Division:
777 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
781 dr4 = xInvMulx(dr4, iF, iS);
782 dg4 = xInvMulx(dg4, iF, iS);
783 db4 = xInvMulx(db4, iF, iS);
789 dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
790 dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
791 db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
797 // Setup packed Gouraud increment for inner driver
798 gpu_unai.inn.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
800 for (s32 loop0 = 2; loop0; loop0--) {
// Per-scanline slopes for X and R,G,B along the y0->y2 edge (zeroed
// when the edge has no vertical extent) and for X along the y0->y1
// edge (dx4), in each build variant.
809 #ifdef GPU_UNAI_USE_FLOATMATH
810 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
811 if ((y2 - y0) != 0) {
812 float finv = FloatInv(y2 - y0);
813 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
814 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
815 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
816 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
818 dx3 = dr3 = dg3 = db3 = 0;
820 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
822 if ((y2 - y0) != 0) {
823 float fdiv = y2 - y0;
824 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
825 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
826 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
827 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
829 dx3 = dr3 = dg3 = db3 = 0;
831 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
833 #else // Integer Division:
834 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
835 if ((y2 - y0) != 0) {
837 xInv((y2 - y0), iF, iS);
838 dx3 = xInvMulx((x2 - x0), iF, iS);
839 dr3 = xInvMulx((r2 - r0), iF, iS);
840 dg3 = xInvMulx((g2 - g0), iF, iS);
841 db3 = xInvMulx((b2 - b0), iF, iS);
843 dx3 = dr3 = dg3 = db3 = 0;
845 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
847 if ((y2 - y0) != 0) {
848 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
849 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
850 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
851 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
853 dx3 = dr3 = dg3 = db3 = 0;
855 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
// Mirror case: dx3/dr3/dg3/db3 follow the y0->y1 edge and dx4 the
// y0->y2 edge.
859 #ifdef GPU_UNAI_USE_FLOATMATH
860 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
861 if ((y1 - y0) != 0) {
862 float finv = FloatInv(y1 - y0);
863 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
864 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
865 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
866 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
868 dx3 = dr3 = dg3 = db3 = 0;
870 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
872 if ((y1 - y0) != 0) {
873 float fdiv = y1 - y0;
874 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
875 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
876 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
877 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
879 dx3 = dr3 = dg3 = db3 = 0;
881 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
883 #else // Integer Division:
884 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
885 if ((y1 - y0) != 0) {
887 xInv((y1 - y0), iF, iS);
888 dx3 = xInvMulx((x1 - x0), iF, iS);
889 dr3 = xInvMulx((r1 - r0), iF, iS);
890 dg3 = xInvMulx((g1 - g0), iF, iS);
891 db3 = xInvMulx((b1 - b0), iF, iS);
893 dx3 = dr3 = dg3 = db3 = 0;
895 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
897 if ((y1 - y0) != 0) {
898 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
899 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
900 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
901 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
903 dx3 = dr3 = dg3 = db3 = 0;
905 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
910 //senquack - break out of final loop if nothing to be drawn (1st loop
911 // must always be taken to setup dx3/dx4)
// Restart from vertex 0 (x4 from vertex 1), advance the x3/r3/g3/b3
// edge from y0 to y1, then recompute dx4 for the y1->y2 segment.
917 x3 = i2x(x0); x4 = i2x(x1);
918 r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
920 if ((y1 - y0) != 0) {
921 x3 += (dx3 * (y1 - y0));
922 r3 += (dr3 * (y1 - y0));
923 g3 += (dg3 * (y1 - y0));
924 b3 += (db3 * (y1 - y0));
927 #ifdef GPU_UNAI_USE_FLOATMATH
928 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
929 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
931 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
933 #else // Integer Division:
934 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
935 dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
937 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
// Mirror case: advance x4 to y1, restart colour at vertex 1 and
// recompute dx3/dr3/dg3/db3 for the y1->y2 segment.
942 x4 = i2x(x0) + (dx4 * (y1 - y0));
944 r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1);
946 #ifdef GPU_UNAI_USE_FLOATMATH
947 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
948 if ((y2 - y1) != 0) {
949 float finv = FloatInv(y2 - y1);
950 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
951 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
952 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
953 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
955 dx3 = dr3 = dg3 = db3 = 0;
958 if ((y2 - y1) != 0) {
959 float fdiv = y2 - y1;
960 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
961 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
962 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
963 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
965 dx3 = dr3 = dg3 = db3 = 0;
968 #else // Integer Division:
969 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
970 if ((y2 - y1) != 0) {
972 xInv((y2 - y1), iF, iS);
973 dx3 = xInvMulx((x2 - x1), iF, iS);
974 dr3 = xInvMulx((r2 - r1), iF, iS);
975 dg3 = xInvMulx((g2 - g1), iF, iS);
976 db3 = xInvMulx((b2 - b1), iF, iS);
978 dx3 = dr3 = dg3 = db3 = 0;
981 if ((y2 - y1) != 0) {
982 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
983 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
984 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
985 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
987 dx3 = dr3 = dg3 = db3 = 0;
// Vertical clipping against the drawing area.
994 s32 xmin, xmax, ymin, ymax;
995 xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
996 ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
// Starting above ymin: step edge positions and colour forward by the
// number of skipped scanlines.
998 if ((ymin - ya) > 0) {
999 x3 += (dx3 * (ymin - ya));
1000 x4 += (dx4 * (ymin - ya));
1001 r3 += (dr3 * (ymin - ya));
1002 g3 += (dg3 * (ymin - ya));
1003 b3 += (db3 * (ymin - ya));
1007 if (yb > ymax) yb = ymax;
// Number of scanlines left to draw in this segment.
1009 int loop1 = yb - ya;
1013 le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
// With progressive interlace disabled pi is 0 and pif is 1, so the
// (ya&pi)==pif test below never skips a line.
1014 int li=gpu_unai.inn.ilace_mask;
1015 int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
1016 int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
// Scanline loop: each step moves one VRAM row down and advances the
// edge positions and the left-edge colour by their slopes.
1018 for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1019 x3 += dx3, x4 += dx4,
1020 r3 += dr3, g3 += dg3, b3 += db3 )
// Skip scanlines whose Y has any bit of the interlace mask set.
1022 if (ya&li) continue;
1023 if ((ya&pi)==pif) continue;
1027 xa = FixedCeilToInt(x3);
1028 xb = FixedCeilToInt(x4);
1029 r4 = r3; g4 = g3; b4 = b3;
// Sub-pixel correction: itmp is the fixed-point distance from the
// true left edge x3 to the first pixel xa.
1031 fixed itmp = i2x(xa) - x3;
1033 r4 += (dr4 * itmp) >> FIXED_BITS;
1034 g4 += (dg4 * itmp) >> FIXED_BITS;
1035 b4 += (db4 * itmp) >> FIXED_BITS;
// Left clipping: skip the colour ahead by the number of clipped pixels.
1042 if ((xmin - xa) > 0) {
1043 r4 += (dr4 * (xmin - xa));
1044 g4 += (dg4 * (xmin - xa));
1045 b4 += (db4 * (xmin - xa));
1049 // Setup packed Gouraud color for inner driver
1050 gpu_unai.inn.gCol = gpuPackGouraudCol(r4, g4, b4);
1052 if (xb > xmax) xb = xmax;
1054 gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa), ya);
1057 } while (++cur_pass < total_passes);
1060 /*----------------------------------------------------------------------
1061 gpuDrawPolyGT - Gouraud-shaded, textured poly
1062 ----------------------------------------------------------------------*/
1063 void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
1066 polyInitVertexBuffer(vbuf, packet, POLYTYPE_GT, is_quad);
1068 int total_passes = is_quad ? 2 : 1;
1072 const PolyVertex* vptrs[3];
1074 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
1078 s32 x3, dx3, x4, dx4, dx;
1079 s32 u3, du3, v3, dv3;
1080 s32 r3, dr3, g3, dg3, b3, db3;
1081 s32 x0, x1, x2, y0, y1, y2;
1082 s32 u0, u1, u2, v0, v1, v2;
1083 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
1087 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
1088 u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
1089 r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
1090 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
1091 u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
1092 r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
1093 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
1094 u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
1095 r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
1099 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
1100 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
1101 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
1102 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
1103 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
1104 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
1115 #ifdef GPU_UNAI_USE_FLOATMATH
1116 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1118 float finv = FloatInv(dx4);
1119 du4 = (fixed)((du4 << FIXED_BITS) * finv);
1120 dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
1121 dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
1122 dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
1123 db4 = (fixed)((db4 << FIXED_BITS) * finv);
1125 du4 = dv4 = dr4 = dg4 = db4 = 0;
1130 du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
1131 dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
1132 dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
1133 dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
1134 db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
1136 du4 = dv4 = dr4 = dg4 = db4 = 0;
1139 #else // Integer Division:
1140 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1144 du4 = xInvMulx(du4, iF, iS);
1145 dv4 = xInvMulx(dv4, iF, iS);
1146 dr4 = xInvMulx(dr4, iF, iS);
1147 dg4 = xInvMulx(dg4, iF, iS);
1148 db4 = xInvMulx(db4, iF, iS);
1150 du4 = dv4 = dr4 = dg4 = db4 = 0;
1154 du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
1155 dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
1156 dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
1157 dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
1158 db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
1160 du4 = dv4 = dr4 = dg4 = db4 = 0;
1164 // Set u,v increments and packed Gouraud increment for inner driver
1165 gpu_unai.inn.u_inc = du4;
1166 gpu_unai.inn.v_inc = dv4;
1167 gpu_unai.inn.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
1169 for (s32 loop0 = 2; loop0; loop0--) {
1173 u3 = i2x(u0); v3 = i2x(v0);
1174 r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
1176 #ifdef GPU_UNAI_USE_FLOATMATH
1177 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1178 if ((y2 - y0) != 0) {
1179 float finv = FloatInv(y2 - y0);
1180 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
1181 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
1182 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
1183 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
1184 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
1185 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
1187 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1189 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
1191 if ((y2 - y0) != 0) {
1192 float fdiv = y2 - y0;
1193 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
1194 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
1195 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
1196 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
1197 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
1198 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
1200 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1202 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
1204 #else // Integer Division:
1205 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1206 if ((y2 - y0) != 0) {
1208 xInv((y2 - y0), iF, iS);
1209 dx3 = xInvMulx((x2 - x0), iF, iS);
1210 du3 = xInvMulx((u2 - u0), iF, iS);
1211 dv3 = xInvMulx((v2 - v0), iF, iS);
1212 dr3 = xInvMulx((r2 - r0), iF, iS);
1213 dg3 = xInvMulx((g2 - g0), iF, iS);
1214 db3 = xInvMulx((b2 - b0), iF, iS);
1216 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1218 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
1220 if ((y2 - y0) != 0) {
1221 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
1222 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
1223 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
1224 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
1225 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
1226 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
1228 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1230 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
1234 #ifdef GPU_UNAI_USE_FLOATMATH
1235 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1236 if ((y1 - y0) != 0) {
1237 float finv = FloatInv(y1 - y0);
1238 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
1239 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
1240 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
1241 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
1242 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
1243 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
1245 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1247 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
1249 if ((y1 - y0) != 0) {
1250 float fdiv = y1 - y0;
1251 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
1252 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
1253 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
1254 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
1255 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
1256 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
1258 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1260 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / float(y2 - y0)) : 0;
1262 #else // Integer Division:
1263 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1264 if ((y1 - y0) != 0) {
1266 xInv((y1 - y0), iF, iS);
1267 dx3 = xInvMulx((x1 - x0), iF, iS);
1268 du3 = xInvMulx((u1 - u0), iF, iS);
1269 dv3 = xInvMulx((v1 - v0), iF, iS);
1270 dr3 = xInvMulx((r1 - r0), iF, iS);
1271 dg3 = xInvMulx((g1 - g0), iF, iS);
1272 db3 = xInvMulx((b1 - b0), iF, iS);
1274 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1276 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
1278 if ((y1 - y0) != 0) {
1279 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
1280 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
1281 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
1282 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
1283 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
1284 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
1286 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1288 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
1293 //senquack - break out of final loop if nothing to be drawn (1st loop
1294 // must always be taken to setup dx3/dx4)
1295 if (y1 == y2) break;
1300 x3 = i2x(x0); x4 = i2x(x1);
1301 u3 = i2x(u0); v3 = i2x(v0);
1302 r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
1304 if ((y1 - y0) != 0) {
1305 x3 += (dx3 * (y1 - y0));
1306 u3 += (du3 * (y1 - y0));
1307 v3 += (dv3 * (y1 - y0));
1308 r3 += (dr3 * (y1 - y0));
1309 g3 += (dg3 * (y1 - y0));
1310 b3 += (db3 * (y1 - y0));
1313 #ifdef GPU_UNAI_USE_FLOATMATH
1314 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1315 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
1317 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
1319 #else // Integer Division:
1320 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1321 dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
1323 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
1328 x4 = i2x(x0) + (dx4 * (y1 - y0));
1330 u3 = i2x(u1); v3 = i2x(v1);
1331 r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1);
1332 #ifdef GPU_UNAI_USE_FLOATMATH
1333 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1334 if ((y2 - y1) != 0) {
1335 float finv = FloatInv(y2 - y1);
1336 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
1337 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
1338 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
1339 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
1340 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
1341 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
1343 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1346 if ((y2 - y1) != 0) {
1347 float fdiv = y2 - y1;
1348 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
1349 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
1350 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
1351 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
1352 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
1353 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
1355 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1358 #else // Integer Division:
1359 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1360 if ((y2 - y1) != 0) {
1362 xInv((y2 - y1), iF, iS);
1363 dx3 = xInvMulx((x2 - x1), iF, iS);
1364 du3 = xInvMulx((u2 - u1), iF, iS);
1365 dv3 = xInvMulx((v2 - v1), iF, iS);
1366 dr3 = xInvMulx((r2 - r1), iF, iS);
1367 dg3 = xInvMulx((g2 - g1), iF, iS);
1368 db3 = xInvMulx((b2 - b1), iF, iS);
1370 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1373 if ((y2 - y1) != 0) {
1374 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
1375 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
1376 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
1377 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
1378 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
1379 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
1381 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1388 s32 xmin, xmax, ymin, ymax;
1389 xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
1390 ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
1392 if ((ymin - ya) > 0) {
1393 x3 += (dx3 * (ymin - ya));
1394 x4 += (dx4 * (ymin - ya));
1395 u3 += (du3 * (ymin - ya));
1396 v3 += (dv3 * (ymin - ya));
1397 r3 += (dr3 * (ymin - ya));
1398 g3 += (dg3 * (ymin - ya));
1399 b3 += (db3 * (ymin - ya));
1403 if (yb > ymax) yb = ymax;
1405 int loop1 = yb - ya;
1409 le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
1410 int li=gpu_unai.inn.ilace_mask;
1411 int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
1412 int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
1414 for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1415 x3 += dx3, x4 += dx4,
1416 u3 += du3, v3 += dv3,
1417 r3 += dr3, g3 += dg3, b3 += db3 )
1419 if (ya&li) continue;
1420 if ((ya&pi)==pif) continue;
1425 xa = FixedCeilToInt(x3);
1426 xb = FixedCeilToInt(x4);
1428 r4 = r3; g4 = g3; b4 = b3;
1430 fixed itmp = i2x(xa) - x3;
1432 u4 += (du4 * itmp) >> FIXED_BITS;
1433 v4 += (dv4 * itmp) >> FIXED_BITS;
1434 r4 += (dr4 * itmp) >> FIXED_BITS;
1435 g4 += (dg4 * itmp) >> FIXED_BITS;
1436 b4 += (db4 * itmp) >> FIXED_BITS;
1445 if ((xmin - xa) > 0) {
1446 u4 += du4 * (xmin - xa);
1447 v4 += dv4 * (xmin - xa);
1448 r4 += dr4 * (xmin - xa);
1449 g4 += dg4 * (xmin - xa);
1450 b4 += db4 * (xmin - xa);
1454 // Set packed Gouraud color and u,v coords for inner driver
1455 gpu_unai.inn.u = u4;
1456 gpu_unai.inn.v = v4;
1457 gpu_unai.inn.gCol = gpuPackGouraudCol(r4, g4, b4);
1459 if (xb > xmax) xb = xmax;
1461 gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa), ya);
1464 } while (++cur_pass < total_passes);
1467 #endif /* __GPU_UNAI_GPU_RASTER_POLYGON_H__ */