1 /***************************************************************************
2 * Copyright (C) 2010 PCSX4ALL Team *
3 * Copyright (C) 2010 Unai *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
19 ***************************************************************************/
21 #ifndef __GPU_UNAI_GPU_RASTER_POLYGON_H__
22 #define __GPU_UNAI_GPU_RASTER_POLYGON_H__
24 //senquack - NOTE: GPU Unai poly routines have been rewritten/adapted
25 // from DrHell routines to fix multiple issues. See README_senquack.txt
27 ///////////////////////////////////////////////////////////////////////////////
28 // Shared poly vertex buffer, able to handle 3 or 4-pt polys of any type.
29 ///////////////////////////////////////////////////////////////////////////////
// Members of one polygon vertex as held in the shared vertex buffer:
// screen position plus optional texture coordinate and optional color.
32 s32 x, y; // Sign-extended 11-bit X,Y coords
// The field order of 'tex' and 'col' is mirrored on big-endian hosts.
// NOTE(review): presumably this keeps u/v and r/g/b lined up with the
// host-order 32-bit values that polyInitVertexBuffer() stores through
// tex_word / col_word -- confirm against the enclosing union layout.
34 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
35 struct { u8 pad[2], v, u; } tex; // Texture coords (if used)
37 struct { u8 u, v, pad[2]; } tex; // Texture coords (if used)
42 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
43 struct { u8 pad, b, g, r; } col; // 24-bit RGB color (if used)
45 struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used)
// Attribute bits describing how a polygon is shaded. They are OR-ed
// together to form the POLYTYPE_* values and are tested individually by
// polyInitVertexBuffer().
52 POLYATTR_TEXTURE = (1 << 0), // Bit 0: polygon is textured
53 POLYATTR_GOURAUD = (1 << 1) // Bit 1: polygon is Gouraud-shaded
// Polygon types, expressed as combinations of the POLYATTR_* bits.
58 POLYTYPE_FT = (POLYATTR_TEXTURE), // Flat-shaded, textured
59 POLYTYPE_G = (POLYATTR_GOURAUD), // Gouraud-shaded, untextured
60 POLYTYPE_GT = (POLYATTR_TEXTURE | POLYATTR_GOURAUD) // Gouraud-shaded, textured
63 ///////////////////////////////////////////////////////////////////////////////
64 // polyInitVertexBuffer()
65 // Fills vbuf[] array with data from any type of poly draw-command packet.
//
// vbuf    - Destination vertex array; entries 0..2 (triangle) or 0..3 (quad)
//           are written.
// packet  - Draw-command packet the vertex words are read from.
// ptype   - Poly type; its POLYATTR_TEXTURE and POLYATTR_GOURAUD bits are
//           tested to decide which per-vertex attributes are present.
// is_quad - Nonzero for a 4-vertex poly, zero for a 3-vertex poly.
//
// Every packet word is converted from little-endian with le32_to_u32()
// before it is stored.
66 ///////////////////////////////////////////////////////////////////////////////
67 static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyType ptype, u32 is_quad)
69 bool texturing = ptype & POLYATTR_TEXTURE;
70 bool gouraud = ptype & POLYATTR_GOURAUD;
72 int vert_stride = 1; // Stride of vertices in cmd packet, in 32-bit words
78 int num_verts = (is_quad) ? 4 : 3;
// X,Y coords: X comes from the low 16 bits and Y from the high 16 bits of
// each vertex word; both are passed through GPU_EXPANDSIGN().
83 for (int i=0; i < num_verts; ++i, ptr += vert_stride) {
84 u32 coords = le32_to_u32(*ptr);
85 vbuf[i].x = GPU_EXPANDSIGN(coords);
86 vbuf[i].y = GPU_EXPANDSIGN(coords >> 16);
89 // U,V texture coords (if applicable)
// The whole 32-bit word is stored; consumers read it back as tex.u / tex.v.
92 for (int i=0; i < num_verts; ++i, ptr += vert_stride)
93 vbuf[i].tex_word = le32_to_u32(*ptr);
96 // Colors (if applicable)
// The whole 32-bit word is stored; consumers read it back as col.r/g/b.
99 for (int i=0; i < num_verts; ++i, ptr += vert_stride)
100 vbuf[i].col_word = le32_to_u32(*ptr);
104 ///////////////////////////////////////////////////////////////////////////////
105 // Helper functions to determine which vertex in a 2 or 3 vertex array
106 // has the highest/lowest X/Y coordinate.
107 // Note: the comparison logic is such that, given a set of vertices with
108 // identical values for a given coordinate, a different index will be
109 // returned from vertIdxOfLeast..() than a call to vertIdxOfHighest..().
110 // This ensures that, during the vertex-ordering phase of rasterization,
111 // all three vertices remain unique.
112 ///////////////////////////////////////////////////////////////////////////////
// Returns the index (0 or 1) of the element of Tptr[0..1] with the smaller
// x coordinate. Index 0 is returned on a tie.
115 static inline int vertIdxOfLeastXCoord2(const T *Tptr)
117 return (Tptr[0].x <= Tptr[1].x) ? 0 : 1;
// Returns the index (0..2) of the element of Tptr[0..2] with the smallest
// x coordinate. On a tie the lowest index among the tied elements is returned.
121 static inline int vertIdxOfLeastXCoord3(const T *Tptr)
123 int least_of_v0_v1 = vertIdxOfLeastXCoord2(Tptr);
124 return (Tptr[least_of_v0_v1].x <= Tptr[2].x) ? least_of_v0_v1 : 2;
// Returns the index (0 or 1) of the element of Tptr[0..1] with the smaller
// y coordinate. Index 0 is returned on a tie.
128 static inline int vertIdxOfLeastYCoord2(const T *Tptr)
130 return (Tptr[0].y <= Tptr[1].y) ? 0 : 1;
// Returns the index (0..2) of the element of Tptr[0..2] with the smallest
// y coordinate. On a tie the lowest index among the tied elements is returned.
134 static inline int vertIdxOfLeastYCoord3(const T *Tptr)
136 int least_of_v0_v1 = vertIdxOfLeastYCoord2(Tptr);
137 return (Tptr[least_of_v0_v1].y <= Tptr[2].y) ? least_of_v0_v1 : 2;
// Returns the index (0 or 1) of the element of Tptr[0..1] with the larger
// x coordinate. Index 1 is returned on a tie.
141 static inline int vertIdxOfHighestXCoord2(const T *Tptr)
143 return (Tptr[1].x >= Tptr[0].x) ? 1 : 0;
// Returns the index (0..2) of the element of Tptr[0..2] with the largest
// x coordinate. On a tie the highest index among the tied elements is returned.
147 static inline int vertIdxOfHighestXCoord3(const T *Tptr)
149 int highest_of_v0_v1 = vertIdxOfHighestXCoord2(Tptr);
150 return (Tptr[2].x >= Tptr[highest_of_v0_v1].x) ? 2 : highest_of_v0_v1;
// Returns the index (0 or 1) of the element of Tptr[0..1] with the larger
// y coordinate. Index 1 is returned on a tie.
154 static inline int vertIdxOfHighestYCoord2(const T *Tptr)
156 return (Tptr[1].y >= Tptr[0].y) ? 1 : 0;
// Returns the index (0..2) of the element of Tptr[0..2] with the largest
// y coordinate. On a tie the highest index among the tied elements is returned.
160 static inline int vertIdxOfHighestYCoord3(const T *Tptr)
162 int highest_of_v0_v1 = vertIdxOfHighestYCoord2(Tptr);
163 return (Tptr[2].y >= Tptr[highest_of_v0_v1].y) ? 2 : highest_of_v0_v1;
166 ///////////////////////////////////////////////////////////////////////////////
168 // Determines if the specified triangle should be rendered. If so, it
169 // fills the given array of vertex pointers, vert_ptrs, in order of
170 // increasing Y coordinate values, as required by rasterization algorithm.
171 // Parameter 'tri_num' is 0 for first triangle (idx 0,1,2 of vbuf[]),
172 // or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]).
173 // Returns true if triangle should be rendered, false if not.
//
// x_off and y_off are outputs. They start from gpu_unai.DrawingOffset[] and
// are adjusted so that (lowest coordinate + offset) equals
// GPU_EXPANDSIGN(lowest coordinate + drawing offset). The draw routines add
// them to every vertex coordinate of the triangle.
174 ///////////////////////////////////////////////////////////////////////////////
175 static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs, s32 &x_off, s32 &y_off)
177 // Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)?
178 const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 0 : 1];
180 // Get indices of highest/lowest X,Y coords within triangle
181 int idx_lowest_x = vertIdxOfLeastXCoord3(tri_ptr);
182 int idx_highest_x = vertIdxOfHighestXCoord3(tri_ptr);
183 int idx_lowest_y = vertIdxOfLeastYCoord3(tri_ptr);
184 int idx_highest_y = vertIdxOfHighestYCoord3(tri_ptr);
186 // Maximum absolute distance between any two X coordinates is 1023,
187 // and for Y coordinates is 511 (PS1 hardware limitation)
188 int lowest_x = tri_ptr[idx_lowest_x].x;
189 int highest_x = tri_ptr[idx_highest_x].x;
190 int lowest_y = tri_ptr[idx_lowest_y].y;
191 int highest_y = tri_ptr[idx_highest_y].y;
// The extents are compared against CHKMAX_X / CHKMAX_Y.
// NOTE(review): presumably these are 1024 and 512 to match the limits
// quoted above -- confirm where they are defined.
192 if ((highest_x - lowest_x) >= CHKMAX_X ||
193 (highest_y - lowest_y) >= CHKMAX_Y)
// Drawing offset, adjusted so the offset lowest coordinate is the
// GPU_EXPANDSIGN()-wrapped value while the same offset can still be added
// to the triangle's other coordinates.
197 x_off = gpu_unai.DrawingOffset[0];
198 y_off = gpu_unai.DrawingOffset[1];
199 x_off = GPU_EXPANDSIGN(lowest_x + x_off) - lowest_x;
200 y_off = GPU_EXPANDSIGN(lowest_y + y_off) - lowest_y;
202 // Determine if triangle is completely outside clipping range
203 s32 xmin, xmax, ymin, ymax;
204 xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
205 ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
// Intersect the offset bounding box with the drawing area.
206 int clipped_lowest_x = Max2(xmin, lowest_x + x_off);
207 int clipped_lowest_y = Max2(ymin, lowest_y + y_off);
208 int clipped_highest_x = Min2(xmax, highest_x + x_off);
209 int clipped_highest_y = Min2(ymax, highest_y + y_off);
// True when the clipped bounding box has zero or negative width or height.
210 if (clipped_lowest_x >= clipped_highest_x ||
211 clipped_lowest_y >= clipped_highest_y)
214 // Order vertex ptrs by increasing y value (draw routines need this).
215 // The middle index is deduced by a binary math trick that depends
216 // on index range always being between 0..2
// The sum of two distinct indices from {0,1,2} is 1, 2 or 3; XOR with 3
// maps that to the remaining index (2, 1 or 0). The tie-breaking rules of
// the vertIdxOf*() helpers keep idx_lowest_y and idx_highest_y distinct.
217 vert_ptrs[0] = tri_ptr + idx_lowest_y;
218 vert_ptrs[1] = tri_ptr + ((idx_lowest_y + idx_highest_y) ^ 3);
219 vert_ptrs[2] = tri_ptr + idx_highest_y;
223 ///////////////////////////////////////////////////////////////////////////////
224 // GPU internal polygon drawing functions
225 ///////////////////////////////////////////////////////////////////////////////
227 /*----------------------------------------------------------------------
228 gpuDrawPolyF - Flat-shaded, untextured poly

 packet            - Draw-command packet. Word 0 supplies the fill color,
                     converted with GPU_RGB16() into gpu_unai.inn.PixelData.
 gpuPolySpanDriver - Inner driver called for drawn scanlines with gpu_unai,
                     a pointer to the first pixel of the span
                     (PixelBase + xa) and the span width (xb - xa).
 is_quad           - Nonzero: two triangle passes (vbuf[] 0,1,2 then 1,2,3);
                     zero: a single pass.
 ptype             - Poly type forwarded to polyInitVertexBuffer().

 NOTE(review): the slope computations left-shift differences that may be
 negative by FIXED_BITS. That is formally undefined before C++20; the
 targeted compilers presumably treat it as an arithmetic shift -- confirm.
229 ----------------------------------------------------------------------*/
230 void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad,
231 PolyType ptype = POLYTYPE_F)
233 // Set up bgr555 color to be used across calls in inner driver
234 gpu_unai.inn.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0]));
237 polyInitVertexBuffer(vbuf, packet, ptype, is_quad);
239 int total_passes = is_quad ? 2 : 1;
243 const PolyVertex* vptrs[3];
// polyUseTriangle() rejects triangles that are too large or fully clipped;
// otherwise it fills vptrs[] ordered by increasing Y and sets x_off/y_off.
245 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
249 s32 x3, dx3, x4, dx4, dx;
250 s32 x0, x1, x2, y0, y1, y2;
// Vertex coords with the drawing offset applied (y0 <= y1 <= y2).
252 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
253 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
254 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
// NOTE(review): with ya/yb holding Y differences between vertices this is a
// cross product whose sign presumably selects which of the slope variants
// below is used -- confirm.
258 dx = (x2 - x1) * ya - (x2 - x0) * yb;
// Two iterations. NOTE(review): presumably the part of the triangle above
// the middle vertex (y0..y1) and the part below it (y1..y2) -- confirm.
260 for (int loop0 = 2; loop0; loop0--) {
// Edge slopes (x per scanline) in fixed point; a zero Y span gives slope 0.
265 #ifdef GPU_UNAI_USE_FLOATMATH
266 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
267 dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
268 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
270 dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
271 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
273 #else // Integer Division:
274 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
275 dx3 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
276 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
278 dx3 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
279 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
// Same slopes with the roles of dx3 and dx4 swapped.
283 #ifdef GPU_UNAI_USE_FLOATMATH
284 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
285 dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
286 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
288 dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
289 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
291 #else // Integer Division:
292 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
293 dx3 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
294 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
296 dx3 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
297 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
302 //senquack - break out of final loop if nothing to be drawn (1st loop
303 // must always be taken to setup dx3/dx4)
// x3: edge position at row y1, stepped from x0 with the existing dx3;
// dx4 is replaced by the slope of the y1..y2 edge.
309 x3 = i2x(x0) + (dx3 * (y1 - y0));
311 #ifdef GPU_UNAI_USE_FLOATMATH
312 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
313 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
315 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
317 #else // Integer Division:
318 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
319 dx4 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
321 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
// Mirror case: x4 is stepped to row y1 and dx3 takes the y1..y2 edge slope.
326 x4 = i2x(x0) + (dx4 * (y1 - y0));
327 #ifdef GPU_UNAI_USE_FLOATMATH
328 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
329 dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
331 dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
333 #else // Integer Division:
334 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
335 dx3 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
337 dx3 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
343 s32 xmin, xmax, ymin, ymax;
344 xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
345 ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
// Top clip: when the start row is above ymin, advance both edges by the
// number of skipped rows.
347 if ((ymin - ya) > 0) {
348 x3 += (dx3 * (ymin - ya));
349 x4 += (dx4 * (ymin - ya));
// Bottom clip.
353 if (yb > ymax) yb = ymax;
// First pixel of row ya in VRAM; advanced by FRAME_WIDTH per scanline.
359 le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
360 int li=gpu_unai.inn.ilace_mask;
// With progressive interlace disabled pi is 0 and pif is 1, so the
// (ya&pi)==pif test in the loop never skips a row.
361 int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
362 int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
// Scanline loop: one iteration per row, stepping both edges by their slopes.
364 for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH,
365 x3 += dx3, x4 += dx4 )
// Skip rows excluded by the progressive-interlace test.
368 if ((ya&pi)==pif) continue;
// Span endpoints from the fixed-point edge positions, then clipped
// horizontally to the drawing area.
370 xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4);
371 if ((xmin - xa) > 0) xa = xmin;
372 if (xb > xmax) xb = xmax;
374 gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
377 } while (++cur_pass < total_passes);
380 /*----------------------------------------------------------------------
381 gpuDrawPolyFT - Flat-shaded, textured poly

 packet            - Draw-command packet. Bytes 0..2 supply the light color:
                     stored as-is in gpu_unai.inn.r8/g8/b8 and shifted right
                     by 3 into gpu_unai.inn.r5/g5/b5.
 gpuPolySpanDriver - Inner driver called for drawn scanlines with gpu_unai,
                     a pointer to the first pixel of the span
                     (PixelBase + xa) and the span width (xb - xa).
 is_quad           - Nonzero: two triangle passes (vbuf[] 0,1,2 then 1,2,3);
                     zero: a single pass.
 ptype             - Poly type forwarded to polyInitVertexBuffer().

 The per-pixel texture increments are published to the inner driver through
 gpu_unai.inn.u_inc and gpu_unai.inn.v_inc.

 NOTE(review): the slope computations left-shift differences that may be
 negative by FIXED_BITS. That is formally undefined before C++20; the
 targeted compilers presumably treat it as an arithmetic shift -- confirm.
382 ----------------------------------------------------------------------*/
383 void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad,
384 PolyType ptype = POLYTYPE_FT)
386 // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light)
387 gpu_unai.inn.r8 = packet.U1[0];
388 gpu_unai.inn.g8 = packet.U1[1];
389 gpu_unai.inn.b8 = packet.U1[2];
390 // r5/g5/b5 used if just texture-blending is applied (15-bit light)
391 gpu_unai.inn.r5 = packet.U1[0] >> 3;
392 gpu_unai.inn.g5 = packet.U1[1] >> 3;
393 gpu_unai.inn.b5 = packet.U1[2] >> 3;
396 polyInitVertexBuffer(vbuf, packet, ptype, is_quad);
398 int total_passes = is_quad ? 2 : 1;
402 const PolyVertex* vptrs[3];
// polyUseTriangle() rejects triangles that are too large or fully clipped;
// otherwise it fills vptrs[] ordered by increasing Y and sets x_off/y_off.
404 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
408 s32 x3, dx3, x4, dx4, dx;
409 s32 u3, du3, v3, dv3;
410 s32 x0, x1, x2, y0, y1, y2;
411 s32 u0, u1, u2, v0, v1, v2;
// Vertex coords (drawing offset applied) and texture coords, y0 <= y1 <= y2.
414 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
415 u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
416 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
417 u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
418 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
419 u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
// Cross products; du4 and dv4 are divided by dx4 below to give the
// horizontal (per-pixel) U,V increments.
// NOTE(review): ya/yb presumably hold Y differences between vertices -- confirm.
423 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
424 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
425 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
433 #ifdef GPU_UNAI_USE_FLOATMATH
434 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
436 float finv = FloatInv(dx4);
437 du4 = (fixed)((du4 << FIXED_BITS) * finv);
438 dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
445 du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
446 dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
451 #else // Integer Division:
452 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
456 du4 = xInvMulx(du4, iF, iS);
457 dv4 = xInvMulx(dv4, iF, iS);
463 du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
464 dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
470 // Set u,v increments for inner driver
471 gpu_unai.inn.u_inc = du4;
472 gpu_unai.inn.v_inc = dv4;
474 //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here?
475 // (SAME ISSUE ELSEWHERE)
476 for (s32 loop0 = 2; loop0; loop0--) {
480 u3 = i2x(u0); v3 = i2x(v0);
// Per-scanline slopes of x, u and v along the y0..y2 edge (dx3/du3/dv3) and
// of x along the y0..y1 edge (dx4); a zero Y span gives dx4 = 0.
482 #ifdef GPU_UNAI_USE_FLOATMATH
483 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
484 if ((y2 - y0) != 0) {
485 float finv = FloatInv(y2 - y0);
486 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
487 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
488 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
492 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
494 if ((y2 - y0) != 0) {
495 float fdiv = y2 - y0;
496 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
497 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
498 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
502 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
504 #else // Integer Division:
505 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
506 if ((y2 - y0) != 0) {
508 xInv((y2 - y0), iF, iS);
509 dx3 = xInvMulx((x2 - x0), iF, iS);
510 du3 = xInvMulx((u2 - u0), iF, iS);
511 dv3 = xInvMulx((v2 - v0), iF, iS);
515 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
517 if ((y2 - y0) != 0) {
518 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
519 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
520 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
524 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
// Mirror case: dx3/du3/dv3 follow the y0..y1 edge and dx4 the y0..y2 edge.
528 #ifdef GPU_UNAI_USE_FLOATMATH
529 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
530 if ((y1 - y0) != 0) {
531 float finv = FloatInv(y1 - y0);
532 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
533 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
534 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
538 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
540 if ((y1 - y0) != 0) {
541 float fdiv = y1 - y0;
542 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
543 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
544 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
548 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
550 #else // Integer Division:
551 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
552 if ((y1 - y0) != 0) {
554 xInv((y1 - y0), iF, iS);
555 dx3 = xInvMulx((x1 - x0), iF, iS);
556 du3 = xInvMulx((u1 - u0), iF, iS);
557 dv3 = xInvMulx((v1 - v0), iF, iS);
561 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
563 if ((y1 - y0) != 0) {
564 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
565 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
566 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
570 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
575 //senquack - break out of final loop if nothing to be drawn (1st loop
576 // must always be taken to setup dx3/dx4)
// Step x3/u3/v3 forward by (y1 - y0) rows using the existing slopes.
586 if ((y1 - y0) != 0) {
587 x3 += (dx3 * (y1 - y0));
588 u3 += (du3 * (y1 - y0));
589 v3 += (dv3 * (y1 - y0));
// dx4 becomes the x slope of the y1..y2 edge.
591 #ifdef GPU_UNAI_USE_FLOATMATH
592 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
593 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
595 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
597 #else // Integer Division:
598 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
599 dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
601 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
// Mirror case: x4 is stepped to row y1 and dx3/du3/dv3 take the slopes of
// the y1..y2 edge.
606 x4 = i2x(x0) + (dx4 * (y1 - y0));
609 #ifdef GPU_UNAI_USE_FLOATMATH
610 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
611 if ((y2 - y1) != 0) {
612 float finv = FloatInv(y2 - y1);
613 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
614 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
615 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
620 if ((y2 - y1) != 0) {
621 float fdiv = y2 - y1;
622 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
623 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
624 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
629 #else // Integer Division:
630 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
631 if ((y2 - y1) != 0) {
633 xInv((y2 - y1), iF, iS);
634 dx3 = xInvMulx((x2 - x1), iF, iS);
635 du3 = xInvMulx((u2 - u1), iF, iS);
636 dv3 = xInvMulx((v2 - v1), iF, iS);
641 if ((y2 - y1) != 0) {
642 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
643 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
644 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
653 s32 xmin, xmax, ymin, ymax;
654 xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
655 ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
// Top clip: when the start row is above ymin, advance the edges and the
// texture coords by the number of skipped rows.
657 if ((ymin - ya) > 0) {
658 x3 += dx3 * (ymin - ya);
659 x4 += dx4 * (ymin - ya);
660 u3 += du3 * (ymin - ya);
661 v3 += dv3 * (ymin - ya);
// Bottom clip.
665 if (yb > ymax) yb = ymax;
// First pixel of row ya in VRAM; advanced by FRAME_WIDTH per scanline.
671 le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
672 int li=gpu_unai.inn.ilace_mask;
// With progressive interlace disabled pi is 0 and pif is 1, so the
// (ya&pi)==pif test in the loop never skips a row.
673 int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
674 int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
// Scanline loop: one iteration per row, stepping edges and texture coords.
676 for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
677 x3 += dx3, x4 += dx4,
678 u3 += du3, v3 += dv3 )
// Skip rows excluded by the progressive-interlace test.
681 if ((ya&pi)==pif) continue;
685 xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4);
// Sub-pixel correction: itmp is the fixed-point distance from the edge
// position x3 to the first whole pixel xa; u4/v4 are advanced by it.
688 fixed itmp = i2x(xa) - x3;
690 u4 += (du4 * itmp) >> FIXED_BITS;
691 v4 += (dv4 * itmp) >> FIXED_BITS;
// Left clip: advance u4/v4 by the number of pixels skipped up to xmin.
697 if ((xmin - xa) > 0) {
698 u4 += du4 * (xmin - xa);
699 v4 += dv4 * (xmin - xa);
703 // Set u,v coords for inner driver
// Right clip.
707 if (xb > xmax) xb = xmax;
709 gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
712 } while (++cur_pass < total_passes);
715 /*----------------------------------------------------------------------
716 gpuDrawPolyG - Gouraud-shaded, untextured poly

 packet            - Draw-command packet; vertices and per-vertex colors are
                     extracted from it by polyInitVertexBuffer() using
                     POLYTYPE_G.
 gpuPolySpanDriver - Inner driver called for drawn scanlines with gpu_unai,
                     a pointer to the first pixel of the span
                     (PixelBase + xa) and the span width (xb - xa).
 is_quad           - Nonzero: two triangle passes (vbuf[] 0,1,2 then 1,2,3);
                     zero: a single pass.

 The per-pixel color increment is published to the inner driver through
 gpu_unai.inn.gInc and the span start color through gpu_unai.inn.gCol.

 NOTE(review): the slope computations left-shift differences that may be
 negative by FIXED_BITS. That is formally undefined before C++20; the
 targeted compilers presumably treat it as an arithmetic shift -- confirm.
717 ----------------------------------------------------------------------*/
718 void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
721 polyInitVertexBuffer(vbuf, packet, POLYTYPE_G, is_quad);
723 int total_passes = is_quad ? 2 : 1;
727 const PolyVertex* vptrs[3];
// polyUseTriangle() rejects triangles that are too large or fully clipped;
// otherwise it fills vptrs[] ordered by increasing Y and sets x_off/y_off.
729 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
733 s32 x3, dx3, x4, dx4, dx;
734 s32 r3, dr3, g3, dg3, b3, db3;
735 s32 x0, x1, x2, y0, y1, y2;
736 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
// Vertex coords (drawing offset applied) and vertex colors, y0 <= y1 <= y2.
739 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
740 r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
741 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
742 r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
743 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
744 r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
// Cross products; dr4/dg4/db4 are divided by dx4 below to give the
// horizontal (per-pixel) color increments.
// NOTE(review): ya/yb presumably hold Y differences between vertices -- confirm.
748 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
749 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
750 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
751 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
760 #ifdef GPU_UNAI_USE_FLOATMATH
761 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
763 float finv = FloatInv(dx4);
764 dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
765 dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
766 db4 = (fixed)((db4 << FIXED_BITS) * finv);
773 dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
774 dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
775 db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
780 #else // Integer Division:
781 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
785 dr4 = xInvMulx(dr4, iF, iS);
786 dg4 = xInvMulx(dg4, iF, iS);
787 db4 = xInvMulx(db4, iF, iS);
793 dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
794 dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
795 db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
801 // Setup packed Gouraud increment for inner driver
802 gpu_unai.inn.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
// Two iterations. NOTE(review): presumably the part of the triangle above
// the middle vertex (y0..y1) and the part below it (y1..y2) -- confirm.
804 for (s32 loop0 = 2; loop0; loop0--) {
// Per-scanline slopes of x, r, g and b along the y0..y2 edge and of x along
// the y0..y1 edge (dx4); a zero Y span gives zero slopes.
813 #ifdef GPU_UNAI_USE_FLOATMATH
814 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
815 if ((y2 - y0) != 0) {
816 float finv = FloatInv(y2 - y0);
817 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
818 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
819 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
820 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
822 dx3 = dr3 = dg3 = db3 = 0;
824 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
826 if ((y2 - y0) != 0) {
827 float fdiv = y2 - y0;
828 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
829 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
830 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
831 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
833 dx3 = dr3 = dg3 = db3 = 0;
835 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
837 #else // Integer Division:
838 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
839 if ((y2 - y0) != 0) {
841 xInv((y2 - y0), iF, iS);
842 dx3 = xInvMulx((x2 - x0), iF, iS);
843 dr3 = xInvMulx((r2 - r0), iF, iS);
844 dg3 = xInvMulx((g2 - g0), iF, iS);
845 db3 = xInvMulx((b2 - b0), iF, iS);
847 dx3 = dr3 = dg3 = db3 = 0;
849 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
851 if ((y2 - y0) != 0) {
852 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
853 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
854 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
855 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
857 dx3 = dr3 = dg3 = db3 = 0;
859 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
// Mirror case: dx3/dr3/dg3/db3 follow the y0..y1 edge and dx4 the y0..y2 edge.
863 #ifdef GPU_UNAI_USE_FLOATMATH
864 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
865 if ((y1 - y0) != 0) {
866 float finv = FloatInv(y1 - y0);
867 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
868 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
869 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
870 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
872 dx3 = dr3 = dg3 = db3 = 0;
874 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
876 if ((y1 - y0) != 0) {
877 float fdiv = y1 - y0;
878 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
879 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
880 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
881 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
883 dx3 = dr3 = dg3 = db3 = 0;
885 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
887 #else // Integer Division:
888 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
889 if ((y1 - y0) != 0) {
891 xInv((y1 - y0), iF, iS);
892 dx3 = xInvMulx((x1 - x0), iF, iS);
893 dr3 = xInvMulx((r1 - r0), iF, iS);
894 dg3 = xInvMulx((g1 - g0), iF, iS);
895 db3 = xInvMulx((b1 - b0), iF, iS);
897 dx3 = dr3 = dg3 = db3 = 0;
899 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
901 if ((y1 - y0) != 0) {
902 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
903 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
904 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
905 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
907 dx3 = dr3 = dg3 = db3 = 0;
909 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
914 //senquack - break out of final loop if nothing to be drawn (1st loop
915 // must always be taken to setup dx3/dx4)
// Start from vertex 0 (x3, colors) and vertex 1 (x4), then step x3 and the
// colors forward by (y1 - y0) rows using the existing slopes.
921 x3 = i2x(x0); x4 = i2x(x1);
922 r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
924 if ((y1 - y0) != 0) {
925 x3 += (dx3 * (y1 - y0));
926 r3 += (dr3 * (y1 - y0));
927 g3 += (dg3 * (y1 - y0));
928 b3 += (db3 * (y1 - y0));
// dx4 becomes the x slope of the y1..y2 edge.
931 #ifdef GPU_UNAI_USE_FLOATMATH
932 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
933 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
935 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
937 #else // Integer Division:
938 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
939 dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
941 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
// Mirror case: x4 is stepped to row y1, the colors restart at vertex 1 and
// dx3/dr3/dg3/db3 take the slopes of the y1..y2 edge.
946 x4 = i2x(x0) + (dx4 * (y1 - y0));
948 r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1);
950 #ifdef GPU_UNAI_USE_FLOATMATH
951 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
952 if ((y2 - y1) != 0) {
953 float finv = FloatInv(y2 - y1);
954 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
955 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
956 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
957 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
959 dx3 = dr3 = dg3 = db3 = 0;
962 if ((y2 - y1) != 0) {
963 float fdiv = y2 - y1;
964 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
965 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
966 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
967 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
969 dx3 = dr3 = dg3 = db3 = 0;
972 #else // Integer Division:
973 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
974 if ((y2 - y1) != 0) {
976 xInv((y2 - y1), iF, iS);
977 dx3 = xInvMulx((x2 - x1), iF, iS);
978 dr3 = xInvMulx((r2 - r1), iF, iS);
979 dg3 = xInvMulx((g2 - g1), iF, iS);
980 db3 = xInvMulx((b2 - b1), iF, iS);
982 dx3 = dr3 = dg3 = db3 = 0;
985 if ((y2 - y1) != 0) {
986 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
987 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
988 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
989 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
991 dx3 = dr3 = dg3 = db3 = 0;
998 s32 xmin, xmax, ymin, ymax;
999 xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
1000 ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
// Top clip: when the start row is above ymin, advance the edges and the
// colors by the number of skipped rows.
1002 if ((ymin - ya) > 0) {
1003 x3 += (dx3 * (ymin - ya));
1004 x4 += (dx4 * (ymin - ya));
1005 r3 += (dr3 * (ymin - ya));
1006 g3 += (dg3 * (ymin - ya));
1007 b3 += (db3 * (ymin - ya));
// Bottom clip.
1011 if (yb > ymax) yb = ymax;
// Number of scanlines to walk.
1013 int loop1 = yb - ya;
// First pixel of row ya in VRAM; advanced by FRAME_WIDTH per scanline.
1017 le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
1018 int li=gpu_unai.inn.ilace_mask;
// With progressive interlace disabled pi is 0 and pif is 1, so the
// (ya&pi)==pif test in the loop never skips a row.
1019 int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
1020 int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
// Scanline loop: one iteration per row, stepping edges and colors.
1022 for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1023 x3 += dx3, x4 += dx4,
1024 r3 += dr3, g3 += dg3, b3 += db3 )
// Skip rows selected by the interlace mask, then rows excluded by the
// progressive-interlace test.
1026 if (ya&li) continue;
1027 if ((ya&pi)==pif) continue;
1031 xa = FixedCeilToInt(x3);
1032 xb = FixedCeilToInt(x4);
1033 r4 = r3; g4 = g3; b4 = b3;
// Sub-pixel correction: itmp is the fixed-point distance from the edge
// position x3 to the first whole pixel xa; the colors are advanced by it.
1035 fixed itmp = i2x(xa) - x3;
1037 r4 += (dr4 * itmp) >> FIXED_BITS;
1038 g4 += (dg4 * itmp) >> FIXED_BITS;
1039 b4 += (db4 * itmp) >> FIXED_BITS;
// Left clip: advance the colors by the number of pixels skipped up to xmin.
1046 if ((xmin - xa) > 0) {
1047 r4 += (dr4 * (xmin - xa));
1048 g4 += (dg4 * (xmin - xa));
1049 b4 += (db4 * (xmin - xa));
1053 // Setup packed Gouraud color for inner driver
1054 gpu_unai.inn.gCol = gpuPackGouraudCol(r4, g4, b4);
// Right clip.
1056 if (xb > xmax) xb = xmax;
1058 gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
1061 } while (++cur_pass < total_passes);
1064 /*----------------------------------------------------------------------
1065 gpuDrawPolyGT - Gouraud-shaded, textured poly
1066 ----------------------------------------------------------------------*/
1067 void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
1070 polyInitVertexBuffer(vbuf, packet, POLYTYPE_GT, is_quad);
1072 int total_passes = is_quad ? 2 : 1;
1076 const PolyVertex* vptrs[3];
1078 if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
1082 s32 x3, dx3, x4, dx4, dx;
1083 s32 u3, du3, v3, dv3;
1084 s32 r3, dr3, g3, dg3, b3, db3;
1085 s32 x0, x1, x2, y0, y1, y2;
1086 s32 u0, u1, u2, v0, v1, v2;
1087 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
1091 x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
1092 u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
1093 r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
1094 x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
1095 u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
1096 r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
1097 x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
1098 u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
1099 r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
1103 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
1104 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
1105 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
1106 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
1107 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
1108 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
1119 #ifdef GPU_UNAI_USE_FLOATMATH
1120 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1122 float finv = FloatInv(dx4);
1123 du4 = (fixed)((du4 << FIXED_BITS) * finv);
1124 dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
1125 dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
1126 dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
1127 db4 = (fixed)((db4 << FIXED_BITS) * finv);
1129 du4 = dv4 = dr4 = dg4 = db4 = 0;
1134 du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
1135 dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
1136 dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
1137 dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
1138 db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
1140 du4 = dv4 = dr4 = dg4 = db4 = 0;
1143 #else // Integer Division:
1144 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1148 du4 = xInvMulx(du4, iF, iS);
1149 dv4 = xInvMulx(dv4, iF, iS);
1150 dr4 = xInvMulx(dr4, iF, iS);
1151 dg4 = xInvMulx(dg4, iF, iS);
1152 db4 = xInvMulx(db4, iF, iS);
1154 du4 = dv4 = dr4 = dg4 = db4 = 0;
1158 du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
1159 dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
1160 dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
1161 dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
1162 db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
1164 du4 = dv4 = dr4 = dg4 = db4 = 0;
1168 // Set u,v increments and packed Gouraud increment for inner driver
1169 gpu_unai.inn.u_inc = du4;
1170 gpu_unai.inn.v_inc = dv4;
1171 gpu_unai.inn.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
1173 for (s32 loop0 = 2; loop0; loop0--) {
1177 u3 = i2x(u0); v3 = i2x(v0);
1178 r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
1180 #ifdef GPU_UNAI_USE_FLOATMATH
1181 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1182 if ((y2 - y0) != 0) {
1183 float finv = FloatInv(y2 - y0);
1184 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
1185 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
1186 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
1187 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
1188 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
1189 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
1191 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1193 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
1195 if ((y2 - y0) != 0) {
1196 float fdiv = y2 - y0;
1197 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
1198 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
1199 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
1200 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
1201 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
1202 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
1204 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1206 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
1208 #else // Integer Division:
1209 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1210 if ((y2 - y0) != 0) {
1212 xInv((y2 - y0), iF, iS);
1213 dx3 = xInvMulx((x2 - x0), iF, iS);
1214 du3 = xInvMulx((u2 - u0), iF, iS);
1215 dv3 = xInvMulx((v2 - v0), iF, iS);
1216 dr3 = xInvMulx((r2 - r0), iF, iS);
1217 dg3 = xInvMulx((g2 - g0), iF, iS);
1218 db3 = xInvMulx((b2 - b0), iF, iS);
1220 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1222 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
1224 if ((y2 - y0) != 0) {
1225 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
1226 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
1227 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
1228 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
1229 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
1230 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
1232 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1234 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
1238 #ifdef GPU_UNAI_USE_FLOATMATH
1239 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1240 if ((y1 - y0) != 0) {
1241 float finv = FloatInv(y1 - y0);
1242 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
1243 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
1244 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
1245 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
1246 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
1247 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
1249 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1251 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
1253 if ((y1 - y0) != 0) {
1254 float fdiv = y1 - y0;
1255 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
1256 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
1257 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
1258 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
1259 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
1260 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
1262 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1264 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / float(y2 - y0)) : 0;
1266 #else // Integer Division:
1267 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1268 if ((y1 - y0) != 0) {
1270 xInv((y1 - y0), iF, iS);
1271 dx3 = xInvMulx((x1 - x0), iF, iS);
1272 du3 = xInvMulx((u1 - u0), iF, iS);
1273 dv3 = xInvMulx((v1 - v0), iF, iS);
1274 dr3 = xInvMulx((r1 - r0), iF, iS);
1275 dg3 = xInvMulx((g1 - g0), iF, iS);
1276 db3 = xInvMulx((b1 - b0), iF, iS);
1278 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1280 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
1282 if ((y1 - y0) != 0) {
1283 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
1284 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
1285 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
1286 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
1287 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
1288 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
1290 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1292 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
1297 //senquack - break out of the final loop iteration if nothing is left to be
1298 // drawn (the 1st iteration must always be taken to set up dx3/dx4)
1299 if (y1 == y2) break;
1304 x3 = i2x(x0); x4 = i2x(x1);
1305 u3 = i2x(u0); v3 = i2x(v0);
1306 r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
1308 if ((y1 - y0) != 0) {
1309 x3 += (dx3 * (y1 - y0));
1310 u3 += (du3 * (y1 - y0));
1311 v3 += (dv3 * (y1 - y0));
1312 r3 += (dr3 * (y1 - y0));
1313 g3 += (dg3 * (y1 - y0));
1314 b3 += (db3 * (y1 - y0));
1317 #ifdef GPU_UNAI_USE_FLOATMATH
1318 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1319 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
1321 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
1323 #else // Integer Division:
1324 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1325 dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
1327 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
1332 x4 = i2x(x0) + (dx4 * (y1 - y0));
1334 u3 = i2x(u1); v3 = i2x(v1);
1335 r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1);
1336 #ifdef GPU_UNAI_USE_FLOATMATH
1337 #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1338 if ((y2 - y1) != 0) {
1339 float finv = FloatInv(y2 - y1);
1340 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
1341 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
1342 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
1343 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
1344 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
1345 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
1347 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1350 if ((y2 - y1) != 0) {
1351 float fdiv = y2 - y1;
1352 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
1353 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
1354 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
1355 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
1356 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
1357 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
1359 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1362 #else // Integer Division:
1363 #ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1364 if ((y2 - y1) != 0) {
1366 xInv((y2 - y1), iF, iS);
1367 dx3 = xInvMulx((x2 - x1), iF, iS);
1368 du3 = xInvMulx((u2 - u1), iF, iS);
1369 dv3 = xInvMulx((v2 - v1), iF, iS);
1370 dr3 = xInvMulx((r2 - r1), iF, iS);
1371 dg3 = xInvMulx((g2 - g1), iF, iS);
1372 db3 = xInvMulx((b2 - b1), iF, iS);
1374 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1377 if ((y2 - y1) != 0) {
1378 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
1379 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
1380 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
1381 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
1382 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
1383 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
1385 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1392 s32 xmin, xmax, ymin, ymax;
1393 xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
1394 ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
1396 if ((ymin - ya) > 0) {
1397 x3 += (dx3 * (ymin - ya));
1398 x4 += (dx4 * (ymin - ya));
1399 u3 += (du3 * (ymin - ya));
1400 v3 += (dv3 * (ymin - ya));
1401 r3 += (dr3 * (ymin - ya));
1402 g3 += (dg3 * (ymin - ya));
1403 b3 += (db3 * (ymin - ya));
1407 if (yb > ymax) yb = ymax;
1409 int loop1 = yb - ya;
1413 le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
1414 int li=gpu_unai.inn.ilace_mask;
1415 int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
1416 int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
1418 for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1419 x3 += dx3, x4 += dx4,
1420 u3 += du3, v3 += dv3,
1421 r3 += dr3, g3 += dg3, b3 += db3 )
1423 if (ya&li) continue;
1424 if ((ya&pi)==pif) continue;
1429 xa = FixedCeilToInt(x3);
1430 xb = FixedCeilToInt(x4);
1432 r4 = r3; g4 = g3; b4 = b3;
1434 fixed itmp = i2x(xa) - x3;
1436 u4 += (du4 * itmp) >> FIXED_BITS;
1437 v4 += (dv4 * itmp) >> FIXED_BITS;
1438 r4 += (dr4 * itmp) >> FIXED_BITS;
1439 g4 += (dg4 * itmp) >> FIXED_BITS;
1440 b4 += (db4 * itmp) >> FIXED_BITS;
1449 if ((xmin - xa) > 0) {
1450 u4 += du4 * (xmin - xa);
1451 v4 += dv4 * (xmin - xa);
1452 r4 += dr4 * (xmin - xa);
1453 g4 += dg4 * (xmin - xa);
1454 b4 += db4 * (xmin - xa);
1458 // Set packed Gouraud color and u,v coords for inner driver
1459 gpu_unai.inn.u = u4;
1460 gpu_unai.inn.v = v4;
1461 gpu_unai.inn.gCol = gpuPackGouraudCol(r4, g4, b4);
1463 if (xb > xmax) xb = xmax;
1465 gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa));
1468 } while (++cur_pass < total_passes);
1471 #endif /* __GPU_UNAI_GPU_RASTER_POLYGON_H__ */