New, separate GPU plugin based on Unai. (#233)
[pcsx_rearmed.git] / plugins / gpu_senquack / gpu_raster_polygon.h
CommitLineData
0bfe8d59 1/***************************************************************************
2* Copyright (C) 2010 PCSX4ALL Team *
3* Copyright (C) 2010 Unai *
4* *
5* This program is free software; you can redistribute it and/or modify *
6* it under the terms of the GNU General Public License as published by *
7* the Free Software Foundation; either version 2 of the License, or *
8* (at your option) any later version. *
9* *
10* This program is distributed in the hope that it will be useful, *
11* but WITHOUT ANY WARRANTY; without even the implied warranty of *
12* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13* GNU General Public License for more details. *
14* *
15* You should have received a copy of the GNU General Public License *
16* along with this program; if not, write to the *
17* Free Software Foundation, Inc., *
18* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
19***************************************************************************/
20
21#ifndef __GPU_UNAI_GPU_RASTER_POLYGON_H__
22#define __GPU_UNAI_GPU_RASTER_POLYGON_H__
23
24//senquack - NOTE: GPU Unai poly routines have been rewritten/adapted
25// from DrHell routines to fix multiple issues. See README_senquack.txt
26
27///////////////////////////////////////////////////////////////////////////////
28// Shared poly vertex buffer, able to handle 3 or 4-pt polys of any type.
29///////////////////////////////////////////////////////////////////////////////
30
31struct PolyVertex {
32 s32 x, y; // Sign-extended 11-bit X,Y coords
33 union {
34 struct { u8 u, v, pad[2]; } tex; // Texture coords (if used)
35 u32 tex_word;
36 };
37 union {
38 struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used)
39 u32 col_word;
40 };
41};
42
// Independent attribute bits that can be OR'd together to describe a poly.
enum PolyAttribute {
	POLYATTR_TEXTURE = 0x1,  // bit 0: poly is textured
	POLYATTR_GOURAUD = 0x2   // bit 1: poly is Gouraud-shaded
};
47
48enum PolyType {
49 POLYTYPE_F = 0,
50 POLYTYPE_FT = (POLYATTR_TEXTURE),
51 POLYTYPE_G = (POLYATTR_GOURAUD),
52 POLYTYPE_GT = (POLYATTR_TEXTURE | POLYATTR_GOURAUD)
53};
54
55///////////////////////////////////////////////////////////////////////////////
56// polyInitVertexBuffer()
57// Fills vbuf[] array with data from any type of poly draw-command packet.
58///////////////////////////////////////////////////////////////////////////////
59static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyType ptype, u32 is_quad)
60{
61 bool texturing = ptype & POLYATTR_TEXTURE;
62 bool gouraud = ptype & POLYATTR_GOURAUD;
63
64 int vert_stride = 1; // Stride of vertices in cmd packet, in 32-bit words
65 if (texturing)
66 vert_stride++;
67 if (gouraud)
68 vert_stride++;
69
70 int num_verts = (is_quad) ? 4 : 3;
71 u32 *ptr;
72
73 // X,Y coords, adjusted by draw offsets
74 s32 x_off = gpu_senquack.DrawingOffset[0];
75 s32 y_off = gpu_senquack.DrawingOffset[1];
76 ptr = &packet.U4[1];
77 for (int i=0; i < num_verts; ++i, ptr += vert_stride) {
78 s16* coord_ptr = (s16*)ptr;
79 vbuf[i].x = GPU_EXPANDSIGN(coord_ptr[0]) + x_off;
80 vbuf[i].y = GPU_EXPANDSIGN(coord_ptr[1]) + y_off;
81 }
82
83 // U,V texture coords (if applicable)
84 if (texturing) {
85 ptr = &packet.U4[2];
86 for (int i=0; i < num_verts; ++i, ptr += vert_stride)
87 vbuf[i].tex_word = *ptr;
88 }
89
90 // Colors (if applicable)
91 if (gouraud) {
92 ptr = &packet.U4[0];
93 for (int i=0; i < num_verts; ++i, ptr += vert_stride)
94 vbuf[i].col_word = *ptr;
95 }
96}
97
98///////////////////////////////////////////////////////////////////////////////
99// Helper functions to determine which vertex in a 2 or 3 vertex array
100// has the highest/lowest X/Y coordinate.
101// Note: the comparison logic is such that, given a set of vertices with
102// identical values for a given coordinate, a different index will be
103// returned from vertIdxOfLeast..() than a call to vertIdxOfHighest..().
104// This ensures that, during the vertex-ordering phase of rasterization,
105// all three vertices remain unique.
106///////////////////////////////////////////////////////////////////////////////
107
// Index (0 or 1) of the vertex with the smaller X; a tie yields index 0.
template<typename T>
static inline int vertIdxOfLeastXCoord2(const T *Tptr)
{
	if (Tptr[0].x <= Tptr[1].x)
		return 0;
	return 1;
}
113
// Index (0..2) of the vertex with the smallest X; on ties the lowest
// index among the tied vertices is returned.
template<typename T>
static inline int vertIdxOfLeastXCoord3(const T *Tptr)
{
	const int best_of_first_two = vertIdxOfLeastXCoord2(Tptr);
	if (Tptr[best_of_first_two].x <= Tptr[2].x)
		return best_of_first_two;
	return 2;
}
120
// Index (0 or 1) of the vertex with the smaller Y; a tie yields index 0.
template<typename T>
static inline int vertIdxOfLeastYCoord2(const T *Tptr)
{
	if (Tptr[0].y <= Tptr[1].y)
		return 0;
	return 1;
}
126
// Index (0..2) of the vertex with the smallest Y; on ties the lowest
// index among the tied vertices is returned.
template<typename T>
static inline int vertIdxOfLeastYCoord3(const T *Tptr)
{
	const int best_of_first_two = vertIdxOfLeastYCoord2(Tptr);
	if (Tptr[best_of_first_two].y <= Tptr[2].y)
		return best_of_first_two;
	return 2;
}
133
// Index (0 or 1) of the vertex with the larger X; a tie yields index 1,
// i.e. the opposite choice from vertIdxOfLeastXCoord2().
template<typename T>
static inline int vertIdxOfHighestXCoord2(const T *Tptr)
{
	if (Tptr[1].x >= Tptr[0].x)
		return 1;
	return 0;
}
139
// Index (0..2) of the vertex with the largest X; on ties the highest
// index among the tied vertices is returned.
template<typename T>
static inline int vertIdxOfHighestXCoord3(const T *Tptr)
{
	const int best_of_first_two = vertIdxOfHighestXCoord2(Tptr);
	if (Tptr[2].x >= Tptr[best_of_first_two].x)
		return 2;
	return best_of_first_two;
}
146
// Index (0 or 1) of the vertex with the larger Y; a tie yields index 1,
// i.e. the opposite choice from vertIdxOfLeastYCoord2().
template<typename T>
static inline int vertIdxOfHighestYCoord2(const T *Tptr)
{
	if (Tptr[1].y >= Tptr[0].y)
		return 1;
	return 0;
}
152
// Index (0..2) of the vertex with the largest Y; on ties the highest
// index among the tied vertices is returned.
template<typename T>
static inline int vertIdxOfHighestYCoord3(const T *Tptr)
{
	const int best_of_first_two = vertIdxOfHighestYCoord2(Tptr);
	if (Tptr[2].y >= Tptr[best_of_first_two].y)
		return 2;
	return best_of_first_two;
}
159
160///////////////////////////////////////////////////////////////////////////////
161// polyUseTriangle()
162// Determines if the specified triangle should be rendered. If so, it
163// fills the given array of vertex pointers, vert_ptrs, in order of
164// increasing Y coordinate values, as required by rasterization algorithm.
165// Parameter 'tri_num' is 0 for first triangle (idx 0,1,2 of vbuf[]),
166// or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]).
167// Returns true if triangle should be rendered, false if not.
168///////////////////////////////////////////////////////////////////////////////
169static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs)
170{
171 // Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)?
172 const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 0 : 1];
173
174 // Get indices of highest/lowest X,Y coords within triangle
175 int idx_lowest_x = vertIdxOfLeastXCoord3(tri_ptr);
176 int idx_highest_x = vertIdxOfHighestXCoord3(tri_ptr);
177 int idx_lowest_y = vertIdxOfLeastYCoord3(tri_ptr);
178 int idx_highest_y = vertIdxOfHighestYCoord3(tri_ptr);
179
180 // Maximum absolute distance between any two X coordinates is 1023,
181 // and for Y coordinates is 511 (PS1 hardware limitation)
182 int lowest_x = tri_ptr[idx_lowest_x].x;
183 int highest_x = tri_ptr[idx_highest_x].x;
184 int lowest_y = tri_ptr[idx_lowest_y].y;
185 int highest_y = tri_ptr[idx_highest_y].y;
186 if ((highest_x - lowest_x) >= CHKMAX_X ||
187 (highest_y - lowest_y) >= CHKMAX_Y)
188 return false;
189
190 // Determine if triangle is completely outside clipping range
191 int xmin, xmax, ymin, ymax;
192 xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2];
193 ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3];
194 int clipped_lowest_x = Max2(xmin,lowest_x);
195 int clipped_lowest_y = Max2(ymin,lowest_y);
196 int clipped_highest_x = Min2(xmax,highest_x);
197 int clipped_highest_y = Min2(ymax,highest_y);
198 if (clipped_lowest_x >= clipped_highest_x ||
199 clipped_lowest_y >= clipped_highest_y)
200 return false;
201
202 // Order vertex ptrs by increasing y value (draw routines need this).
203 // The middle index is deduced by a binary math trick that depends
204 // on index range always being between 0..2
205 vert_ptrs[0] = tri_ptr + idx_lowest_y;
206 vert_ptrs[1] = tri_ptr + ((idx_lowest_y + idx_highest_y) ^ 3);
207 vert_ptrs[2] = tri_ptr + idx_highest_y;
208 return true;
209}
210
211///////////////////////////////////////////////////////////////////////////////
212// GPU internal polygon drawing functions
213///////////////////////////////////////////////////////////////////////////////
214
215/*----------------------------------------------------------------------
216gpuDrawPolyF - Flat-shaded, untextured poly
217----------------------------------------------------------------------*/
218void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
219{
220 // Set up bgr555 color to be used across calls in inner driver
221 gpu_senquack.PixelData = GPU_RGB16(packet.U4[0]);
222
223 PolyVertex vbuf[4];
224 polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad);
225
226 int total_passes = is_quad ? 2 : 1;
227 int cur_pass = 0;
228 do
229 {
230 const PolyVertex* vptrs[3];
231 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
232 continue;
233
234 s32 xa, xb, ya, yb;
235 s32 x3, dx3, x4, dx4, dx;
236 s32 x0, x1, x2, y0, y1, y2;
237
238 x0 = vptrs[0]->x; y0 = vptrs[0]->y;
239 x1 = vptrs[1]->x; y1 = vptrs[1]->y;
240 x2 = vptrs[2]->x; y2 = vptrs[2]->y;
241
242 ya = y2 - y0;
243 yb = y2 - y1;
244 dx = (x2 - x1) * ya - (x2 - x0) * yb;
245
246 for (int loop0 = 2; loop0; loop0--) {
247 if (loop0 == 2) {
248 ya = y0; yb = y1;
249 x3 = x4 = i2x(x0);
250 if (dx < 0) {
251#ifdef GPU_UNAI_USE_FLOATMATH
252#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
253 dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
254 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
255#else
256 dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
257 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
258#endif
259#else // Integer Division:
260#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
261 dx3 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
262 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
263#else
264 dx3 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
265 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
266#endif
267#endif
268 } else {
269#ifdef GPU_UNAI_USE_FLOATMATH
270#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
271 dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
272 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
273#else
274 dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
275 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
276#endif
277#else // Integer Division:
278#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
279 dx3 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
280 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
281#else
282 dx3 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
283 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
284#endif
285#endif
286 }
287 } else {
288 //senquack - break out of final loop if nothing to be drawn (1st loop
289 // must always be taken to setup dx3/dx4)
290 if (y1 == y2) break;
291
292 ya = y1; yb = y2;
293
294 if (dx < 0) {
295 x3 = i2x(x0) + (dx3 * (y1 - y0));
296 x4 = i2x(x1);
297#ifdef GPU_UNAI_USE_FLOATMATH
298#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
299 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
300#else
301 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
302#endif
303#else // Integer Division:
304#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
305 dx4 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
306#else
307 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
308#endif
309#endif
310 } else {
311 x3 = i2x(x1);
312 x4 = i2x(x0) + (dx4 * (y1 - y0));
313#ifdef GPU_UNAI_USE_FLOATMATH
314#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
315 dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
316#else
317 dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
318#endif
319#else // Integer Division:
320#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
321 dx3 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0;
322#else
323 dx3 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
324#endif
325#endif
326 }
327 }
328
329 s32 xmin, xmax, ymin, ymax;
330 xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2];
331 ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3];
332
333 if ((ymin - ya) > 0) {
334 x3 += (dx3 * (ymin - ya));
335 x4 += (dx4 * (ymin - ya));
336 ya = ymin;
337 }
338
339 if (yb > ymax) yb = ymax;
340
341 int loop1 = yb - ya;
342 if (loop1 <= 0)
343 continue;
344
345 u16* PixelBase = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(0, ya)];
346 int li=gpu_senquack.ilace_mask;
347 int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0);
348 int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1);
349
350 for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH,
351 x3 += dx3, x4 += dx4 )
352 {
353 if (ya&li) continue;
354 if ((ya&pi)==pif) continue;
355
356 xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4);
357 if ((xmin - xa) > 0) xa = xmin;
358 if (xb > xmax) xb = xmax;
359 if ((xb - xa) > 0)
360 gpuPolySpanDriver(gpu_senquack, PixelBase + xa, (xb - xa));
361 }
362 }
363 } while (++cur_pass < total_passes);
364}
365
366/*----------------------------------------------------------------------
367gpuDrawPolyFT - Flat-shaded, textured poly
368----------------------------------------------------------------------*/
369void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
370{
371 // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light)
372 gpu_senquack.r8 = packet.U1[0];
373 gpu_senquack.g8 = packet.U1[1];
374 gpu_senquack.b8 = packet.U1[2];
375 // r5/g5/b5 used if just texture-blending is applied (15-bit light)
376 gpu_senquack.r5 = packet.U1[0] >> 3;
377 gpu_senquack.g5 = packet.U1[1] >> 3;
378 gpu_senquack.b5 = packet.U1[2] >> 3;
379
380 PolyVertex vbuf[4];
381 polyInitVertexBuffer(vbuf, packet, POLYTYPE_FT, is_quad);
382
383 int total_passes = is_quad ? 2 : 1;
384 int cur_pass = 0;
385 do
386 {
387 const PolyVertex* vptrs[3];
388 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
389 continue;
390
391 s32 xa, xb, ya, yb;
392 s32 x3, dx3, x4, dx4, dx;
393 s32 u3, du3, v3, dv3;
394 s32 x0, x1, x2, y0, y1, y2;
395 s32 u0, u1, u2, v0, v1, v2;
396 s32 du4, dv4;
397
398 x0 = vptrs[0]->x; y0 = vptrs[0]->y;
399 u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
400 x1 = vptrs[1]->x; y1 = vptrs[1]->y;
401 u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
402 x2 = vptrs[2]->x; y2 = vptrs[2]->y;
403 u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
404
405 ya = y2 - y0;
406 yb = y2 - y1;
407 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
408 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
409 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
410 dx = dx4;
411 if (dx4 < 0) {
412 dx4 = -dx4;
413 du4 = -du4;
414 dv4 = -dv4;
415 }
416
417#ifdef GPU_UNAI_USE_FLOATMATH
418#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
419 if (dx4 != 0) {
420 float finv = FloatInv(dx4);
421 du4 = (fixed)((du4 << FIXED_BITS) * finv);
422 dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
423 } else {
424 du4 = dv4 = 0;
425 }
426#else
427 if (dx4 != 0) {
428 float fdiv = dx4;
429 du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
430 dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
431 } else {
432 du4 = dv4 = 0;
433 }
434#endif
435#else // Integer Division:
436#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
437 if (dx4 != 0) {
438 int iF, iS;
439 xInv(dx4, iF, iS);
440 du4 = xInvMulx(du4, iF, iS);
441 dv4 = xInvMulx(dv4, iF, iS);
442 } else {
443 du4 = dv4 = 0;
444 }
445#else
446 if (dx4 != 0) {
447 du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
448 dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
449 } else {
450 du4 = dv4 = 0;
451 }
452#endif
453#endif
454 // Set u,v increments for inner driver
455 gpu_senquack.u_inc = du4;
456 gpu_senquack.v_inc = dv4;
457
458 //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here?
459 // (SAME ISSUE ELSEWHERE)
460 for (s32 loop0 = 2; loop0; loop0--) {
461 if (loop0 == 2) {
462 ya = y0; yb = y1;
463 x3 = x4 = i2x(x0);
464 u3 = i2x(u0); v3 = i2x(v0);
465 if (dx < 0) {
466#ifdef GPU_UNAI_USE_FLOATMATH
467#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
468 if ((y2 - y0) != 0) {
469 float finv = FloatInv(y2 - y0);
470 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
471 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
472 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
473 } else {
474 dx3 = du3 = dv3 = 0;
475 }
476 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
477#else
478 if ((y2 - y0) != 0) {
479 float fdiv = y2 - y0;
480 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
481 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
482 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
483 } else {
484 dx3 = du3 = dv3 = 0;
485 }
486 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
487#endif
488#else // Integer Division:
489#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
490 if ((y2 - y0) != 0) {
491 int iF, iS;
492 xInv((y2 - y0), iF, iS);
493 dx3 = xInvMulx((x2 - x0), iF, iS);
494 du3 = xInvMulx((u2 - u0), iF, iS);
495 dv3 = xInvMulx((v2 - v0), iF, iS);
496 } else {
497 dx3 = du3 = dv3 = 0;
498 }
499 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
500#else
501 if ((y2 - y0) != 0) {
502 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
503 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
504 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
505 } else {
506 dx3 = du3 = dv3 = 0;
507 }
508 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
509#endif
510#endif
511 } else {
512#ifdef GPU_UNAI_USE_FLOATMATH
513#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
514 if ((y1 - y0) != 0) {
515 float finv = FloatInv(y1 - y0);
516 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
517 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
518 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
519 } else {
520 dx3 = du3 = dv3 = 0;
521 }
522 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
523#else
524 if ((y1 - y0) != 0) {
525 float fdiv = y1 - y0;
526 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
527 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
528 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
529 } else {
530 dx3 = du3 = dv3 = 0;
531 }
532 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
533#endif
534#else // Integer Division:
535#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
536 if ((y1 - y0) != 0) {
537 int iF, iS;
538 xInv((y1 - y0), iF, iS);
539 dx3 = xInvMulx((x1 - x0), iF, iS);
540 du3 = xInvMulx((u1 - u0), iF, iS);
541 dv3 = xInvMulx((v1 - v0), iF, iS);
542 } else {
543 dx3 = du3 = dv3 = 0;
544 }
545 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
546#else
547 if ((y1 - y0) != 0) {
548 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
549 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
550 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
551 } else {
552 dx3 = du3 = dv3 = 0;
553 }
554 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
555#endif
556#endif
557 }
558 } else {
559 //senquack - break out of final loop if nothing to be drawn (1st loop
560 // must always be taken to setup dx3/dx4)
561 if (y1 == y2) break;
562
563 ya = y1; yb = y2;
564
565 if (dx < 0) {
566 x3 = i2x(x0);
567 x4 = i2x(x1);
568 u3 = i2x(u0);
569 v3 = i2x(v0);
570 if ((y1 - y0) != 0) {
571 x3 += (dx3 * (y1 - y0));
572 u3 += (du3 * (y1 - y0));
573 v3 += (dv3 * (y1 - y0));
574 }
575#ifdef GPU_UNAI_USE_FLOATMATH
576#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
577 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
578#else
579 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
580#endif
581#else // Integer Division:
582#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
583 dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
584#else
585 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
586#endif
587#endif
588 } else {
589 x3 = i2x(x1);
590 x4 = i2x(x0) + (dx4 * (y1 - y0));
591 u3 = i2x(u1);
592 v3 = i2x(v1);
593#ifdef GPU_UNAI_USE_FLOATMATH
594#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
595 if ((y2 - y1) != 0) {
596 float finv = FloatInv(y2 - y1);
597 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
598 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
599 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
600 } else {
601 dx3 = du3 = dv3 = 0;
602 }
603#else
604 if ((y2 - y1) != 0) {
605 float fdiv = y2 - y1;
606 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
607 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
608 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
609 } else {
610 dx3 = du3 = dv3 = 0;
611 }
612#endif
613#else // Integer Division:
614#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
615 if ((y2 - y1) != 0) {
616 int iF, iS;
617 xInv((y2 - y1), iF, iS);
618 dx3 = xInvMulx((x2 - x1), iF, iS);
619 du3 = xInvMulx((u2 - u1), iF, iS);
620 dv3 = xInvMulx((v2 - v1), iF, iS);
621 } else {
622 dx3 = du3 = dv3 = 0;
623 }
624#else
625 if ((y2 - y1) != 0) {
626 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
627 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
628 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
629 } else {
630 dx3 = du3 = dv3 = 0;
631 }
632#endif
633#endif
634 }
635 }
636
637 s32 xmin, xmax, ymin, ymax;
638 xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2];
639 ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3];
640
641 if ((ymin - ya) > 0) {
642 x3 += dx3 * (ymin - ya);
643 x4 += dx4 * (ymin - ya);
644 u3 += du3 * (ymin - ya);
645 v3 += dv3 * (ymin - ya);
646 ya = ymin;
647 }
648
649 if (yb > ymax) yb = ymax;
650
651 int loop1 = yb - ya;
652 if (loop1 <= 0)
653 continue;
654
655 u16* PixelBase = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(0, ya)];
656 int li=gpu_senquack.ilace_mask;
657 int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0);
658 int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1);
659
660 for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
661 x3 += dx3, x4 += dx4,
662 u3 += du3, v3 += dv3 )
663 {
664 if (ya&li) continue;
665 if ((ya&pi)==pif) continue;
666
667 u32 u4, v4;
668
669 xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4);
670 u4 = u3; v4 = v3;
671
672 fixed itmp = i2x(xa) - x3;
673 if (itmp != 0) {
674 u4 += (du4 * itmp) >> FIXED_BITS;
675 v4 += (dv4 * itmp) >> FIXED_BITS;
676 }
677
678 u4 += fixed_HALF;
679 v4 += fixed_HALF;
680
681 if ((xmin - xa) > 0) {
682 u4 += du4 * (xmin - xa);
683 v4 += dv4 * (xmin - xa);
684 xa = xmin;
685 }
686
687 // Set u,v coords for inner driver
688 gpu_senquack.u = u4;
689 gpu_senquack.v = v4;
690
691 if (xb > xmax) xb = xmax;
692 if ((xb - xa) > 0)
693 gpuPolySpanDriver(gpu_senquack, PixelBase + xa, (xb - xa));
694 }
695 }
696 } while (++cur_pass < total_passes);
697}
698
699/*----------------------------------------------------------------------
700gpuDrawPolyG - Gouraud-shaded, untextured poly
701----------------------------------------------------------------------*/
702void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
703{
704 PolyVertex vbuf[4];
705 polyInitVertexBuffer(vbuf, packet, POLYTYPE_G, is_quad);
706
707 int total_passes = is_quad ? 2 : 1;
708 int cur_pass = 0;
709 do
710 {
711 const PolyVertex* vptrs[3];
712 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
713 continue;
714
715 s32 xa, xb, ya, yb;
716 s32 x3, dx3, x4, dx4, dx;
717 s32 r3, dr3, g3, dg3, b3, db3;
718 s32 x0, x1, x2, y0, y1, y2;
719 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
720 s32 dr4, dg4, db4;
721
722 x0 = vptrs[0]->x; y0 = vptrs[0]->y;
723 r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
724 x1 = vptrs[1]->x; y1 = vptrs[1]->y;
725 r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
726 x2 = vptrs[2]->x; y2 = vptrs[2]->y;
727 r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
728
729 ya = y2 - y0;
730 yb = y2 - y1;
731 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
732 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
733 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
734 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
735 dx = dx4;
736 if (dx4 < 0) {
737 dx4 = -dx4;
738 dr4 = -dr4;
739 dg4 = -dg4;
740 db4 = -db4;
741 }
742
743#ifdef GPU_UNAI_USE_FLOATMATH
744#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
745 if (dx4 != 0) {
746 float finv = FloatInv(dx4);
747 dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
748 dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
749 db4 = (fixed)((db4 << FIXED_BITS) * finv);
750 } else {
751 dr4 = dg4 = db4 = 0;
752 }
753#else
754 if (dx4 != 0) {
755 float fdiv = dx4;
756 dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
757 dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
758 db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
759 } else {
760 dr4 = dg4 = db4 = 0;
761 }
762#endif
763#else // Integer Division:
764#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
765 if (dx4 != 0) {
766 int iF, iS;
767 xInv(dx4, iF, iS);
768 dr4 = xInvMulx(dr4, iF, iS);
769 dg4 = xInvMulx(dg4, iF, iS);
770 db4 = xInvMulx(db4, iF, iS);
771 } else {
772 dr4 = dg4 = db4 = 0;
773 }
774#else
775 if (dx4 != 0) {
776 dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
777 dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
778 db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
779 } else {
780 dr4 = dg4 = db4 = 0;
781 }
782#endif
783#endif
784 // Setup packed Gouraud increment for inner driver
785 gpu_senquack.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
786
787 for (s32 loop0 = 2; loop0; loop0--) {
788 if (loop0 == 2) {
789 ya = y0;
790 yb = y1;
791 x3 = x4 = i2x(x0);
792 r3 = i2x(r0);
793 g3 = i2x(g0);
794 b3 = i2x(b0);
795 if (dx < 0) {
796#ifdef GPU_UNAI_USE_FLOATMATH
797#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
798 if ((y2 - y0) != 0) {
799 float finv = FloatInv(y2 - y0);
800 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
801 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
802 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
803 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
804 } else {
805 dx3 = dr3 = dg3 = db3 = 0;
806 }
807 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
808#else
809 if ((y2 - y0) != 0) {
810 float fdiv = y2 - y0;
811 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
812 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
813 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
814 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
815 } else {
816 dx3 = dr3 = dg3 = db3 = 0;
817 }
818 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
819#endif
820#else // Integer Division:
821#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
822 if ((y2 - y0) != 0) {
823 int iF, iS;
824 xInv((y2 - y0), iF, iS);
825 dx3 = xInvMulx((x2 - x0), iF, iS);
826 dr3 = xInvMulx((r2 - r0), iF, iS);
827 dg3 = xInvMulx((g2 - g0), iF, iS);
828 db3 = xInvMulx((b2 - b0), iF, iS);
829 } else {
830 dx3 = dr3 = dg3 = db3 = 0;
831 }
832 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
833#else
834 if ((y2 - y0) != 0) {
835 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
836 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
837 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
838 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
839 } else {
840 dx3 = dr3 = dg3 = db3 = 0;
841 }
842 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
843#endif
844#endif
845 } else {
846#ifdef GPU_UNAI_USE_FLOATMATH
847#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
848 if ((y1 - y0) != 0) {
849 float finv = FloatInv(y1 - y0);
850 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
851 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
852 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
853 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
854 } else {
855 dx3 = dr3 = dg3 = db3 = 0;
856 }
857 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
858#else
859 if ((y1 - y0) != 0) {
860 float fdiv = y1 - y0;
861 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
862 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
863 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
864 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
865 } else {
866 dx3 = dr3 = dg3 = db3 = 0;
867 }
868 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0;
869#endif
870#else // Integer Division:
871#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
872 if ((y1 - y0) != 0) {
873 int iF, iS;
874 xInv((y1 - y0), iF, iS);
875 dx3 = xInvMulx((x1 - x0), iF, iS);
876 dr3 = xInvMulx((r1 - r0), iF, iS);
877 dg3 = xInvMulx((g1 - g0), iF, iS);
878 db3 = xInvMulx((b1 - b0), iF, iS);
879 } else {
880 dx3 = dr3 = dg3 = db3 = 0;
881 }
882 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
883#else
884 if ((y1 - y0) != 0) {
885 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
886 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
887 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
888 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
889 } else {
890 dx3 = dr3 = dg3 = db3 = 0;
891 }
892 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
893#endif
894#endif
895 }
896 } else {
897 //senquack - break out of final loop if nothing to be drawn (1st loop
898 // must always be taken to setup dx3/dx4)
899 if (y1 == y2) break;
900
901 ya = y1; yb = y2;
902
903 if (dx < 0) {
904 x3 = i2x(x0); x4 = i2x(x1);
905 r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
906
907 if ((y1 - y0) != 0) {
908 x3 += (dx3 * (y1 - y0));
909 r3 += (dr3 * (y1 - y0));
910 g3 += (dg3 * (y1 - y0));
911 b3 += (db3 * (y1 - y0));
912 }
913
914#ifdef GPU_UNAI_USE_FLOATMATH
915#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
916 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
917#else
918 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
919#endif
920#else // Integer Division:
921#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
922 dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
923#else
924 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
925#endif
926#endif
927 } else {
928 x3 = i2x(x1);
929 x4 = i2x(x0) + (dx4 * (y1 - y0));
930
931 r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1);
932
933#ifdef GPU_UNAI_USE_FLOATMATH
934#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
935 if ((y2 - y1) != 0) {
936 float finv = FloatInv(y2 - y1);
937 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
938 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
939 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
940 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
941 } else {
942 dx3 = dr3 = dg3 = db3 = 0;
943 }
944#else
945 if ((y2 - y1) != 0) {
946 float fdiv = y2 - y1;
947 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
948 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
949 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
950 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
951 } else {
952 dx3 = dr3 = dg3 = db3 = 0;
953 }
954#endif
955#else // Integer Division:
956#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
957 if ((y2 - y1) != 0) {
958 int iF, iS;
959 xInv((y2 - y1), iF, iS);
960 dx3 = xInvMulx((x2 - x1), iF, iS);
961 dr3 = xInvMulx((r2 - r1), iF, iS);
962 dg3 = xInvMulx((g2 - g1), iF, iS);
963 db3 = xInvMulx((b2 - b1), iF, iS);
964 } else {
965 dx3 = dr3 = dg3 = db3 = 0;
966 }
967#else
968 if ((y2 - y1) != 0) {
969 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
970 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
971 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
972 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
973 } else {
974 dx3 = dr3 = dg3 = db3 = 0;
975 }
976#endif
977#endif
978 }
979 }
980
981 s32 xmin, xmax, ymin, ymax;
982 xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2];
983 ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3];
984
985 if ((ymin - ya) > 0) {
986 x3 += (dx3 * (ymin - ya));
987 x4 += (dx4 * (ymin - ya));
988 r3 += (dr3 * (ymin - ya));
989 g3 += (dg3 * (ymin - ya));
990 b3 += (db3 * (ymin - ya));
991 ya = ymin;
992 }
993
994 if (yb > ymax) yb = ymax;
995
996 int loop1 = yb - ya;
997 if (loop1 <= 0)
998 continue;
999
1000 u16* PixelBase = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(0, ya)];
1001 int li=gpu_senquack.ilace_mask;
1002 int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0);
1003 int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1);
1004
1005 for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1006 x3 += dx3, x4 += dx4,
1007 r3 += dr3, g3 += dg3, b3 += db3 )
1008 {
1009 if (ya&li) continue;
1010 if ((ya&pi)==pif) continue;
1011
1012 u32 r4, g4, b4;
1013
1014 xa = FixedCeilToInt(x3);
1015 xb = FixedCeilToInt(x4);
1016 r4 = r3; g4 = g3; b4 = b3;
1017
1018 fixed itmp = i2x(xa) - x3;
1019 if (itmp != 0) {
1020 r4 += (dr4 * itmp) >> FIXED_BITS;
1021 g4 += (dg4 * itmp) >> FIXED_BITS;
1022 b4 += (db4 * itmp) >> FIXED_BITS;
1023 }
1024
1025 r4 += fixed_HALF;
1026 g4 += fixed_HALF;
1027 b4 += fixed_HALF;
1028
1029 if ((xmin - xa) > 0) {
1030 r4 += (dr4 * (xmin - xa));
1031 g4 += (dg4 * (xmin - xa));
1032 b4 += (db4 * (xmin - xa));
1033 xa = xmin;
1034 }
1035
1036 // Setup packed Gouraud color for inner driver
1037 gpu_senquack.gCol = gpuPackGouraudCol(r4, g4, b4);
1038
1039 if (xb > xmax) xb = xmax;
1040 if ((xb - xa) > 0)
1041 gpuPolySpanDriver(gpu_senquack, PixelBase + xa, (xb - xa));
1042 }
1043 }
1044 } while (++cur_pass < total_passes);
1045}
1046
1047/*----------------------------------------------------------------------
1048gpuDrawPolyGT - Gouraud-shaded, textured poly
1049----------------------------------------------------------------------*/
1050void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
1051{
1052 PolyVertex vbuf[4];
1053 polyInitVertexBuffer(vbuf, packet, POLYTYPE_GT, is_quad);
1054
1055 int total_passes = is_quad ? 2 : 1;
1056 int cur_pass = 0;
1057 do
1058 {
1059 const PolyVertex* vptrs[3];
1060 if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
1061 continue;
1062
1063 s32 xa, xb, ya, yb;
1064 s32 x3, dx3, x4, dx4, dx;
1065 s32 u3, du3, v3, dv3;
1066 s32 r3, dr3, g3, dg3, b3, db3;
1067 s32 x0, x1, x2, y0, y1, y2;
1068 s32 u0, u1, u2, v0, v1, v2;
1069 s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
1070 s32 du4, dv4;
1071 s32 dr4, dg4, db4;
1072
1073 x0 = vptrs[0]->x; y0 = vptrs[0]->y;
1074 u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
1075 r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
1076 x1 = vptrs[1]->x; y1 = vptrs[1]->y;
1077 u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
1078 r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
1079 x2 = vptrs[2]->x; y2 = vptrs[2]->y;
1080 u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
1081 r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
1082
1083 ya = y2 - y0;
1084 yb = y2 - y1;
1085 dx4 = (x2 - x1) * ya - (x2 - x0) * yb;
1086 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
1087 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
1088 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
1089 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
1090 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
1091 dx = dx4;
1092 if (dx4 < 0) {
1093 dx4 = -dx4;
1094 du4 = -du4;
1095 dv4 = -dv4;
1096 dr4 = -dr4;
1097 dg4 = -dg4;
1098 db4 = -db4;
1099 }
1100
1101#ifdef GPU_UNAI_USE_FLOATMATH
1102#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1103 if (dx4 != 0) {
1104 float finv = FloatInv(dx4);
1105 du4 = (fixed)((du4 << FIXED_BITS) * finv);
1106 dv4 = (fixed)((dv4 << FIXED_BITS) * finv);
1107 dr4 = (fixed)((dr4 << FIXED_BITS) * finv);
1108 dg4 = (fixed)((dg4 << FIXED_BITS) * finv);
1109 db4 = (fixed)((db4 << FIXED_BITS) * finv);
1110 } else {
1111 du4 = dv4 = dr4 = dg4 = db4 = 0;
1112 }
1113#else
1114 if (dx4 != 0) {
1115 float fdiv = dx4;
1116 du4 = (fixed)((du4 << FIXED_BITS) / fdiv);
1117 dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv);
1118 dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv);
1119 dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv);
1120 db4 = (fixed)((db4 << FIXED_BITS) / fdiv);
1121 } else {
1122 du4 = dv4 = dr4 = dg4 = db4 = 0;
1123 }
1124#endif
1125#else // Integer Division:
1126#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1127 if (dx4 != 0) {
1128 int iF, iS;
1129 xInv(dx4, iF, iS);
1130 du4 = xInvMulx(du4, iF, iS);
1131 dv4 = xInvMulx(dv4, iF, iS);
1132 dr4 = xInvMulx(dr4, iF, iS);
1133 dg4 = xInvMulx(dg4, iF, iS);
1134 db4 = xInvMulx(db4, iF, iS);
1135 } else {
1136 du4 = dv4 = dr4 = dg4 = db4 = 0;
1137 }
1138#else
1139 if (dx4 != 0) {
1140 du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4);
1141 dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4);
1142 dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4);
1143 dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4);
1144 db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4);
1145 } else {
1146 du4 = dv4 = dr4 = dg4 = db4 = 0;
1147 }
1148#endif
1149#endif
1150 // Set u,v increments and packed Gouraud increment for inner driver
1151 gpu_senquack.u_inc = du4;
1152 gpu_senquack.v_inc = dv4;
1153 gpu_senquack.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
1154
1155 for (s32 loop0 = 2; loop0; loop0--) {
1156 if (loop0 == 2) {
1157 ya = y0; yb = y1;
1158 x3 = x4 = i2x(x0);
1159 u3 = i2x(u0); v3 = i2x(v0);
1160 r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
1161 if (dx < 0) {
1162#ifdef GPU_UNAI_USE_FLOATMATH
1163#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1164 if ((y2 - y0) != 0) {
1165 float finv = FloatInv(y2 - y0);
1166 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
1167 du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
1168 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
1169 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
1170 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
1171 db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
1172 } else {
1173 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1174 }
1175 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
1176#else
1177 if ((y2 - y0) != 0) {
1178 float fdiv = y2 - y0;
1179 dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
1180 du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
1181 dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
1182 dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
1183 dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
1184 db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
1185 } else {
1186 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1187 }
1188 dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
1189#endif
1190#else // Integer Division:
1191#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1192 if ((y2 - y0) != 0) {
1193 int iF, iS;
1194 xInv((y2 - y0), iF, iS);
1195 dx3 = xInvMulx((x2 - x0), iF, iS);
1196 du3 = xInvMulx((u2 - u0), iF, iS);
1197 dv3 = xInvMulx((v2 - v0), iF, iS);
1198 dr3 = xInvMulx((r2 - r0), iF, iS);
1199 dg3 = xInvMulx((g2 - g0), iF, iS);
1200 db3 = xInvMulx((b2 - b0), iF, iS);
1201 } else {
1202 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1203 }
1204 dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
1205#else
1206 if ((y2 - y0) != 0) {
1207 dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
1208 du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
1209 dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
1210 dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
1211 dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
1212 db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
1213 } else {
1214 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1215 }
1216 dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
1217#endif
1218#endif
1219 } else {
1220#ifdef GPU_UNAI_USE_FLOATMATH
1221#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1222 if ((y1 - y0) != 0) {
1223 float finv = FloatInv(y1 - y0);
1224 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
1225 du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
1226 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
1227 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
1228 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
1229 db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
1230 } else {
1231 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1232 }
1233 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
1234#else
1235 if ((y1 - y0) != 0) {
1236 float fdiv = y1 - y0;
1237 dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
1238 du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
1239 dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
1240 dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
1241 dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
1242 db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
1243 } else {
1244 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1245 }
1246 dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / float(y2 - y0)) : 0;
1247#endif
1248#else // Integer Division:
1249#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1250 if ((y1 - y0) != 0) {
1251 int iF, iS;
1252 xInv((y1 - y0), iF, iS);
1253 dx3 = xInvMulx((x1 - x0), iF, iS);
1254 du3 = xInvMulx((u1 - u0), iF, iS);
1255 dv3 = xInvMulx((v1 - v0), iF, iS);
1256 dr3 = xInvMulx((r1 - r0), iF, iS);
1257 dg3 = xInvMulx((g1 - g0), iF, iS);
1258 db3 = xInvMulx((b1 - b0), iF, iS);
1259 } else {
1260 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1261 }
1262 dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
1263#else
1264 if ((y1 - y0) != 0) {
1265 dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
1266 du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
1267 dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
1268 dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
1269 dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
1270 db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
1271 } else {
1272 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1273 }
1274 dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
1275#endif
1276#endif
1277 }
1278 } else {
1279 //senquack - break out of final loop if nothing to be drawn (1st loop
1280 // must always be taken to setup dx3/dx4)
1281 if (y1 == y2) break;
1282
1283 ya = y1; yb = y2;
1284
1285 if (dx < 0) {
1286 x3 = i2x(x0); x4 = i2x(x1);
1287 u3 = i2x(u0); v3 = i2x(v0);
1288 r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
1289
1290 if ((y1 - y0) != 0) {
1291 x3 += (dx3 * (y1 - y0));
1292 u3 += (du3 * (y1 - y0));
1293 v3 += (dv3 * (y1 - y0));
1294 r3 += (dr3 * (y1 - y0));
1295 g3 += (dg3 * (y1 - y0));
1296 b3 += (db3 * (y1 - y0));
1297 }
1298
1299#ifdef GPU_UNAI_USE_FLOATMATH
1300#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1301 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
1302#else
1303 dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
1304#endif
1305#else // Integer Division:
1306#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1307 dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
1308#else
1309 dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
1310#endif
1311#endif
1312 } else {
1313 x3 = i2x(x1);
1314 x4 = i2x(x0) + (dx4 * (y1 - y0));
1315
1316 u3 = i2x(u1); v3 = i2x(v1);
1317 r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1);
1318#ifdef GPU_UNAI_USE_FLOATMATH
1319#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
1320 if ((y2 - y1) != 0) {
1321 float finv = FloatInv(y2 - y1);
1322 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
1323 du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
1324 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
1325 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
1326 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
1327 db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
1328 } else {
1329 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1330 }
1331#else
1332 if ((y2 - y1) != 0) {
1333 float fdiv = y2 - y1;
1334 dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
1335 du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
1336 dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
1337 dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
1338 dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
1339 db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
1340 } else {
1341 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1342 }
1343#endif
1344#else // Integer Division:
1345#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
1346 if ((y2 - y1) != 0) {
1347 int iF, iS;
1348 xInv((y2 - y1), iF, iS);
1349 dx3 = xInvMulx((x2 - x1), iF, iS);
1350 du3 = xInvMulx((u2 - u1), iF, iS);
1351 dv3 = xInvMulx((v2 - v1), iF, iS);
1352 dr3 = xInvMulx((r2 - r1), iF, iS);
1353 dg3 = xInvMulx((g2 - g1), iF, iS);
1354 db3 = xInvMulx((b2 - b1), iF, iS);
1355 } else {
1356 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1357 }
1358#else
1359 if ((y2 - y1) != 0) {
1360 dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
1361 du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
1362 dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
1363 dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
1364 dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
1365 db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
1366 } else {
1367 dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
1368 }
1369#endif
1370#endif
1371 }
1372 }
1373
1374 s32 xmin, xmax, ymin, ymax;
1375 xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2];
1376 ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3];
1377
1378 if ((ymin - ya) > 0) {
1379 x3 += (dx3 * (ymin - ya));
1380 x4 += (dx4 * (ymin - ya));
1381 u3 += (du3 * (ymin - ya));
1382 v3 += (dv3 * (ymin - ya));
1383 r3 += (dr3 * (ymin - ya));
1384 g3 += (dg3 * (ymin - ya));
1385 b3 += (db3 * (ymin - ya));
1386 ya = ymin;
1387 }
1388
1389 if (yb > ymax) yb = ymax;
1390
1391 int loop1 = yb - ya;
1392 if (loop1 <= 0)
1393 continue;
1394
1395 u16* PixelBase = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(0, ya)];
1396 int li=gpu_senquack.ilace_mask;
1397 int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0);
1398 int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1);
1399
1400 for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
1401 x3 += dx3, x4 += dx4,
1402 u3 += du3, v3 += dv3,
1403 r3 += dr3, g3 += dg3, b3 += db3 )
1404 {
1405 if (ya&li) continue;
1406 if ((ya&pi)==pif) continue;
1407
1408 u32 u4, v4;
1409 u32 r4, g4, b4;
1410
1411 xa = FixedCeilToInt(x3);
1412 xb = FixedCeilToInt(x4);
1413 u4 = u3; v4 = v3;
1414 r4 = r3; g4 = g3; b4 = b3;
1415
1416 fixed itmp = i2x(xa) - x3;
1417 if (itmp != 0) {
1418 u4 += (du4 * itmp) >> FIXED_BITS;
1419 v4 += (dv4 * itmp) >> FIXED_BITS;
1420 r4 += (dr4 * itmp) >> FIXED_BITS;
1421 g4 += (dg4 * itmp) >> FIXED_BITS;
1422 b4 += (db4 * itmp) >> FIXED_BITS;
1423 }
1424
1425 u4 += fixed_HALF;
1426 v4 += fixed_HALF;
1427 r4 += fixed_HALF;
1428 g4 += fixed_HALF;
1429 b4 += fixed_HALF;
1430
1431 if ((xmin - xa) > 0) {
1432 u4 += du4 * (xmin - xa);
1433 v4 += dv4 * (xmin - xa);
1434 r4 += dr4 * (xmin - xa);
1435 g4 += dg4 * (xmin - xa);
1436 b4 += db4 * (xmin - xa);
1437 xa = xmin;
1438 }
1439
1440 // Set packed Gouraud color and u,v coords for inner driver
1441 gpu_senquack.u = u4;
1442 gpu_senquack.v = v4;
1443 gpu_senquack.gCol = gpuPackGouraudCol(r4, g4, b4);
1444
1445 if (xb > xmax) xb = xmax;
1446 if ((xb - xa) > 0)
1447 gpuPolySpanDriver(gpu_senquack, PixelBase + xa, (xb - xa));
1448 }
1449 }
1450 } while (++cur_pass < total_passes);
1451}
1452
1453#endif /* __GPU_UNAI_GPU_RASTER_POLYGON_H__ */