1 //NOTE: You can find the set of original Unai poly routines (disabled now)
2 // at the bottom end of this file.
4 //senquack - Original Unai GPU poly routines have been replaced with new
5 // ones based on DrHell routines. The original routines suffered from
6 // shifted rows, causing many quads to have their first triangle drawn
7 // correctly, but the second triangle would randomly have pixels shifted
8 // either left or right or entire rows not drawn at all. Furthermore,
9 // sometimes entire triangles seemed to be either missing or only
10 // partially drawn (most clearly seen in sky/road textures in NFS3,
11 // clock tower in beginning of Castlevania SOTN). Pixel gaps were
14 // Since DrHell GPU didn't seem to exhibit these artifacts at all, I adapted
15 // its routines to GPU Unai (Unai was probably already originally based on it).
16 // DrHell uses 22.10 fixed point instead of Unai's 16.16, so gpu_fixedpoint.h
17 // required modification as well as gpu_inner.h (where gpuPolySpanFn driver
20 // Originally, I tried to patch up original Unai routines and got as far
21 // as fixing the shifted rows, but still had other problem of triangles rendered
22 // wrong (black triangular gaps in NFS3 sky, clock tower in Castlevania SOTN).
23 // I eventually gave up. Even after rewriting/adapting the routines,
24 // however, I still had some random pixel dropouts, specifically in
25 // NFS3 sky texture. I discovered that gpu_inner.h gpuPolySpanFn function
26 // was taking optimizations to an extreme and packing u/v texture coords
27 // into one 32-bit word, reducing their accuracy. Only once they were
28 // handled in full-accuracy individual words was that problem fixed.
30 // NOTE: I also added support for doing divisions using the FPU, either
31 // with normal division or multiplication-by-reciprocal.
32 // To use float division, GPU_UNAI_USE_FLOATMATH should be defined.
33 // To use float mult-by-reciprocal, GPU_UNAI_USE_FLOAT_DIV_MULTINV
34 // can be specified (GPU_UNAI_USE_FLOATMATH must also be specified)
35 // To use inaccurate fixed-point mult-by-reciprocal, define
36 // GPU_UNAI_USE_INT_DIV_MULTINV. This is the default on older
37 // ARM devices like Wiz/Caanoo that have neither integer division
38 // in hardware nor an FPU. It results in some pixel dropouts,
39 // texture glitches, but less than the original GPU UNAI code.
41 // If nothing is specified, integer division will be used.
43 // NOTE 2: Even with MIPS32R2 having FPU recip.s instruction, and it is
44 // used when this platform is detected, I found it not to give any
45 // noticeable speedup over normal float division (in fact seemed a tiny
46 // tiny bit slower). I also found float division to not provide any
47 // noticeable speedups versus integer division on MIPS32R2 platform.
48 // Granted, the differences were all around .5 FPS or less.
51 // * See if anything can be done about remaining pixel gaps in Gran
52 // Turismo car models, track.
53 // * Find better way of passing parameters to gpuPolySpanFn functions than
54 // through original Unai method of using global variables u4,v4,du4 etc.
55 // * Come up with some newer way of drawing rows of pixels than by calling
56 // gpuPolySpanFn through function pointer. For every row, at least on
57 // MIPS platforms, many registers are having to be pushed/popped from stack
58 // on each call, which is strange since MIPS has so many registers.
59 // * MIPS MXU/ASM optimized gpuPolySpanFn ?
61 //////////////////////////////////////////////////////////////////////////
62 //senquack - Disabled original Unai poly routines left here for reference:
63 // ( from gpu_raster_polygon.h )
64 //////////////////////////////////////////////////////////////////////////
// GPU_TESTRANGE3: early-out range test for a triangle's three vertices.
// Expects x0,x1,x2 and y0,y1,y2 to be in scope at the expansion site.
// For every coordinate that is negative, the expanding function returns
// if the distance from it to either of the other two coordinates on the
// same axis exceeds CHKMAX_X (x axis) or CHKMAX_Y (y axis).
// Because the body uses a bare `return`, it can only be expanded inside a
// function returning void.
// NOTE(review): the last visible line still ends with a continuation
// backslash, so the macro body continues onto whatever line follows it;
// the opening and closing lines of the body are not present in this listing.
65 #define GPU_TESTRANGE3() \
67 if(x0<0) { if((x1-x0)>CHKMAX_X) return; if((x2-x0)>CHKMAX_X) return; } \
68 if(x1<0) { if((x0-x1)>CHKMAX_X) return; if((x2-x1)>CHKMAX_X) return; } \
69 if(x2<0) { if((x0-x2)>CHKMAX_X) return; if((x1-x2)>CHKMAX_X) return; } \
70 if(y0<0) { if((y1-y0)>CHKMAX_Y) return; if((y2-y0)>CHKMAX_Y) return; } \
71 if(y1<0) { if((y0-y1)>CHKMAX_Y) return; if((y2-y1)>CHKMAX_Y) return; } \
72 if(y2<0) { if((y0-y2)>CHKMAX_Y) return; if((y1-y2)>CHKMAX_Y) return; } \
75 /*----------------------------------------------------------------------
77 ----------------------------------------------------------------------*/
// gpuDrawF3: rasterises one triangle whose three vertices are read from
// PacketBuffer, emitting one horizontal span per visible row.
//   gpuPolySpanDriver - callback invoked as (PixelBase + xa, xb) for each row.
// NOTE(review): braces and a number of statements of this routine are not
// present in this listing; the comments describe only the visible lines.
79 void gpuDrawF3(const PP gpuPolySpanDriver)
// li is not referenced by any line visible here.
81 const int li=linesInterlace;
// pi/pif form the row-skip test used in the row loop: rows where
// (ya&pi)==pif are skipped. With progressInterlace off, pi=0 and pif=1,
// so the test can never match and no row is skipped.
82 const int pi=(progressInterlace?(linesInterlace+1):0);
83 const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
85 s32 xa, xb, xmin, xmax;
86 s32 ya, yb, ymin, ymax;
87 s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
// Vertex coordinates come from 16-bit packet slots 2..7, passed through
// GPU_EXPANDSIGN.
90 x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2]);
91 y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3]);
92 x1 = GPU_EXPANDSIGN(PacketBuffer.S2[4]);
93 y1 = GPU_EXPANDSIGN(PacketBuffer.S2[5]);
94 x2 = GPU_EXPANDSIGN(PacketBuffer.S2[6]);
95 y2 = GPU_EXPANDSIGN(PacketBuffer.S2[7]);
// Translate all vertices by the current drawing offset.
99 x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
100 y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
// Clip limits taken from DrawingArea.
102 xmin = DrawingArea[0]; xmax = DrawingArea[2];
103 ymin = DrawingArea[1]; ymax = DrawingArea[3];
// Intersect the triangle's bounding box with the clip limits and return
// early when the intersection is empty or degenerate.
106 int rx0 = Max2(xmin,Min3(x0,x1,x2));
107 int ry0 = Max2(ymin,Min3(y0,y1,y2));
108 int rx1 = Min2(xmax,Max3(x0,x1,x2));
109 int ry1 = Min2(ymax,Max3(y0,y1,y2));
110 if( rx0>=rx1 || ry0>=ry1) return;
// PixelData is not declared in this function; presumably it is the fill
// colour later read by the span driver - TODO confirm.
113 PixelData = GPU_RGB16(PacketBuffer.U4[0]);
// Three conditional vertex swaps (0<->1, 1<->2, 0<->1) reorder the vertices.
// NOTE(review): any enclosing conditions are on lines not shown.
117 if( y0!=y1 || x0>x1 )
119 GPU_SWAP(x0, x1, temp);
120 GPU_SWAP(y0, y1, temp);
125 if( y1!=y2 || x1>x2 )
127 GPU_SWAP(x1, x2, temp);
128 GPU_SWAP(y1, y2, temp);
133 if( y0!=y1 || x0>x1 )
135 GPU_SWAP(x0, x1, temp);
136 GPU_SWAP(y0, y1, temp);
// dx is built from edge differences weighted by ya/yb, which are assigned
// on lines not shown here.
142 dx =(x2 - x1) * ya - (x2 - x0) * yb;
// Two passes (loop0 = 2, then 1); presumably one per half of the triangle.
144 for (s32 loop0 = 2; loop0; --loop0)
151 x4 = y0!=y1 ? x3 : i2x(x1);
// Per-row edge steps dx3/dx4 are computed with xLoDivx from vertex deltas;
// which pair is chosen depends on conditions not shown.
154 dx3 = xLoDivx((x2 - x0), (y2 - y0));
155 dx4 = xLoDivx((x1 - x0), (y1 - y0));
159 dx3 = xLoDivx((x1 - x0), (y1 - y0));
160 dx4 = xLoDivx((x2 - x0), (y2 - y0));
// Advance one edge from vertex 0 down to y1 and take a new step for the
// edge between vertices 1 and 2.
170 x3 = i2x(x0) + (dx3 * (y1 - y0));
171 dx4 = xLoDivx((x2 - x1), (y2 - y1));
176 x4 = i2x(x0) + (dx4 * (y1 - y0));
177 dx3 = xLoDivx((x2 - x1), (y2 - y1));
// Clamp the end row to the bottom clip limit; skip this pass if empty.
188 if (yb > ymax) yb = ymax;
189 if (ya>=yb) continue;
// PixelBase addresses column 0 of row ya in the frame buffer.
194 u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
// Row loop: step one frame-buffer row and both edge positions per iteration.
196 for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4)
199 if ((ya&pi)==pif) continue;
// xa/xb are assigned on lines not shown. Spans entirely outside
// [xmin,xmax] are skipped, the rest are clamped to it.
202 if( (xa>xmax) || (xb<xmin) ) continue;
203 if(xa < xmin) xa = xmin;
204 if(xb > xmax) xb = xmax;
// The driver is only called when xb is positive.
206 if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
211 /*----------------------------------------------------------------------
213 ----------------------------------------------------------------------*/
// gpuDrawFT3: rasterises one triangle whose vertices carry texture
// coordinates (u,v), emitting one horizontal span per visible row.
//   gpuPolySpanDriver - callback invoked as (PixelBase + xa, xb) for each row.
// NOTE(review): braces and a number of statements of this routine are not
// present in this listing; the comments describe only the visible lines.
215 void gpuDrawFT3(const PP gpuPolySpanDriver)
// li is not referenced by any line visible here.
217 const int li=linesInterlace;
// Row-skip test operands: rows where (ya&pi)==pif are skipped; with
// progressInterlace off (pi=0, pif=1) the test never matches.
218 const int pi=(progressInterlace?(linesInterlace+1):0);
219 const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
221 s32 xa, xb, xmin, xmax;
222 s32 ya, yb, ymin, ymax;
223 s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
225 s32 u0, u1, u2, u3, du3=0;
226 s32 v0, v1, v2, v3, dv3=0;
// Vertex coordinates come from 16-bit packet slots 2/3, 6/7 and 10/11.
228 x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
229 y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
230 x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] );
231 y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] );
232 x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]);
233 y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]);
// Translate all vertices by the current drawing offset.
237 x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
238 y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
// Clip limits taken from DrawingArea.
240 xmin = DrawingArea[0]; xmax = DrawingArea[2];
241 ymin = DrawingArea[1]; ymax = DrawingArea[3];
// Return early when the clipped bounding box is empty or degenerate.
244 int rx0 = Max2(xmin,Min3(x0,x1,x2));
245 int ry0 = Max2(ymin,Min3(y0,y1,y2));
246 int rx1 = Min2(xmax,Max3(x0,x1,x2));
247 int ry1 = Min2(ymax,Max3(y0,y1,y2));
248 if( rx0>=rx1 || ry0>=ry1) return;
// Per-vertex texture coordinates from byte slots 8/9, 16/17 and 24/25.
251 u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9];
252 u1 = PacketBuffer.U1[16]; v1 = PacketBuffer.U1[17];
253 u2 = PacketBuffer.U1[24]; v2 = PacketBuffer.U1[25];
// r4/g4/b4 are not declared in this function; they take byte slots 0..2
// of the packet - presumably the colour applied to the texture, TODO confirm.
255 r4 = s32(PacketBuffer.U1[0]);
256 g4 = s32(PacketBuffer.U1[1]);
257 b4 = s32(PacketBuffer.U1[2]);
// Three conditional vertex swaps (0<->1, 1<->2, 0<->1); u and v travel
// with their vertex. NOTE(review): any enclosing conditions are not shown.
262 if( y0!=y1 || x0>x1 )
264 GPU_SWAP(x0, x1, temp);
265 GPU_SWAP(y0, y1, temp);
266 GPU_SWAP(u0, u1, temp);
267 GPU_SWAP(v0, v1, temp);
272 if( y1!=y2 || x1>x2 )
274 GPU_SWAP(x1, x2, temp);
275 GPU_SWAP(y1, y2, temp);
276 GPU_SWAP(u1, u2, temp);
277 GPU_SWAP(v1, v2, temp);
282 if( y0!=y1 || x0>x1 )
284 GPU_SWAP(x0, x1, temp);
285 GPU_SWAP(y0, y1, temp);
286 GPU_SWAP(u0, u1, temp);
287 GPU_SWAP(v0, v1, temp);
// dx, du4 and dv4 use the same expression shape applied to x, u and v;
// ya/yb are assigned on lines not shown. du4/dv4 are not declared here.
293 dx = (x2 - x1) * ya - (x2 - x0) * yb;
294 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
295 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
// Scale du4/dv4 by the reciprocal held in iF/iS (set up on a line not shown).
299 du4 = xInvMulx( du4, iF, iS);
300 dv4 = xInvMulx( dv4, iF, iS);
// tInc packs the u step into bits 16..30 and the v step into bits 0..14.
// tMsk combines TextureWindow[2]/[3] with a fixed 0x00ff00ff mask.
301 tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff);
302 tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff;
// Two passes (loop0 = 2, then 1); presumably one per half of the triangle.
304 for (s32 loop0 = 2; loop0; --loop0)
313 x4 = y0!=y1 ? x3 : i2x(x1);
// Edge set-up: xInv prepares iF/iS from a height, xInvMulx then derives
// the per-row steps for x, u and v along that edge. The opposite edge's
// x step uses xLoDivx. Which variant runs depends on conditions not shown.
316 xInv( (y2 - y0), iF, iS);
317 dx3 = xInvMulx( (x2 - x0), iF, iS);
318 du3 = xInvMulx( (u2 - u0), iF, iS);
319 dv3 = xInvMulx( (v2 - v0), iF, iS);
320 dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
324 xInv( (y1 - y0), iF, iS);
325 dx3 = xInvMulx( (x1 - x0), iF, iS);
326 du3 = xInvMulx( (u1 - u0), iF, iS);
327 dv3 = xInvMulx( (v1 - v0), iF, iS);
328 dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
// Start values for the second pass, stepped from vertex 0 by temp rows
// (temp is assigned on a line not shown).
338 u3 = i2x(u0) + (du3 * temp);
339 v3 = i2x(v0) + (dv3 * temp);
340 x3 = i2x(x0) + (dx3 * temp);
342 dx4 = xLoDivx((x2 - x1), (y2 - y1));
349 x4 = i2x(x0) + (dx4 * (y1 - y0));
350 xInv( (y2 - y1), iF, iS);
351 dx3 = xInvMulx( (x2 - x1), iF, iS);
352 du3 = xInvMulx( (u2 - u1), iF, iS);
353 dv3 = xInvMulx( (v2 - v1), iF, iS);
// Clamp the end row to the bottom clip limit; skip this pass if empty.
366 if (yb > ymax) yb = ymax;
367 if (ya>=yb) continue;
// PixelBase addresses column 0 of row ya in the frame buffer.
374 u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
// Row loop: step the frame-buffer row, both edge positions and u/v.
376 for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, u3+=du3, v3+=dv3)
379 if ((ya&pi)==pif) continue;
// xa/xb are assigned on lines not shown. Spans entirely outside
// [xmin,xmax] are skipped.
382 if( (xa>xmax) || (xb<xmin) ) continue;
396 if(xb > xmax) xb = xmax;
// The driver is only called when xb is positive.
398 if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
403 /*----------------------------------------------------------------------
405 ----------------------------------------------------------------------*/
// gpuDrawG3: rasterises one triangle whose vertices carry individual
// r/g/b values that are interpolated down the edges and across each row.
//   gpuPolySpanDriver - callback invoked as (PixelBase + xa, xb) for each row.
// NOTE(review): braces and a number of statements of this routine are not
// present in this listing; the comments describe only the visible lines.
407 void gpuDrawG3(const PP gpuPolySpanDriver)
// li is not referenced by any line visible here.
409 const int li=linesInterlace;
// Row-skip test operands: rows where (ya&pi)==pif are skipped; with
// progressInterlace off (pi=0, pif=1) the test never matches.
410 const int pi=(progressInterlace?(linesInterlace+1):0);
411 const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
413 s32 xa, xb, xmin, xmax;
414 s32 ya, yb, ymin, ymax;
415 s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
417 s32 r0, r1, r2, r3, dr3=0;
418 s32 g0, g1, g2, g3, dg3=0;
419 s32 b0, b1, b2, b3, db3=0;
// Vertex coordinates come from 16-bit packet slots 2/3, 6/7 and 10/11.
421 x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
422 y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
423 x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] );
424 y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] );
425 x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]);
426 y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]);
// Translate all vertices by the current drawing offset.
430 x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
431 y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
// Clip limits taken from DrawingArea.
433 xmin = DrawingArea[0]; xmax = DrawingArea[2];
434 ymin = DrawingArea[1]; ymax = DrawingArea[3];
// Return early when the clipped bounding box is empty or degenerate.
437 int rx0 = Max2(xmin,Min3(x0,x1,x2));
438 int ry0 = Max2(ymin,Min3(y0,y1,y2));
439 int rx1 = Min2(xmax,Max3(x0,x1,x2));
440 int ry1 = Min2(ymax,Max3(y0,y1,y2));
441 if( rx0>=rx1 || ry0>=ry1) return;
// Per-vertex colour components from byte slots 0..2, 8..10 and 16..18.
444 r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2];
445 r1 = PacketBuffer.U1[8]; g1 = PacketBuffer.U1[9]; b1 = PacketBuffer.U1[10];
446 r2 = PacketBuffer.U1[16]; g2 = PacketBuffer.U1[17]; b2 = PacketBuffer.U1[18];
// Three conditional vertex swaps (0<->1, 1<->2, 0<->1); colours travel
// with their vertex. NOTE(review): any enclosing conditions are not shown.
450 if( y0!=y1 || x0>x1 )
452 GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
453 GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
458 if( y1!=y2 || x1>x2 )
460 GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp);
461 GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp);
466 if( y0!=y1 || x0>x1 )
468 GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
469 GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
// dx, dr4, dg4 and db4 use the same expression shape applied to x, r, g
// and b; ya/yb are assigned on lines not shown. dr4/dg4/db4 are not
// declared in this function.
475 dx = (x2 - x1) * ya - (x2 - x0) * yb;
476 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
477 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
478 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
// Scale the colour steps by the reciprocal held in iF/iS (set up on a
// line not shown).
482 dr4 = xInvMulx( dr4, iF, iS);
483 dg4 = xInvMulx( dg4, iF, iS);
484 db4 = xInvMulx( db4, iF, iS);
// Reposition each step into its own bit field (r from bit 21, g from
// bit 10, b from bit 0) and add one unit of that field when the step is
// negative. Presumably these are combined into a packed increment on a
// line not shown - TODO confirm.
485 u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21;
486 u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10;
487 u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0;
// Two passes (loop0 = 2, then 1); presumably one per half of the triangle.
490 for (s32 loop0 = 2; loop0; --loop0)
500 x4 = y0!=y1 ? x3 : i2x(x1);
// Edge set-up: xInv prepares iF/iS from a height, xInvMulx then derives
// the per-row steps for x, r, g and b along that edge. The opposite
// edge's x step uses xLoDivx. Which variant runs depends on conditions
// not shown.
503 xInv( (y2 - y0), iF, iS);
504 dx3 = xInvMulx( (x2 - x0), iF, iS);
505 dr3 = xInvMulx( (r2 - r0), iF, iS);
506 dg3 = xInvMulx( (g2 - g0), iF, iS);
507 db3 = xInvMulx( (b2 - b0), iF, iS);
508 dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
512 xInv( (y1 - y0), iF, iS);
513 dx3 = xInvMulx( (x1 - x0), iF, iS);
514 dr3 = xInvMulx( (r1 - r0), iF, iS);
515 dg3 = xInvMulx( (g1 - g0), iF, iS);
516 db3 = xInvMulx( (b1 - b0), iF, iS);
517 dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
// Start values for the second pass, stepped from vertex 0 by temp rows
// (temp is assigned on a line not shown).
527 r3 = i2x(r0) + (dr3 * temp);
528 g3 = i2x(g0) + (dg3 * temp);
529 b3 = i2x(b0) + (db3 * temp);
530 x3 = i2x(x0) + (dx3 * temp);
532 dx4 = xLoDivx((x2 - x1), (y2 - y1));
540 x4 = i2x(x0) + (dx4 * (y1 - y0));
542 xInv( (y2 - y1), iF, iS);
543 dx3 = xInvMulx( (x2 - x1), iF, iS);
544 dr3 = xInvMulx( (r2 - r1), iF, iS);
545 dg3 = xInvMulx( (g2 - g1), iF, iS);
546 db3 = xInvMulx( (b2 - b1), iF, iS);
// Skip ahead by temp rows on every interpolant (the condition and the
// value of temp are on lines not shown).
554 x3 += dx3*temp; x4 += dx4*temp;
555 r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp;
// Clamp the end row to the bottom clip limit; skip this pass if empty.
557 if (yb > ymax) yb = ymax;
558 if (ya>=yb) continue;
// Add fixed_HALF to every edge interpolant before the row loop.
560 x3+= fixed_HALF; x4+= fixed_HALF;
561 r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF;
// PixelBase addresses column 0 of row ya in the frame buffer.
563 u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
// Row loop: step the frame-buffer row, both edge positions and r/g/b.
565 for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, r3+=dr3, g3+=dg3, b3+=db3)
568 if ((ya&pi)==pif) continue;
// xa/xb are assigned on lines not shown. Spans entirely outside
// [xmin,xmax] are skipped.
571 if( (xa>xmax) || (xb<xmin) ) continue;
// Span start colour r4/g4/b4: either the edge value advanced by temp
// pixels, or the edge value itself; the selecting condition is not shown.
577 r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp;
581 r4 = r3; g4 = g3; b4 = b3;
583 if(xb > xmax) xb = xmax;
// The driver is only called when xb is positive.
585 if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
590 /*----------------------------------------------------------------------
592 ----------------------------------------------------------------------*/
// gpuDrawGT3: rasterises one triangle whose vertices carry both texture
// coordinates (u,v) and individual r/g/b values, emitting one horizontal
// span per visible row.
//   gpuPolySpanDriver - callback invoked as (PixelBase + xa, xb) for each row.
// NOTE(review): braces and a number of statements of this routine are not
// present in this listing; the comments describe only the visible lines.
594 void gpuDrawGT3(const PP gpuPolySpanDriver)
// li is not referenced by any line visible here.
596 const int li=linesInterlace;
// Row-skip test operands: rows where (ya&pi)==pif are skipped; with
// progressInterlace off (pi=0, pif=1) the test never matches.
597 const int pi=(progressInterlace?(linesInterlace+1):0);
598 const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
600 s32 xa, xb, xmin, xmax;
601 s32 ya, yb, ymin, ymax;
602 s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
604 s32 u0, u1, u2, u3, du3=0;
605 s32 v0, v1, v2, v3, dv3=0;
606 s32 r0, r1, r2, r3, dr3=0;
607 s32 g0, g1, g2, g3, dg3=0;
608 s32 b0, b1, b2, b3, db3=0;
// Vertex coordinates come from 16-bit packet slots 2/3, 8/9 and 14/15.
610 x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
611 y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
612 x1 = GPU_EXPANDSIGN(PacketBuffer.S2[8] );
613 y1 = GPU_EXPANDSIGN(PacketBuffer.S2[9] );
614 x2 = GPU_EXPANDSIGN(PacketBuffer.S2[14]);
615 y2 = GPU_EXPANDSIGN(PacketBuffer.S2[15]);
// Translate all vertices by the current drawing offset.
619 x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
620 y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
// Clip limits taken from DrawingArea.
622 xmin = DrawingArea[0]; xmax = DrawingArea[2];
623 ymin = DrawingArea[1]; ymax = DrawingArea[3];
// Return early when the clipped bounding box is empty or degenerate.
626 int rx0 = Max2(xmin,Min3(x0,x1,x2));
627 int ry0 = Max2(ymin,Min3(y0,y1,y2));
628 int rx1 = Min2(xmax,Max3(x0,x1,x2));
629 int ry1 = Min2(ymax,Max3(y0,y1,y2));
630 if( rx0>=rx1 || ry0>=ry1) return;
// Per-vertex colour (byte slots 0..2, 12..14, 24..26) and texture
// coordinates (byte slots 8/9, 20/21, 32/33).
633 r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2];
634 u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9];
635 r1 = PacketBuffer.U1[12]; g1 = PacketBuffer.U1[13]; b1 = PacketBuffer.U1[14];
636 u1 = PacketBuffer.U1[20]; v1 = PacketBuffer.U1[21];
637 r2 = PacketBuffer.U1[24]; g2 = PacketBuffer.U1[25]; b2 = PacketBuffer.U1[26];
638 u2 = PacketBuffer.U1[32]; v2 = PacketBuffer.U1[33];
// Three conditional vertex swaps (0<->1, 1<->2, 0<->1); every attribute
// travels with its vertex. NOTE(review): any enclosing conditions are
// not shown.
642 if( y0!=y1 || x0>x1 )
644 GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
645 GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp);
646 GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
651 if( y1!=y2 || x1>x2 )
653 GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp);
654 GPU_SWAP(u1, u2, temp); GPU_SWAP(v1, v2, temp);
655 GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp);
660 if( y0!=y1 || x0>x1 )
662 GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
663 GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp);
664 GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
// dx and the five attribute steps use the same expression shape; ya/yb
// are assigned on lines not shown. du4/dv4/dr4/dg4/db4 are not declared
// in this function.
670 dx = (x2 - x1) * ya - (x2 - x0) * yb;
671 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
672 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
673 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
674 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
675 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
// Scale the steps by the reciprocal held in iF/iS (set up on a line not
// shown).
680 du4 = xInvMulx( du4, iF, iS);
681 dv4 = xInvMulx( dv4, iF, iS);
682 dr4 = xInvMulx( dr4, iF, iS);
683 dg4 = xInvMulx( dg4, iF, iS);
684 db4 = xInvMulx( db4, iF, iS);
// Reposition each colour step into its own bit field (r from bit 21,
// g from bit 10, b from bit 0) and add one unit of that field when the
// step is negative. Presumably these are combined into a packed
// increment on a line not shown - TODO confirm.
685 u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21;
686 u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10;
687 u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0;
// tInc packs the u step into bits 16..30 and the v step into bits 0..14.
// tMsk combines TextureWindow[2]/[3] with a fixed 0x00ff00ff mask.
689 tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff);
690 tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff;
// Two passes (loop0 = 2, then 1); presumably one per half of the triangle.
692 for (s32 loop0 = 2; loop0; --loop0)
704 x4 = y0!=y1 ? x3 : i2x(x1);
// Edge set-up: xInv prepares iF/iS from a height, xInvMulx then derives
// the per-row steps for x, u, v, r, g and b along that edge. The
// opposite edge's x step uses xLoDivx. Which variant runs depends on
// conditions not shown.
707 xInv( (y2 - y0), iF, iS);
708 dx3 = xInvMulx( (x2 - x0), iF, iS);
709 du3 = xInvMulx( (u2 - u0), iF, iS);
710 dv3 = xInvMulx( (v2 - v0), iF, iS);
711 dr3 = xInvMulx( (r2 - r0), iF, iS);
712 dg3 = xInvMulx( (g2 - g0), iF, iS);
713 db3 = xInvMulx( (b2 - b0), iF, iS);
714 dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
718 xInv( (y1 - y0), iF, iS);
719 dx3 = xInvMulx( (x1 - x0), iF, iS);
720 du3 = xInvMulx( (u1 - u0), iF, iS);
721 dv3 = xInvMulx( (v1 - v0), iF, iS);
722 dr3 = xInvMulx( (r1 - r0), iF, iS);
723 dg3 = xInvMulx( (g1 - g0), iF, iS);
724 db3 = xInvMulx( (b1 - b0), iF, iS);
725 dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
// Start values for the second pass, stepped from vertex 0 by temp rows
// (temp is assigned on a line not shown).
735 u3 = i2x(u0) + (du3 * temp);
736 v3 = i2x(v0) + (dv3 * temp);
737 r3 = i2x(r0) + (dr3 * temp);
738 g3 = i2x(g0) + (dg3 * temp);
739 b3 = i2x(b0) + (db3 * temp);
740 x3 = i2x(x0) + (dx3 * temp);
742 dx4 = xLoDivx((x2 - x1), (y2 - y1));
752 x4 = i2x(x0) + (dx4 * (y1 - y0));
754 xInv( (y2 - y1), iF, iS);
755 dx3 = xInvMulx( (x2 - x1), iF, iS);
756 du3 = xInvMulx( (u2 - u1), iF, iS);
757 dv3 = xInvMulx( (v2 - v1), iF, iS);
758 dr3 = xInvMulx( (r2 - r1), iF, iS);
759 dg3 = xInvMulx( (g2 - g1), iF, iS);
760 db3 = xInvMulx( (b2 - b1), iF, iS);
// Skip ahead by temp rows on every interpolant (the condition and the
// value of temp are on lines not shown).
768 x3 += dx3*temp; x4 += dx4*temp;
769 u3 += du3*temp; v3 += dv3*temp;
770 r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp;
// Clamp the end row to the bottom clip limit; skip this pass if empty.
772 if (yb > ymax) yb = ymax;
773 if (ya>=yb) continue;
// Add fixed_HALF to every edge interpolant before the row loop. The bias
// must go on v3, the per-row V interpolant paired with u3: the span
// variable v4 is reassigned from v3 inside the row loop, so biasing v4
// here would be discarded and leave V without the half-unit offset that
// every other interpolant receives.
775 x3+= fixed_HALF; x4+= fixed_HALF;
776 u3+= fixed_HALF; v3+= fixed_HALF;
777 r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF;
// PixelBase addresses column 0 of row ya in the frame buffer.
778 u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
// Row loop: step the frame-buffer row, both edge positions and all five
// attribute interpolants.
780 for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, u3+=du3, v3+=dv3, r3+=dr3, g3+=dg3, b3+=db3)
783 if ((ya&pi)==pif) continue;
// xa/xb are assigned on lines not shown. Spans entirely outside
// [xmin,xmax] are skipped.
786 if( (xa>xmax) || (xb<xmin)) continue;
// Span start values u4/v4 and r4/g4/b4: either the edge values advanced
// by temp pixels, or the edge values themselves; the selecting condition
// is not shown.
792 u4 = u3 + du4*temp; v4 = v3 + dv4*temp;
793 r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp;
798 r4 = r3; g4 = g3; b4 = b3;
800 if(xb > xmax) xb = xmax;
// The driver is only called when xb is positive.
802 if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
808 //////////////////////////////////////////////////////////////////////////
809 //senquack - Original Unai poly routines left here for reference:
810 // ( from gpu_inner.h ) NOTE: this uses 16.16, not 22.10 fixed point
811 //////////////////////////////////////////////////////////////////////////
// gpuPolySpanFn: writes one horizontal span of `count` 16-bit pixels
// starting at pDst.
//   CF    - compile-time configuration value.
//   pDst  - first destination pixel.
//   count - number of pixels in the span.
// The flags tested below (L, MB, M, B, BM, BI, G, TM) are not defined on
// any visible line; presumably they are decoded from CF - TODO confirm.
// NOTE(review): braces, loop headers and several statements are not
// present in this listing, and the function may continue past the last
// line shown; the comments describe only the visible lines.
812 template<const int CF>
813 INLINE void gpuPolySpanFn(u16 *pDst, u32 count)
// Single-colour path: optionally light `data` with a colour packed from
// r4/g4/b4, then store it across the span.
822 if (L) { u32 lCol=((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); gpuLightingRGB(data,lCol); }
// Unconditional store; MB sets bit 15 of the written value.
826 if (MB) { data = data | 0x8000; }
827 do { *pDst++ = data; } while (--count);
// Store only where the destination's bit 15 is clear.
831 if (MB) { data = data | 0x8000; }
832 do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count);
// Single-colour path with blending: BM selects one of four blend helpers.
838 u32 uMsk; if (BM==0) uMsk=0x7BDE;
839 u32 bMsk; if (BI) bMsk=blit_mask;
// With BI set, a pixel is skipped when the bMsk bit indexed by the low
// three bits of its pixel address is set.
843 if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endtile; }
// With M set, destination pixels whose bit 15 is set are left untouched.
846 if(M) { if (uDst&0x8000) goto endtile; }
849 if (BM==0) gpuBlending00(uSrc, uDst);
850 if (BM==1) gpuBlending01(uSrc, uDst);
851 if (BM==2) gpuBlending02(uSrc, uDst);
852 if (BM==3) gpuBlending03(uSrc, uDst);
853 if (MB) { *pDst = uSrc | 0x8000; }
854 else { *pDst = uSrc; }
// Interpolated-colour path: lCol packs b4/g4/r4 into fields at bits 0,
// 10 and 21 and is advanced by linc after every pixel.
866 u32 lCol=((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21));
867 u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE;
868 u32 bMsk; if (BI) bMsk=blit_mask;
872 if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endgou; }
874 if(M) { uDst = *pDst; if (uDst&0x8000) goto endgou; }
879 gpuLightingRGB(uSrc,lCol);
880 if(!M) { uDst = *pDst; }
881 if (BM==0) gpuBlending00(uSrc, uDst);
882 if (BM==1) gpuBlending01(uSrc, uDst);
883 if (BM==2) gpuBlending02(uSrc, uDst);
884 if (BM==3) gpuBlending03(uSrc, uDst);
889 gpuLightingRGB(uSrc,lCol);
891 if (MB) { *pDst = uSrc | 0x8000; }
892 else { *pDst = uSrc; }
893 endgou: pDst++; lCol=(lCol+linc);
// Textured path: tCor packs u4 into the upper half-word and v4 into the
// lower one, and is advanced by tinc under tmsk after every pixel.
903 u32 linc; if (L&&G) linc=lInc;
906 u32 tCor = ((u32)( u4<<7)&0x7fff0000) | ((u32)( v4>>9)&0x00007fff); tCor&= tmsk;
// _CBA is only set up when TM is not 3; the TM==3 fetch below reads _TBA
// directly without it.
908 const u16* _CBA; if (TM!=3) _CBA=CBA;
// lCol is packed with different shifts depending on G.
910 if(L && !G) { lCol = ((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); }
911 else if(L && G) { lCol = ((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21)); }
912 u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE;
913 u32 bMsk; if (BI) bMsk=blit_mask;
917 if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endpoly; }
919 if(M) { uDst = *pDst; if (uDst&0x8000) goto endpoly; }
// Texel fetch. TM==1 reads a byte from _TBA and uses one of its two
// nibbles as an index into _CBA; TM==2 uses the whole byte as the index;
// TM==3 reads a 16-bit value from _TBA directly. A zero texel skips the
// pixel in all three modes.
921 if (TM==1) { u32 tu=(tCor>>23); u32 tv=(tCor<<4)&(0xff<<11); u8 rgb=((u8*)_TBA)[tv+(tu>>1)]; uSrc=_CBA[(rgb>>((tu&1)<<2))&0xf]; if(!uSrc) goto endpoly; }
922 if (TM==2) { uSrc = _CBA[(((u8*)_TBA)[(tCor>>23)+((tCor<<4)&(0xff<<11))])]; if(!uSrc) goto endpoly; }
923 if (TM==3) { uSrc = _TBA[(tCor>>23)+((tCor<<3)&(0xff<<10))]; if(!uSrc) goto endpoly; }
930 if(L) gpuLightingTXT(uSrc, lCol);
931 if(!M) { uDst = *pDst; }
932 if (BM==0) gpuBlending00(uSrc, uDst);
933 if (BM==1) gpuBlending01(uSrc, uDst);
934 if (BM==2) gpuBlending02(uSrc, uDst);
935 if (BM==3) gpuBlending03(uSrc, uDst);
940 if(L) gpuLightingTXT(uSrc, lCol);
// Without lighting and without MB, bit 15 of the texel is cleared.
946 if(L) { gpuLightingTXT(uSrc, lCol); } else if(!MB) { uSrc&= 0x7fff; }
948 if (MB) { *pDst = uSrc | 0x8000; }
949 else { *pDst = uSrc; }
// Advance the packed texture coordinate and, when L and G are both set,
// the packed colour.
951 tCor=(tCor+tinc)&tmsk;
952 if (L&&G) lCol=(lCol+linc);