cdrom: change pause timing again
[pcsx_rearmed.git] / plugins / gpu_senquack / README_senquack.txt
1 //NOTE: You can find the set of original Unai poly routines (disabled now)
2 // at the bottom end of this file.
3
4 //senquack - Original Unai GPU poly routines have been replaced with new
5 // ones based on DrHell routines. The original routines suffered from
6 // shifted rows, causing many quads to have their first triangle drawn
7 // correctly, but the second triangle would randomly have pixels shifted
8 // either left or right or entire rows not drawn at all. Furthermore,
9 // sometimes entire triangles seemed to be either missing or only
10 // partially drawn (most clearly seen in sky/road textures in NFS3,
11 // clock tower in beginning of Castlevania SOTN). Pixel gaps were
12 // prevalent.
13 //
14 // Since DrHell GPU didn't seem to exhibit these artifacts at all, I adapted
15 // its routines to GPU Unai (Unai was probably already originally based on it).
16 // DrHell uses 22.10 fixed point instead of Unai's 16.16, so gpu_fixedpoint.h
17 // required modification as well as gpu_inner.h (where gpuPolySpanFn driver
18 // functions are).
19 //
20 // Originally, I tried to patch up original Unai routines and got as far
21 // as fixing the shifted rows, but still had the other problem of triangles being rendered
22 // wrong (black triangular gaps in NFS3 sky, clock tower in Castlevania SOTN).
23 // I eventually gave up. Even after rewriting/adapting the routines,
24 // however, I still had some random pixel dropouts, specifically in
25 // NFS3 sky texture. I discovered that gpu_inner.h gpuPolySpanFn function
26 // was taking optimizations to an extreme and packing u/v texture coords
27 // into one 32-bit word, reducing their accuracy. Only once they were
28 // handled in full-accuracy individual words was that problem fixed.
29 //
30 // NOTE: I also added support for doing divisions using the FPU, either
31 //  with normal division or multiplication-by-reciprocal.
32 //  To use float division, GPU_UNAI_USE_FLOATMATH should be defined.
33 //  To use float mult-by-reciprocal, GPU_UNAI_USE_FLOAT_DIV_MULTINV
34 //   can be specified (GPU_UNAI_USE_FLOATMATH must also be specified)
35 //  To use inaccurate fixed-point mult-by-reciprocal, define
36 //   GPU_UNAI_USE_INT_DIV_MULTINV. This is the default on older
37 //   ARM devices like Wiz/Caanoo that have neither integer division
38 //   in hardware nor an FPU. It results in some pixel dropouts,
39 //   texture glitches, but less than the original GPU UNAI code.
40 //
41 //  If nothing is specified, integer division will be used.
42 //
43 // NOTE 2: Even with MIPS32R2 having FPU recip.s instruction, and it is
44 //  used when this platform is detected, I found it not to give any
45 //  noticeable speedup over normal float division (in fact seemed a tiny
46 //  tiny bit slower). I also found float division to not provide any
47 //  noticeable speedups versus integer division on MIPS32R2 platform.
48 //  Granted, the differences were all around .5 FPS or less.
49 //
50 // TODO:
51 // * See if anything can be done about remaining pixel gaps in Gran
52 //   Turismo car models, track.
53 // * Find better way of passing parameters to gpuPolySpanFn functions than
54 //   through original Unai method of using global variables u4,v4,du4 etc.
55 // * Come up with some newer way of drawing rows of pixels than by calling
56 //   gpuPolySpanFn through function pointer. For every row, at least on
57 //   MIPS platforms, many registers are having to be pushed/popped from stack
58 //   on each call, which is strange since MIPS has so many registers.
59 // * MIPS MXU/ASM optimized gpuPolySpanFn ?
60
61 //////////////////////////////////////////////////////////////////////////
62 //senquack - Disabled original Unai poly routines left here for reference:
63 // ( from gpu_raster_polygon.h )
64 //////////////////////////////////////////////////////////////////////////
// Range test shared by the triangle routines. For every vertex whose x (or y)
// coordinate is negative, the enclosing function returns immediately when
// either of the other two vertices exceeds it by more than CHKMAX_X (or
// CHKMAX_Y). Relies on variables x0..x2 / y0..y2 being visible at the point
// of use, and on the enclosing function returning void (bare `return;`).
#define GPU_TESTRANGE3() \
{ \
    if (x0 < 0 && ((x1 - x0) > CHKMAX_X || (x2 - x0) > CHKMAX_X)) return; \
    if (x1 < 0 && ((x0 - x1) > CHKMAX_X || (x2 - x1) > CHKMAX_X)) return; \
    if (x2 < 0 && ((x0 - x2) > CHKMAX_X || (x1 - x2) > CHKMAX_X)) return; \
    if (y0 < 0 && ((y1 - y0) > CHKMAX_Y || (y2 - y0) > CHKMAX_Y)) return; \
    if (y1 < 0 && ((y0 - y1) > CHKMAX_Y || (y2 - y1) > CHKMAX_Y)) return; \
    if (y2 < 0 && ((y0 - y2) > CHKMAX_Y || (y1 - y2) > CHKMAX_Y)) return; \
}
74
75 /*----------------------------------------------------------------------
76 F3
77 ----------------------------------------------------------------------*/
78
79 void gpuDrawF3(const PP gpuPolySpanDriver)
80 {
81         const int li=linesInterlace;
82         const int pi=(progressInterlace?(linesInterlace+1):0);
83         const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
84         s32 temp;
85         s32 xa, xb, xmin, xmax;
86         s32 ya, yb, ymin, ymax;
87         s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
88         s32 y0, y1, y2;
89
90         x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2]);
91         y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3]);
92         x1 = GPU_EXPANDSIGN(PacketBuffer.S2[4]);
93         y1 = GPU_EXPANDSIGN(PacketBuffer.S2[5]);
94         x2 = GPU_EXPANDSIGN(PacketBuffer.S2[6]);
95         y2 = GPU_EXPANDSIGN(PacketBuffer.S2[7]);
96
97         GPU_TESTRANGE3();
98         
99         x0 += DrawingOffset[0];   x1 += DrawingOffset[0];   x2 += DrawingOffset[0];
100         y0 += DrawingOffset[1];   y1 += DrawingOffset[1];   y2 += DrawingOffset[1];
101
102         xmin = DrawingArea[0];  xmax = DrawingArea[2];
103         ymin = DrawingArea[1];  ymax = DrawingArea[3];
104
105         {
106                 int rx0 = Max2(xmin,Min3(x0,x1,x2));
107                 int ry0 = Max2(ymin,Min3(y0,y1,y2));
108                 int rx1 = Min2(xmax,Max3(x0,x1,x2));
109                 int ry1 = Min2(ymax,Max3(y0,y1,y2));
110                 if( rx0>=rx1 || ry0>=ry1) return;
111         }
112         
113         PixelData = GPU_RGB16(PacketBuffer.U4[0]);
114
115         if (y0 >= y1)
116         {
117                 if( y0!=y1 || x0>x1 )
118                 {
119                         GPU_SWAP(x0, x1, temp);
120                         GPU_SWAP(y0, y1, temp);
121                 }
122         }
123         if (y1 >= y2)
124         {
125                 if( y1!=y2 || x1>x2 )
126                 {
127                         GPU_SWAP(x1, x2, temp);
128                         GPU_SWAP(y1, y2, temp);
129                 }
130         }
131         if (y0 >= y1)
132         {
133                 if( y0!=y1 || x0>x1 )
134                 {
135                         GPU_SWAP(x0, x1, temp);
136                         GPU_SWAP(y0, y1, temp);
137                 }
138         }
139
140         ya = y2 - y0;
141         yb = y2 - y1;
142         dx =(x2 - x1) * ya - (x2 - x0) * yb;
143
144         for (s32 loop0 = 2; loop0; --loop0)
145         {
146                 if (loop0 == 2)
147                 {
148                         ya = y0;
149                         yb = y1;
150                         x3 = i2x(x0);
151                         x4 = y0!=y1 ? x3 : i2x(x1);
152                         if (dx < 0)
153                         {
154                                 dx3 = xLoDivx((x2 - x0), (y2 - y0));
155                                 dx4 = xLoDivx((x1 - x0), (y1 - y0));
156                         }
157                         else
158                         {
159                                 dx3 = xLoDivx((x1 - x0), (y1 - y0));
160                                 dx4 = xLoDivx((x2 - x0), (y2 - y0));
161                         }
162                 }
163                 else
164                 {
165                         ya = y1;
166                         yb = y2;
167                         if (dx < 0)
168                         {
169                                 x4  = i2x(x1);
170                                 x3  = i2x(x0) + (dx3 * (y1 - y0));
171                                 dx4 = xLoDivx((x2 - x1), (y2 - y1));
172                         }
173                         else
174                         {
175                                 x3  = i2x(x1);
176                                 x4  = i2x(x0) + (dx4 * (y1 - y0));
177                                 dx3 = xLoDivx((x2 - x1), (y2 - y1));
178                         }
179                 }
180
181                 temp = ymin - ya;
182                 if (temp > 0)
183                 {
184                         ya  = ymin;
185                         x3 += dx3*temp;
186                         x4 += dx4*temp;
187                 }
188                 if (yb > ymax) yb = ymax;
189                 if (ya>=yb) continue;
190
191                 x3+= fixed_HALF;
192                 x4+= fixed_HALF;
193
194                 u16* PixelBase  = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
195                 
196                 for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4)
197                 {
198                         if (ya&li) continue;
199                         if ((ya&pi)==pif) continue;
200                         xa = x2i(x3);
201                         xb = x2i(x4);
202                         if( (xa>xmax) || (xb<xmin) ) continue;
203                         if(xa < xmin) xa = xmin;
204                         if(xb > xmax) xb = xmax;
205                         xb-=xa;
206                         if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
207                 }
208         }
209 }
210
211 /*----------------------------------------------------------------------
212 FT3
213 ----------------------------------------------------------------------*/
214
215 void gpuDrawFT3(const PP gpuPolySpanDriver)
216 {
217         const int li=linesInterlace;
218         const int pi=(progressInterlace?(linesInterlace+1):0);
219         const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
220         s32 temp;
221         s32 xa, xb, xmin, xmax;
222         s32 ya, yb, ymin, ymax;
223         s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
224         s32 y0, y1, y2;
225         s32 u0, u1, u2, u3, du3=0;
226         s32 v0, v1, v2, v3, dv3=0;
227
228         x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
229         y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
230         x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] );
231         y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] );
232         x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]);
233         y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]);
234
235         GPU_TESTRANGE3();
236         
237         x0 += DrawingOffset[0];   x1 += DrawingOffset[0];   x2 += DrawingOffset[0];
238         y0 += DrawingOffset[1];   y1 += DrawingOffset[1];   y2 += DrawingOffset[1];
239
240         xmin = DrawingArea[0];  xmax = DrawingArea[2];
241         ymin = DrawingArea[1];  ymax = DrawingArea[3];
242
243         {
244                 int rx0 = Max2(xmin,Min3(x0,x1,x2));
245                 int ry0 = Max2(ymin,Min3(y0,y1,y2));
246                 int rx1 = Min2(xmax,Max3(x0,x1,x2));
247                 int ry1 = Min2(ymax,Max3(y0,y1,y2));
248                 if( rx0>=rx1 || ry0>=ry1) return;
249         }
250         
251         u0 = PacketBuffer.U1[8];  v0 = PacketBuffer.U1[9];
252         u1 = PacketBuffer.U1[16]; v1 = PacketBuffer.U1[17];
253         u2 = PacketBuffer.U1[24]; v2 = PacketBuffer.U1[25];
254
255         r4 = s32(PacketBuffer.U1[0]);
256         g4 = s32(PacketBuffer.U1[1]);
257         b4 = s32(PacketBuffer.U1[2]);
258         dr4 = dg4 = db4 = 0;
259
260         if (y0 >= y1)
261         {
262                 if( y0!=y1 || x0>x1 )
263                 {
264                         GPU_SWAP(x0, x1, temp);
265                         GPU_SWAP(y0, y1, temp);
266                         GPU_SWAP(u0, u1, temp);
267                         GPU_SWAP(v0, v1, temp);
268                 }
269         }
270         if (y1 >= y2)
271         {
272                 if( y1!=y2 || x1>x2 )
273                 {
274                         GPU_SWAP(x1, x2, temp);
275                         GPU_SWAP(y1, y2, temp);
276                         GPU_SWAP(u1, u2, temp);
277                         GPU_SWAP(v1, v2, temp);
278                 }
279         }
280         if (y0 >= y1)
281         {
282                 if( y0!=y1 || x0>x1 )
283                 {
284                         GPU_SWAP(x0, x1, temp);
285                         GPU_SWAP(y0, y1, temp);
286                         GPU_SWAP(u0, u1, temp);
287                         GPU_SWAP(v0, v1, temp);
288                 }
289         }
290
291         ya  = y2 - y0;
292         yb  = y2 - y1;
293         dx  = (x2 - x1) * ya - (x2 - x0) * yb;
294         du4 = (u2 - u1) * ya - (u2 - u0) * yb;
295         dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
296
297         s32 iF,iS;
298         xInv( dx, iF, iS);
299         du4 = xInvMulx( du4, iF, iS);
300         dv4 = xInvMulx( dv4, iF, iS);
301         tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff);
302         tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff;
303
304         for (s32 loop0 = 2; loop0; --loop0)
305         {
306                 if (loop0 == 2)
307                 {
308                         ya = y0;
309                         yb = y1;
310                         u3 = i2x(u0);
311                         v3 = i2x(v0);
312                         x3 = i2x(x0);
313                         x4 = y0!=y1 ? x3 : i2x(x1);
314                         if (dx < 0)
315                         {
316                                 xInv( (y2 - y0), iF, iS);
317                                 dx3 = xInvMulx( (x2 - x0), iF, iS);
318                                 du3 = xInvMulx( (u2 - u0), iF, iS);
319                                 dv3 = xInvMulx( (v2 - v0), iF, iS);
320                                 dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
321                         }
322                         else
323                         {
324                                 xInv( (y1 - y0), iF, iS);
325                                 dx3 = xInvMulx( (x1 - x0), iF, iS);
326                                 du3 = xInvMulx( (u1 - u0), iF, iS);
327                                 dv3 = xInvMulx( (v1 - v0), iF, iS);
328                                 dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
329                         }
330                 }
331                 else
332                 {
333                         ya = y1;
334                         yb = y2;
335                         if (dx < 0)
336                         {
337                                 temp = y1 - y0;
338                                 u3 = i2x(u0) + (du3 * temp);
339                                 v3 = i2x(v0) + (dv3 * temp);
340                                 x3 = i2x(x0) + (dx3 * temp);
341                                 x4 = i2x(x1);
342                                 dx4 = xLoDivx((x2 - x1), (y2 - y1));
343                         }
344                         else
345                         {
346                                 u3 = i2x(u1);
347                                 v3 = i2x(v1);
348                                 x3 = i2x(x1);
349                                 x4 = i2x(x0) + (dx4 * (y1 - y0));
350                                 xInv( (y2 - y1), iF, iS);
351                                 dx3 = xInvMulx( (x2 - x1), iF, iS);
352                                 du3 = xInvMulx( (u2 - u1), iF, iS);
353                                 dv3 = xInvMulx( (v2 - v1), iF, iS);
354                         }
355                 }
356
357                 temp = ymin - ya;
358                 if (temp > 0)
359                 {
360                         ya  = ymin;
361                         x3 += dx3*temp;
362                         x4 += dx4*temp;
363                         u3 += du3*temp;
364                         v3 += dv3*temp;
365                 }
366                 if (yb > ymax) yb = ymax;
367                 if (ya>=yb) continue;
368
369                 x3+= fixed_HALF;
370                 x4+= fixed_HALF;
371                 u3+= fixed_HALF;
372                 v4+= fixed_HALF;
373
374                 u16* PixelBase  = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
375
376                 for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, u3+=du3, v3+=dv3)
377                 {
378                         if (ya&li) continue;
379                         if ((ya&pi)==pif) continue;
380                         xa = x2i(x3);
381                         xb = x2i(x4);
382                         if( (xa>xmax) || (xb<xmin) ) continue;
383
384                         temp = xmin - xa;
385                         if(temp > 0)
386                         {
387                                 xa  = xmin;
388                                 u4 = u3 + du4*temp;
389                                 v4 = v3 + dv4*temp;
390                         }
391                         else
392                         {
393                                 u4 = u3;
394                                 v4 = v3;
395                         }
396                         if(xb > xmax) xb = xmax;
397                         xb-=xa;
398                         if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
399                 }
400         }
401 }
402
403 /*----------------------------------------------------------------------
404 G3
405 ----------------------------------------------------------------------*/
406
407 void gpuDrawG3(const PP gpuPolySpanDriver)
408 {
409         const int li=linesInterlace;
410         const int pi=(progressInterlace?(linesInterlace+1):0);
411         const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
412         s32 temp;
413         s32 xa, xb, xmin, xmax;
414         s32 ya, yb, ymin, ymax;
415         s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
416         s32 y0, y1, y2;
417         s32 r0, r1, r2, r3, dr3=0;
418         s32 g0, g1, g2, g3, dg3=0;
419         s32 b0, b1, b2, b3, db3=0;
420
421         x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
422         y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
423         x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] );
424         y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] );
425         x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]);
426         y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]);
427
428         GPU_TESTRANGE3();
429         
430         x0 += DrawingOffset[0];   x1 += DrawingOffset[0];   x2 += DrawingOffset[0];
431         y0 += DrawingOffset[1];   y1 += DrawingOffset[1];   y2 += DrawingOffset[1];
432
433         xmin = DrawingArea[0];  xmax = DrawingArea[2];
434         ymin = DrawingArea[1];  ymax = DrawingArea[3];
435
436         {
437                 int rx0 = Max2(xmin,Min3(x0,x1,x2));
438                 int ry0 = Max2(ymin,Min3(y0,y1,y2));
439                 int rx1 = Min2(xmax,Max3(x0,x1,x2));
440                 int ry1 = Min2(ymax,Max3(y0,y1,y2));
441                 if( rx0>=rx1 || ry0>=ry1) return;
442         }
443         
444         r0 = PacketBuffer.U1[0];        g0 = PacketBuffer.U1[1];        b0 = PacketBuffer.U1[2];
445         r1 = PacketBuffer.U1[8];        g1 = PacketBuffer.U1[9];        b1 = PacketBuffer.U1[10];
446         r2 = PacketBuffer.U1[16];       g2 = PacketBuffer.U1[17];       b2 = PacketBuffer.U1[18];
447
448         if (y0 >= y1)
449         {
450                 if( y0!=y1 || x0>x1 )
451                 {
452                         GPU_SWAP(x0, x1, temp);         GPU_SWAP(y0, y1, temp);
453                         GPU_SWAP(r0, r1, temp);         GPU_SWAP(g0, g1, temp);         GPU_SWAP(b0, b1, temp);
454                 }
455         }
456         if (y1 >= y2)
457         {
458                 if( y1!=y2 || x1>x2 )
459                 {
460                         GPU_SWAP(x1, x2, temp);         GPU_SWAP(y1, y2, temp);
461                         GPU_SWAP(r1, r2, temp);         GPU_SWAP(g1, g2, temp);   GPU_SWAP(b1, b2, temp);
462                 }
463         }
464         if (y0 >= y1)
465         {
466                 if( y0!=y1 || x0>x1 )
467                 {
468                         GPU_SWAP(x0, x1, temp);         GPU_SWAP(y0, y1, temp);
469                         GPU_SWAP(r0, r1, temp);   GPU_SWAP(g0, g1, temp);               GPU_SWAP(b0, b1, temp);
470                 }
471         }
472
473         ya  = y2 - y0;
474         yb  = y2 - y1;
475         dx  = (x2 - x1) * ya - (x2 - x0) * yb;
476         dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
477         dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
478         db4 = (b2 - b1) * ya - (b2 - b0) * yb;
479
480         s32 iF,iS;
481         xInv(            dx, iF, iS);
482         dr4 = xInvMulx( dr4, iF, iS);
483         dg4 = xInvMulx( dg4, iF, iS);
484         db4 = xInvMulx( db4, iF, iS);
485         u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21);   if(dr4<0) dr+= 1<<21;
486         u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10);   if(dg4<0) dg+= 1<<10;
487         u32 db = (u32)(db4>>14)&(0xffffffff    );   if(db4<0) db+= 1<< 0;
488         lInc = db + dg + dr;
489
490         for (s32 loop0 = 2; loop0; --loop0)
491         {
492                 if (loop0 == 2)
493                 {
494                         ya = y0;
495                         yb = y1;
496                         r3 = i2x(r0);
497                         g3 = i2x(g0);
498                         b3 = i2x(b0);
499                         x3 = i2x(x0);
500                         x4 = y0!=y1 ? x3 : i2x(x1);
501                         if (dx < 0)
502                         {
503                                 xInv(           (y2 - y0), iF, iS);
504                                 dx3 = xInvMulx( (x2 - x0), iF, iS);
505                                 dr3 = xInvMulx( (r2 - r0), iF, iS);
506                                 dg3 = xInvMulx( (g2 - g0), iF, iS);
507                                 db3 = xInvMulx( (b2 - b0), iF, iS);
508                                 dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
509                         }
510                         else
511                         {
512                                 xInv(           (y1 - y0), iF, iS);
513                                 dx3 = xInvMulx( (x1 - x0), iF, iS);
514                                 dr3 = xInvMulx( (r1 - r0), iF, iS);
515                                 dg3 = xInvMulx( (g1 - g0), iF, iS);
516                                 db3 = xInvMulx( (b1 - b0), iF, iS);
517                                 dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
518                         }
519                 }
520                 else
521                 {
522                         ya = y1;
523                         yb = y2;
524                         if (dx < 0)
525                         {
526                                 temp = y1 - y0;
527                                 r3  = i2x(r0) + (dr3 * temp);
528                                 g3  = i2x(g0) + (dg3 * temp);
529                                 b3  = i2x(b0) + (db3 * temp);
530                                 x3  = i2x(x0) + (dx3 * temp);
531                                 x4  = i2x(x1);
532                                 dx4 = xLoDivx((x2 - x1), (y2 - y1));
533                         }
534                         else
535                         {
536                                 r3 = i2x(r1);
537                                 g3 = i2x(g1);
538                                 b3 = i2x(b1);
539                                 x3 = i2x(x1);
540                                 x4 = i2x(x0) + (dx4 * (y1 - y0));
541
542                                 xInv(           (y2 - y1), iF, iS);
543                                 dx3 = xInvMulx( (x2 - x1), iF, iS);
544                                 dr3 = xInvMulx( (r2 - r1), iF, iS);
545                                 dg3 = xInvMulx( (g2 - g1), iF, iS);
546                                 db3 = xInvMulx( (b2 - b1), iF, iS);
547                         }
548                 }
549
550                 temp = ymin - ya;
551                 if (temp > 0)
552                 {
553                         ya  = ymin;
554                         x3 += dx3*temp;   x4 += dx4*temp;
555                         r3 += dr3*temp;   g3 += dg3*temp;   b3 += db3*temp;
556                 }
557                 if (yb > ymax) yb = ymax;
558                 if (ya>=yb) continue;
559
560                 x3+= fixed_HALF;  x4+= fixed_HALF;
561                 r3+= fixed_HALF;  g3+= fixed_HALF;  b3+= fixed_HALF;
562
563                 u16* PixelBase  = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
564                 
565                 for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, r3+=dr3, g3+=dg3, b3+=db3)
566                 {
567                         if (ya&li) continue;
568                         if ((ya&pi)==pif) continue;
569                         xa = x2i(x3);
570                         xb = x2i(x4);
571                         if( (xa>xmax) || (xb<xmin) ) continue;
572
573                         temp = xmin - xa;
574                         if(temp > 0)
575                         {
576                                 xa  = xmin;
577                                 r4 = r3 + dr4*temp;   g4 = g3 + dg4*temp;   b4 = b3 + db4*temp;
578                         }
579                         else
580                         {
581                                 r4 = r3;  g4 = g3;  b4 = b3;
582                         }
583                         if(xb > xmax) xb = xmax;
584                         xb-=xa;
585                         if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
586                 }
587         }
588 }
589
590 /*----------------------------------------------------------------------
591 GT3
592 ----------------------------------------------------------------------*/
593
594 void gpuDrawGT3(const PP gpuPolySpanDriver)
595 {
596         const int li=linesInterlace;
597         const int pi=(progressInterlace?(linesInterlace+1):0);
598         const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
599         s32 temp;
600         s32 xa, xb, xmin, xmax;
601         s32 ya, yb, ymin, ymax;
602         s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
603         s32 y0, y1, y2;
604         s32 u0, u1, u2, u3, du3=0;
605         s32 v0, v1, v2, v3, dv3=0;
606         s32 r0, r1, r2, r3, dr3=0;
607         s32 g0, g1, g2, g3, dg3=0;
608         s32 b0, b1, b2, b3, db3=0;
609
610         x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
611         y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
612         x1 = GPU_EXPANDSIGN(PacketBuffer.S2[8] );
613         y1 = GPU_EXPANDSIGN(PacketBuffer.S2[9] );
614         x2 = GPU_EXPANDSIGN(PacketBuffer.S2[14]);
615         y2 = GPU_EXPANDSIGN(PacketBuffer.S2[15]);
616
617         GPU_TESTRANGE3();
618         
619         x0 += DrawingOffset[0];   x1 += DrawingOffset[0];   x2 += DrawingOffset[0];
620         y0 += DrawingOffset[1];   y1 += DrawingOffset[1];   y2 += DrawingOffset[1];
621
622         xmin = DrawingArea[0];  xmax = DrawingArea[2];
623         ymin = DrawingArea[1];  ymax = DrawingArea[3];
624
625         {
626                 int rx0 = Max2(xmin,Min3(x0,x1,x2));
627                 int ry0 = Max2(ymin,Min3(y0,y1,y2));
628                 int rx1 = Min2(xmax,Max3(x0,x1,x2));
629                 int ry1 = Min2(ymax,Max3(y0,y1,y2));
630                 if( rx0>=rx1 || ry0>=ry1) return;
631         }
632
633         r0 = PacketBuffer.U1[0];        g0 = PacketBuffer.U1[1];        b0 = PacketBuffer.U1[2];
634         u0 = PacketBuffer.U1[8];        v0 = PacketBuffer.U1[9];
635         r1 = PacketBuffer.U1[12];       g1 = PacketBuffer.U1[13];       b1 = PacketBuffer.U1[14];
636         u1 = PacketBuffer.U1[20];       v1 = PacketBuffer.U1[21];
637         r2 = PacketBuffer.U1[24];       g2 = PacketBuffer.U1[25];       b2 = PacketBuffer.U1[26];
638         u2 = PacketBuffer.U1[32];       v2 = PacketBuffer.U1[33];
639
640         if (y0 >= y1)
641         {
642                 if( y0!=y1 || x0>x1 )
643                 {
644                         GPU_SWAP(x0, x1, temp);         GPU_SWAP(y0, y1, temp);
645                         GPU_SWAP(u0, u1, temp);         GPU_SWAP(v0, v1, temp);
646                         GPU_SWAP(r0, r1, temp);         GPU_SWAP(g0, g1, temp);   GPU_SWAP(b0, b1, temp);
647                 }
648         }
649         if (y1 >= y2)
650         {
651                 if( y1!=y2 || x1>x2 )
652                 {
653                         GPU_SWAP(x1, x2, temp);         GPU_SWAP(y1, y2, temp);
654                         GPU_SWAP(u1, u2, temp);         GPU_SWAP(v1, v2, temp);
655                         GPU_SWAP(r1, r2, temp);   GPU_SWAP(g1, g2, temp);               GPU_SWAP(b1, b2, temp);
656                 }
657         }
658         if (y0 >= y1)
659         {
660                 if( y0!=y1 || x0>x1 )
661                 {
662                         GPU_SWAP(x0, x1, temp);         GPU_SWAP(y0, y1, temp);
663                         GPU_SWAP(u0, u1, temp);         GPU_SWAP(v0, v1, temp);
664                         GPU_SWAP(r0, r1, temp);         GPU_SWAP(g0, g1, temp);         GPU_SWAP(b0, b1, temp);
665                 }
666         }
667
668         ya  = y2 - y0;
669         yb  = y2 - y1;
670         dx  = (x2 - x1) * ya - (x2 - x0) * yb;
671         du4 = (u2 - u1) * ya - (u2 - u0) * yb;
672         dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
673         dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
674         dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
675         db4 = (b2 - b1) * ya - (b2 - b0) * yb;
676
677         s32 iF,iS;
678
679         xInv(            dx, iF, iS);
680         du4 = xInvMulx( du4, iF, iS);
681         dv4 = xInvMulx( dv4, iF, iS);
682         dr4 = xInvMulx( dr4, iF, iS);
683         dg4 = xInvMulx( dg4, iF, iS);
684         db4 = xInvMulx( db4, iF, iS);
685         u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21);   if(dr4<0) dr+= 1<<21;
686         u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10);   if(dg4<0) dg+= 1<<10;
687         u32 db = (u32)(db4>>14)&(0xffffffff    );   if(db4<0) db+= 1<< 0;
688         lInc = db + dg + dr;
689         tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff);
690         tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff;
691
692         for (s32 loop0 = 2; loop0; --loop0)
693         {
694                 if (loop0 == 2)
695                 {
696                         ya = y0;
697                         yb = y1;
698                         u3 = i2x(u0);
699                         v3 = i2x(v0);
700                         r3 = i2x(r0);
701                         g3 = i2x(g0);
702                         b3 = i2x(b0);
703                         x3 = i2x(x0);
704                         x4 = y0!=y1 ? x3 : i2x(x1);
705                         if (dx < 0)
706                         {
707                                 xInv(           (y2 - y0), iF, iS);
708                                 dx3 = xInvMulx( (x2 - x0), iF, iS);
709                                 du3 = xInvMulx( (u2 - u0), iF, iS);
710                                 dv3 = xInvMulx( (v2 - v0), iF, iS);
711                                 dr3 = xInvMulx( (r2 - r0), iF, iS);
712                                 dg3 = xInvMulx( (g2 - g0), iF, iS);
713                                 db3 = xInvMulx( (b2 - b0), iF, iS);
714                                 dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
715                         }
716                         else
717                         {
718                                 xInv(           (y1 - y0), iF, iS);
719                                 dx3 = xInvMulx( (x1 - x0), iF, iS);
720                                 du3 = xInvMulx( (u1 - u0), iF, iS);
721                                 dv3 = xInvMulx( (v1 - v0), iF, iS);
722                                 dr3 = xInvMulx( (r1 - r0), iF, iS);
723                                 dg3 = xInvMulx( (g1 - g0), iF, iS);
724                                 db3 = xInvMulx( (b1 - b0), iF, iS);
725                                 dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
726                         }
727                 }
728                 else
729                 {
730                         ya = y1;
731                         yb = y2;
732                         if (dx < 0)
733                         {
734                                 temp = y1 - y0;
735                                 u3  = i2x(u0) + (du3 * temp);
736                                 v3  = i2x(v0) + (dv3 * temp);
737                                 r3  = i2x(r0) + (dr3 * temp);
738                                 g3  = i2x(g0) + (dg3 * temp);
739                                 b3  = i2x(b0) + (db3 * temp);
740                                 x3  = i2x(x0) + (dx3 * temp);
741                                 x4  = i2x(x1);
742                                 dx4 = xLoDivx((x2 - x1), (y2 - y1));
743                         }
744                         else
745                         {
746                                 u3 = i2x(u1);
747                                 v3 = i2x(v1);
748                                 r3 = i2x(r1);
749                                 g3 = i2x(g1);
750                                 b3 = i2x(b1);
751                                 x3 = i2x(x1);
752                                 x4 = i2x(x0) + (dx4 * (y1 - y0));
753
754                                 xInv(           (y2 - y1), iF, iS);
755                                 dx3 = xInvMulx( (x2 - x1), iF, iS);
756                                 du3 = xInvMulx( (u2 - u1), iF, iS);
757                                 dv3 = xInvMulx( (v2 - v1), iF, iS);
758                                 dr3 = xInvMulx( (r2 - r1), iF, iS);
759                                 dg3 = xInvMulx( (g2 - g1), iF, iS);
760                                 db3 = xInvMulx( (b2 - b1), iF, iS);
761                         }
762                 }
763
764                 temp = ymin - ya;
765                 if (temp > 0)
766                 {
767                         ya  = ymin;
768                         x3 += dx3*temp;   x4 += dx4*temp;
769                         u3 += du3*temp;   v3 += dv3*temp;
770                         r3 += dr3*temp;   g3 += dg3*temp;   b3 += db3*temp;
771                 }
772                 if (yb > ymax) yb = ymax;
773                 if (ya>=yb) continue;
774
775                 x3+= fixed_HALF;  x4+= fixed_HALF;
776                 u3+= fixed_HALF;  v4+= fixed_HALF;
777                 r3+= fixed_HALF;  g3+= fixed_HALF;  b3+= fixed_HALF;
778                 u16* PixelBase  = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
779                 
780                 for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, u3+=du3, v3+=dv3, r3+=dr3, g3+=dg3,        b3+=db3)
781                 {
782                         if (ya&li) continue;
783                         if ((ya&pi)==pif) continue;
784                         xa = x2i(x3);
785                         xb = x2i(x4);
786                         if( (xa>xmax) || (xb<xmin))     continue;
787
788                         temp = xmin - xa;
789                         if(temp > 0)
790                         {
791                                 xa  = xmin;
792                                 u4 = u3 + du4*temp;   v4 = v3 + dv4*temp;
793                                 r4 = r3 + dr4*temp;   g4 = g3 + dg4*temp;   b4 = b3 + db4*temp;
794                         }
795                         else
796                         {
797                                 u4 = u3;  v4 = v3;
798                                 r4 = r3;  g4 = g3;  b4 = b3;
799                         }
800                         if(xb > xmax) xb = xmax;
801                         xb-=xa;
802                         if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
803                 }
804         }
805 }
806
807
808 //////////////////////////////////////////////////////////////////////////
809 //senquack - Original Unai poly routines left here for reference:
810 // ( from gpu_inner.h ) NOTE: this uses 16.16, not 22.10 fixed point
811 //////////////////////////////////////////////////////////////////////////
// gpuPolySpanFn<CF>: draws one horizontal polygon span of `count` pixels
// starting at pDst.
//
//   pDst  - first destination pixel; written through and advanced by one
//           pixel per loop iteration.
//   count - number of pixels. Every loop is do { } while (--count), so the
//           caller must pass count >= 1 (a u32 count of 0 would wrap around).
//
// Per-span inputs are read from names declared outside this function:
// r4/g4/b4 (packed into the lighting word lCol), u4/v4 (packed into the
// texture coordinate word tCor), lInc/tInc (per-pixel steps for lCol/tCor),
// tMsk, PixelData, blit_mask, TBA and CBA.
//
// TM, G, L, M, B, MB, BM and BI are not defined in this block; presumably
// they are compile-time render-mode flags derived from CF - TODO confirm
// against gpu_inner.h.
//
// NOTE(review): uMsk is assigned here but never read directly; presumably
// the gpuBlending0x macros reference it by name, so do not rename or remove
// it without checking them.
// NOTE(review): ((u32)pDst) casts a pointer to u32. Only bits 1..3 of the
// address are used, but the cast may not build cleanly where pointers are
// wider than 32 bits.
812 template<const int CF>
813 INLINE void  gpuPolySpanFn(u16 *pDst, u32 count)
814 {
815         if (!TM)
816         {       
817                 // NO TEXTURE
818                 if (!G)
819                 {
820                         // NO GOURAUD
821                         u16 data;
            // lCol layout: b in bits 0-9, g in bits 10-20, r in bits 21-31.
822                         if (L) { u32 lCol=((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); gpuLightingRGB(data,lCol); }
823                         else data=PixelData;
            // Plain fill: no mask test and no blending.
824                         if ((!M)&&(!B))
825                         {
826                                 if (MB) { data = data | 0x8000; }
827                                 do { *pDst++ = data; } while (--count);
828                         }
            // Mask test only: destination pixels with bit 15 set are left untouched.
829                         else if ((M)&&(!B))
830                         {
831                                 if (MB) { data = data | 0x8000; }
832                                 do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count);
833                         }
            // Remaining case (B set): per-pixel blend with optional blit-mask and mask test.
834                         else
835                         {
836                                 u16 uSrc;
837                                 u16 uDst;
838                                 u32 uMsk; if (BM==0) uMsk=0x7BDE;
839                                 u32 bMsk; if (BI) bMsk=blit_mask;
840                                 do
841                                 {
842                                         // blit-mask
                    // bit ((address >> 1) & 7) of bMsk set -> skip this pixel
843                                         if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endtile; }
844                                         //  masking
845                                         uDst = *pDst;
846                                         if(M) { if (uDst&0x8000) goto endtile;  }
847                                         uSrc = data;
848                                         //  blend
849                                         if (BM==0) gpuBlending00(uSrc, uDst);
850                                         if (BM==1) gpuBlending01(uSrc, uDst);
851                                         if (BM==2) gpuBlending02(uSrc, uDst);
852                                         if (BM==3) gpuBlending03(uSrc, uDst);
                    // MB forces bit 15 on in the stored pixel.
853                                         if (MB) { *pDst = uSrc | 0x8000; }
854                                         else    { *pDst = uSrc; }
855                                         endtile: pDst++;
856                                 }
857                                 while (--count);
858                         }
859                 }
860                 else
861                 {
862                         // GOURAUD
863                         u16 uDst;
864                         u16 uSrc;
865                         u32 linc=lInc;
            // Same bit layout as the flat lCol (b 0-9, g 10-20, r 21-31), built with different shifts.
866                         u32 lCol=((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21));
867                         u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE;
868                         u32 bMsk; if (BI) bMsk=blit_mask;
869                         do
870                         {
871                                 // blit-mask
872                                 if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endgou; }
873                                 //  masking
874                                 if(M) { uDst = *pDst;  if (uDst&0x8000) goto endgou;  }
875                                 //  blend
876                                 if(B)
877                                 {
878                                         //  light
879                                         gpuLightingRGB(uSrc,lCol);
                    // With M set, uDst was already loaded by the mask test above.
880                                         if(!M)    { uDst = *pDst; }
881                                         if (BM==0) gpuBlending00(uSrc, uDst);
882                                         if (BM==1) gpuBlending01(uSrc, uDst);
883                                         if (BM==2) gpuBlending02(uSrc, uDst);
884                                         if (BM==3) gpuBlending03(uSrc, uDst);
885                                 }
886                                 else
887                                 {
888                                         //  light
889                                         gpuLightingRGB(uSrc,lCol);
890                                 }
891                                 if (MB) { *pDst = uSrc | 0x8000; }
892                                 else    { *pDst = uSrc; }
                // lCol is stepped after the label, so skipped pixels still advance it.
893                                 endgou: pDst++; lCol=(lCol+linc);
894                         }
895                         while (--count);
896                 }
897         }
898         else
899         {
900                 // TEXTURE
901                 u16 uDst;
902                 u16 uSrc;
903                 u32 linc; if (L&&G) linc=lInc;
904                 u32 tinc=tInc;
905                 u32 tmsk=tMsk;
        // tCor packs u into bits 16-30 and v into bits 0-14; tmsk is re-applied after every step.
906                 u32 tCor = ((u32)( u4<<7)&0x7fff0000) | ((u32)( v4>>9)&0x00007fff); tCor&= tmsk;
907                 const u16* _TBA=TBA;
        // _CBA is only loaded when TM != 3; the TM==3 path reads _TBA directly.
908                 const u16* _CBA; if (TM!=3) _CBA=CBA;
909                 u32 lCol;
        // lCol is left unset when L is off; it is only used under if(L) below.
910                 if(L && !G) { lCol = ((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); }
911                 else if(L && G) { lCol = ((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21));  }
912                 u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE;
913                 u32 bMsk; if (BI) bMsk=blit_mask;
914                 do
915                 {
916                         // blit-mask
917                         if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endpoly; }
918                         //  masking
919                         if(M) { uDst = *pDst;  if (uDst&0x8000) goto endpoly;  }
920                         //  texture
            // TM==1: one texture byte holds two 4-bit indices; (tu&1) picks the nibble, which indexes _CBA.
            // TM==2: the texture byte itself indexes _CBA.
            // TM==3: the texel is read straight from _TBA as a u16.
            // In every mode a zero texel skips the pixel.
921                         if (TM==1) { u32 tu=(tCor>>23); u32 tv=(tCor<<4)&(0xff<<11); u8 rgb=((u8*)_TBA)[tv+(tu>>1)]; uSrc=_CBA[(rgb>>((tu&1)<<2))&0xf]; if(!uSrc) goto endpoly; }
922                         if (TM==2) { uSrc = _CBA[(((u8*)_TBA)[(tCor>>23)+((tCor<<4)&(0xff<<11))])]; if(!uSrc)  goto endpoly; }
923                         if (TM==3) { uSrc = _TBA[(tCor>>23)+((tCor<<3)&(0xff<<10))]; if(!uSrc)  goto endpoly; }
924                         //  blend
925                         if(B)
926                         {
                // Only texels with bit 15 set are blended with the destination; others are only lit.
927                                 if (uSrc&0x8000)
928                                 {
929                                         //  light
930                                         if(L) gpuLightingTXT(uSrc, lCol);
931                                         if(!M)    { uDst = *pDst; }
932                                         if (BM==0) gpuBlending00(uSrc, uDst);
933                                         if (BM==1) gpuBlending01(uSrc, uDst);
934                                         if (BM==2) gpuBlending02(uSrc, uDst);
935                                         if (BM==3) gpuBlending03(uSrc, uDst);
936                                 }
937                                 else
938                                 {
939                                         // light
940                                         if(L) gpuLightingTXT(uSrc, lCol);
941                                 }
942                         }
943                         else
944                         {
945                                 //  light
                // Without lighting and with MB off, bit 15 of the texel is cleared before storing.
946                                 if(L)  { gpuLightingTXT(uSrc, lCol); } else if(!MB) { uSrc&= 0x7fff; }
947                         }
948                         if (MB) { *pDst = uSrc | 0x8000; }
949                         else    { *pDst = uSrc; }
            // Texture coordinate (and, for L&&G, lighting) are stepped for skipped pixels too.
950                         endpoly: pDst++;
951                         tCor=(tCor+tinc)&tmsk;
952                         if (L&&G) lCol=(lCol+linc);
953                 }
954                 while (--count);
955         }
956 }