fix x86 build
[pcsx_rearmed.git] / plugins / gpu_senquack / README_senquack.txt
CommitLineData
0bfe8d59 1//NOTE: You can find the set of original Unai poly routines (disabled now)
2// at the bottom end of this file.
3
4//senquack - Original Unai GPU poly routines have been replaced with new
5// ones based on DrHell routines. The original routines suffered from
6// shifted rows, causing many quads to have their first triangle drawn
7// correctly, but the second triangle would randomly have pixels shifted
8// either left or right or entire rows not drawn at all. Furthermore,
9// sometimes entire triangles seemed to be either missing or only
10// partially drawn (most clearly seen in sky/road textures in NFS3,
11// clock tower in beginning of Castlevania SOTN). Pixel gaps were
12// prevalent.
13//
14// Since DrHell GPU didn't seem to exhibit these artifacts at all, I adapted
15// its routines to GPU Unai (Unai was probably already originally based on it).
16// DrHell uses 22.10 fixed point instead of Unai's 16.16, so gpu_fixedpoint.h
17// required modification as well as gpu_inner.h (where gpuPolySpanFn driver
18// functions are).
19//
20// Originally, I tried to patch up original Unai routines and got as far
21// as fixing the shifted rows, but still had other problem of triangles rendered
22// wrong (black triangular gaps in NFS3 sky, clock tower in Castlevania SOTN).
23// I eventually gave up. Even after rewriting/adapting the routines,
24// however, I still had some random pixel dropouts, specifically in
25// NFS3 sky texture. I discovered that gpu_inner.h gpuPolySpanFn function
26// was taking optimizations to an extreme and packing u/v texture coords
27// into one 32-bit word, reducing their accuracy. Only once they were
28// handled in full-accuracy individual words was that problem fixed.
29//
30// NOTE: I also added support for doing divisions using the FPU, either
31// with normal division or multiplication-by-reciprocal.
32// To use float division, GPU_UNAI_USE_FLOATMATH should be defined.
33// To use float mult-by-reciprocal, GPU_UNAI_USE_FLOAT_DIV_MULTINV
34// can be specified (GPU_UNAI_USE_FLOATMATH must also be specified)
35// To use inaccurate fixed-point mult-by-reciprocal, define
36// GPU_UNAI_USE_INT_DIV_MULTINV. This is the default on older
37// ARM devices like Wiz/Caanoo that have neither integer division
38// in hardware nor an FPU. It results in some pixel dropouts,
39// texture glitches, but less than the original GPU UNAI code.
40//
41// If nothing is specified, integer division will be used.
42//
43// NOTE 2: Even with MIPS32R2 having FPU recip.s instruction, and it is
44// used when this platform is detected, I found it not to give any
45// noticeable speedup over normal float division (in fact seemed a tiny
46// tiny bit slower). I also found float division to not provide any
47// noticeable speedups versus integer division on MIPS32R2 platform.
48// Granted, the differences were all around .5 FPS or less.
49//
50// TODO:
51// * See if anything can be done about remaining pixel gaps in Gran
52// Turismo car models, track.
53// * Find better way of passing parameters to gpuPolySpanFn functions than
54// through original Unai method of using global variables u4,v4,du4 etc.
55// * Come up with some newer way of drawing rows of pixels than by calling
56// gpuPolySpanFn through function pointer. For every row, at least on
57// MIPS platforms, many registers are having to be pushed/popped from stack
58// on each call, which is strange since MIPS has so many registers.
59// * MIPS MXU/ASM optimized gpuPolySpanFn ?
60
61//////////////////////////////////////////////////////////////////////////
62//senquack - Disabled original Unai poly routines left here for reference:
63// ( from gpu_raster_polygon.h )
64//////////////////////////////////////////////////////////////////////////
// Early-out range test for a triangle.
// Expects x0,y0,x1,y1,x2,y2 to be in scope and must be expanded inside a
// function returning void, because it executes a bare `return;`.
// For every coordinate that is negative, the enclosing function returns when
// the distance from it to either of the other two vertices' coordinates on
// the same axis exceeds CHKMAX_X (x axis) or CHKMAX_Y (y axis).
// Wrapped in do { } while (0) so that `GPU_TESTRANGE3();` behaves as a single
// statement, including inside an unbraced if/else.
#define GPU_TESTRANGE3() \
do { \
	if(x0<0) { if((x1-x0)>CHKMAX_X) return; if((x2-x0)>CHKMAX_X) return; } \
	if(x1<0) { if((x0-x1)>CHKMAX_X) return; if((x2-x1)>CHKMAX_X) return; } \
	if(x2<0) { if((x0-x2)>CHKMAX_X) return; if((x1-x2)>CHKMAX_X) return; } \
	if(y0<0) { if((y1-y0)>CHKMAX_Y) return; if((y2-y0)>CHKMAX_Y) return; } \
	if(y1<0) { if((y0-y1)>CHKMAX_Y) return; if((y2-y1)>CHKMAX_Y) return; } \
	if(y2<0) { if((y0-y2)>CHKMAX_Y) return; if((y1-y2)>CHKMAX_Y) return; } \
} while (0)
74
75/*----------------------------------------------------------------------
76F3
77----------------------------------------------------------------------*/
78
// Rasterises a single-colour triangle (the "F3" routine).
// The three vertices are read from PacketBuffer, offset by DrawingOffset and
// clipped against DrawingArea. The triangle is then walked row by row and
// every non-empty horizontal span is passed to gpuPolySpanDriver as
// (pointer to first pixel, pixel count).
// Side effect: PixelData is set from PacketBuffer.U4[0] via GPU_RGB16.
79void gpuDrawF3(const PP gpuPolySpanDriver)
80{
// Interlace controls read from globals. In the row loop below, a row is
// skipped when (ya & li) is non-zero or when (ya & pi) == pif.
81 const int li=linesInterlace;
82 const int pi=(progressInterlace?(linesInterlace+1):0);
83 const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
84 s32 temp;
85 s32 xa, xb, xmin, xmax;
86 s32 ya, yb, ymin, ymax;
87 s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
88 s32 y0, y1, y2;
89
// Vertex coordinates: S2[2..7] of the packet, passed through GPU_EXPANDSIGN.
90 x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2]);
91 y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3]);
92 x1 = GPU_EXPANDSIGN(PacketBuffer.S2[4]);
93 y1 = GPU_EXPANDSIGN(PacketBuffer.S2[5]);
94 x2 = GPU_EXPANDSIGN(PacketBuffer.S2[6]);
95 y2 = GPU_EXPANDSIGN(PacketBuffer.S2[7]);
96
// May return from this function (see the macro's distance checks).
97 GPU_TESTRANGE3();
98
99 x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
100 y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
101
102 xmin = DrawingArea[0]; xmax = DrawingArea[2];
103 ymin = DrawingArea[1]; ymax = DrawingArea[3];
104
// Bounding box of the triangle intersected with the drawing area;
// nothing to draw when that intersection is empty.
105 {
106 int rx0 = Max2(xmin,Min3(x0,x1,x2));
107 int ry0 = Max2(ymin,Min3(y0,y1,y2));
108 int rx1 = Min2(xmax,Max3(x0,x1,x2));
109 int ry1 = Min2(ymax,Max3(y0,y1,y2));
110 if( rx0>=rx1 || ry0>=ry1) return;
111 }
112
113 PixelData = GPU_RGB16(PacketBuffer.U4[0]);
114
// Three compare-and-swap steps order the vertices by ascending y,
// with ties broken by ascending x.
115 if (y0 >= y1)
116 {
117 if( y0!=y1 || x0>x1 )
118 {
119 GPU_SWAP(x0, x1, temp);
120 GPU_SWAP(y0, y1, temp);
121 }
122 }
123 if (y1 >= y2)
124 {
125 if( y1!=y2 || x1>x2 )
126 {
127 GPU_SWAP(x1, x2, temp);
128 GPU_SWAP(y1, y2, temp);
129 }
130 }
131 if (y0 >= y1)
132 {
133 if( y0!=y1 || x0>x1 )
134 {
135 GPU_SWAP(x0, x1, temp);
136 GPU_SWAP(y0, y1, temp);
137 }
138 }
139
// The sign of dx decides which edge feeds x3 (span start) and which feeds
// x4 (span end) in the two passes below.
140 ya = y2 - y0;
141 yb = y2 - y1;
142 dx =(x2 - x1) * ya - (x2 - x0) * yb;
143
// Two passes: loop0 == 2 covers rows y0..y1, loop0 == 1 covers rows y1..y2.
144 for (s32 loop0 = 2; loop0; --loop0)
145 {
146 if (loop0 == 2)
147 {
148 ya = y0;
149 yb = y1;
150 x3 = i2x(x0);
151 x4 = y0!=y1 ? x3 : i2x(x1);
152 if (dx < 0)
153 {
154 dx3 = xLoDivx((x2 - x0), (y2 - y0));
155 dx4 = xLoDivx((x1 - x0), (y1 - y0));
156 }
157 else
158 {
159 dx3 = xLoDivx((x1 - x0), (y1 - y0));
160 dx4 = xLoDivx((x2 - x0), (y2 - y0));
161 }
162 }
163 else
164 {
165 ya = y1;
166 yb = y2;
// The edge running from vertex 0 to vertex 2 keeps its slope from the first
// pass and is re-evaluated at y1; the other edge restarts at vertex 1.
167 if (dx < 0)
168 {
169 x4 = i2x(x1);
170 x3 = i2x(x0) + (dx3 * (y1 - y0));
171 dx4 = xLoDivx((x2 - x1), (y2 - y1));
172 }
173 else
174 {
175 x3 = i2x(x1);
176 x4 = i2x(x0) + (dx4 * (y1 - y0));
177 dx3 = xLoDivx((x2 - x1), (y2 - y1));
178 }
179 }
180
// Vertical clip: advance both edges past rows above ymin, stop at ymax.
181 temp = ymin - ya;
182 if (temp > 0)
183 {
184 ya = ymin;
185 x3 += dx3*temp;
186 x4 += dx4*temp;
187 }
188 if (yb > ymax) yb = ymax;
189 if (ya>=yb) continue;
190
// fixed_HALF is added once before the x2i() conversions in the row loop
// (presumably a rounding bias - confirm against gpu_fixedpoint.h).
191 x3+= fixed_HALF;
192 x4+= fixed_HALF;
193
194 u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
195
196 for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4)
197 {
198 if (ya&li) continue;
199 if ((ya&pi)==pif) continue;
200 xa = x2i(x3);
201 xb = x2i(x4);
// Horizontal clip against [xmin, xmax]; xb becomes the span width.
202 if( (xa>xmax) || (xb<xmin) ) continue;
203 if(xa < xmin) xa = xmin;
204 if(xb > xmax) xb = xmax;
205 xb-=xa;
206 if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
207 }
208 }
209}
210
211/*----------------------------------------------------------------------
212FT3
213----------------------------------------------------------------------*/
214
215void gpuDrawFT3(const PP gpuPolySpanDriver)
216{
217 const int li=linesInterlace;
218 const int pi=(progressInterlace?(linesInterlace+1):0);
219 const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
220 s32 temp;
221 s32 xa, xb, xmin, xmax;
222 s32 ya, yb, ymin, ymax;
223 s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
224 s32 y0, y1, y2;
225 s32 u0, u1, u2, u3, du3=0;
226 s32 v0, v1, v2, v3, dv3=0;
227
228 x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
229 y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
230 x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] );
231 y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] );
232 x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]);
233 y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]);
234
235 GPU_TESTRANGE3();
236
237 x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
238 y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
239
240 xmin = DrawingArea[0]; xmax = DrawingArea[2];
241 ymin = DrawingArea[1]; ymax = DrawingArea[3];
242
243 {
244 int rx0 = Max2(xmin,Min3(x0,x1,x2));
245 int ry0 = Max2(ymin,Min3(y0,y1,y2));
246 int rx1 = Min2(xmax,Max3(x0,x1,x2));
247 int ry1 = Min2(ymax,Max3(y0,y1,y2));
248 if( rx0>=rx1 || ry0>=ry1) return;
249 }
250
251 u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9];
252 u1 = PacketBuffer.U1[16]; v1 = PacketBuffer.U1[17];
253 u2 = PacketBuffer.U1[24]; v2 = PacketBuffer.U1[25];
254
255 r4 = s32(PacketBuffer.U1[0]);
256 g4 = s32(PacketBuffer.U1[1]);
257 b4 = s32(PacketBuffer.U1[2]);
258 dr4 = dg4 = db4 = 0;
259
260 if (y0 >= y1)
261 {
262 if( y0!=y1 || x0>x1 )
263 {
264 GPU_SWAP(x0, x1, temp);
265 GPU_SWAP(y0, y1, temp);
266 GPU_SWAP(u0, u1, temp);
267 GPU_SWAP(v0, v1, temp);
268 }
269 }
270 if (y1 >= y2)
271 {
272 if( y1!=y2 || x1>x2 )
273 {
274 GPU_SWAP(x1, x2, temp);
275 GPU_SWAP(y1, y2, temp);
276 GPU_SWAP(u1, u2, temp);
277 GPU_SWAP(v1, v2, temp);
278 }
279 }
280 if (y0 >= y1)
281 {
282 if( y0!=y1 || x0>x1 )
283 {
284 GPU_SWAP(x0, x1, temp);
285 GPU_SWAP(y0, y1, temp);
286 GPU_SWAP(u0, u1, temp);
287 GPU_SWAP(v0, v1, temp);
288 }
289 }
290
291 ya = y2 - y0;
292 yb = y2 - y1;
293 dx = (x2 - x1) * ya - (x2 - x0) * yb;
294 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
295 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
296
297 s32 iF,iS;
298 xInv( dx, iF, iS);
299 du4 = xInvMulx( du4, iF, iS);
300 dv4 = xInvMulx( dv4, iF, iS);
301 tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff);
302 tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff;
303
304 for (s32 loop0 = 2; loop0; --loop0)
305 {
306 if (loop0 == 2)
307 {
308 ya = y0;
309 yb = y1;
310 u3 = i2x(u0);
311 v3 = i2x(v0);
312 x3 = i2x(x0);
313 x4 = y0!=y1 ? x3 : i2x(x1);
314 if (dx < 0)
315 {
316 xInv( (y2 - y0), iF, iS);
317 dx3 = xInvMulx( (x2 - x0), iF, iS);
318 du3 = xInvMulx( (u2 - u0), iF, iS);
319 dv3 = xInvMulx( (v2 - v0), iF, iS);
320 dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
321 }
322 else
323 {
324 xInv( (y1 - y0), iF, iS);
325 dx3 = xInvMulx( (x1 - x0), iF, iS);
326 du3 = xInvMulx( (u1 - u0), iF, iS);
327 dv3 = xInvMulx( (v1 - v0), iF, iS);
328 dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
329 }
330 }
331 else
332 {
333 ya = y1;
334 yb = y2;
335 if (dx < 0)
336 {
337 temp = y1 - y0;
338 u3 = i2x(u0) + (du3 * temp);
339 v3 = i2x(v0) + (dv3 * temp);
340 x3 = i2x(x0) + (dx3 * temp);
341 x4 = i2x(x1);
342 dx4 = xLoDivx((x2 - x1), (y2 - y1));
343 }
344 else
345 {
346 u3 = i2x(u1);
347 v3 = i2x(v1);
348 x3 = i2x(x1);
349 x4 = i2x(x0) + (dx4 * (y1 - y0));
350 xInv( (y2 - y1), iF, iS);
351 dx3 = xInvMulx( (x2 - x1), iF, iS);
352 du3 = xInvMulx( (u2 - u1), iF, iS);
353 dv3 = xInvMulx( (v2 - v1), iF, iS);
354 }
355 }
356
357 temp = ymin - ya;
358 if (temp > 0)
359 {
360 ya = ymin;
361 x3 += dx3*temp;
362 x4 += dx4*temp;
363 u3 += du3*temp;
364 v3 += dv3*temp;
365 }
366 if (yb > ymax) yb = ymax;
367 if (ya>=yb) continue;
368
369 x3+= fixed_HALF;
370 x4+= fixed_HALF;
371 u3+= fixed_HALF;
372 v4+= fixed_HALF;
373
374 u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
375
376 for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, u3+=du3, v3+=dv3)
377 {
378 if (ya&li) continue;
379 if ((ya&pi)==pif) continue;
380 xa = x2i(x3);
381 xb = x2i(x4);
382 if( (xa>xmax) || (xb<xmin) ) continue;
383
384 temp = xmin - xa;
385 if(temp > 0)
386 {
387 xa = xmin;
388 u4 = u3 + du4*temp;
389 v4 = v3 + dv4*temp;
390 }
391 else
392 {
393 u4 = u3;
394 v4 = v3;
395 }
396 if(xb > xmax) xb = xmax;
397 xb-=xa;
398 if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
399 }
400 }
401}
402
403/*----------------------------------------------------------------------
404G3
405----------------------------------------------------------------------*/
406
// Rasterises a colour-interpolated triangle (the "G3" routine).
// Each vertex carries its own r/g/b read from PacketBuffer.U1. The colours
// are interpolated down the span-start edge (r3/g3/b3) and across each row
// (dr4/dg4/db4, also packed into lInc).
// gpuPolySpanDriver is called as (pointer to first pixel, pixel count) for
// every non-empty span. The globals r4, g4, b4 hold the colour at the first
// pixel of that span.
407void gpuDrawG3(const PP gpuPolySpanDriver)
408{
// Interlace controls: a row is skipped when (ya & li) is non-zero or when
// (ya & pi) == pif.
409 const int li=linesInterlace;
410 const int pi=(progressInterlace?(linesInterlace+1):0);
411 const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
412 s32 temp;
413 s32 xa, xb, xmin, xmax;
414 s32 ya, yb, ymin, ymax;
415 s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
416 s32 y0, y1, y2;
417 s32 r0, r1, r2, r3, dr3=0;
418 s32 g0, g1, g2, g3, dg3=0;
419 s32 b0, b1, b2, b3, db3=0;
420
421 x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
422 y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
423 x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] );
424 y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] );
425 x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]);
426 y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]);
427
// May return from this function.
428 GPU_TESTRANGE3();
429
430 x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
431 y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
432
433 xmin = DrawingArea[0]; xmax = DrawingArea[2];
434 ymin = DrawingArea[1]; ymax = DrawingArea[3];
435
// Bounding box intersected with the drawing area; bail out when empty.
436 {
437 int rx0 = Max2(xmin,Min3(x0,x1,x2));
438 int ry0 = Max2(ymin,Min3(y0,y1,y2));
439 int rx1 = Min2(xmax,Max3(x0,x1,x2));
440 int ry1 = Min2(ymax,Max3(y0,y1,y2));
441 if( rx0>=rx1 || ry0>=ry1) return;
442 }
443
// Per-vertex colour bytes.
444 r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2];
445 r1 = PacketBuffer.U1[8]; g1 = PacketBuffer.U1[9]; b1 = PacketBuffer.U1[10];
446 r2 = PacketBuffer.U1[16]; g2 = PacketBuffer.U1[17]; b2 = PacketBuffer.U1[18];
447
// Three compare-and-swap steps order the vertices (and their colours) by
// ascending y, with ties broken by ascending x.
448 if (y0 >= y1)
449 {
450 if( y0!=y1 || x0>x1 )
451 {
452 GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
453 GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
454 }
455 }
456 if (y1 >= y2)
457 {
458 if( y1!=y2 || x1>x2 )
459 {
460 GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp);
461 GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp);
462 }
463 }
464 if (y0 >= y1)
465 {
466 if( y0!=y1 || x0>x1 )
467 {
468 GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
469 GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
470 }
471 }
472
// dx: its sign selects which edge feeds x3 (span start, carries the colour)
// and which feeds x4 (span end). dr4/dg4/db4 start as the matching
// numerators and are scaled by the inverse of dx below.
473 ya = y2 - y0;
474 yb = y2 - y1;
475 dx = (x2 - x1) * ya - (x2 - x0) * yb;
476 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
477 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
478 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
479
480 s32 iF,iS;
481 xInv( dx, iF, iS);
482 dr4 = xInvMulx( dr4, iF, iS);
483 dg4 = xInvMulx( dg4, iF, iS);
484 db4 = xInvMulx( db4, iF, iS);
// Pack the three per-pixel colour steps into one word for lInc: red from
// bit 21 up, green from bit 10 up, blue in the low bits. A negative step
// has one unit added at the lowest bit of its field.
485 u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21;
486 u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10;
487 u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0;
488 lInc = db + dg + dr;
489
// Two passes: loop0 == 2 covers rows y0..y1, loop0 == 1 covers rows y1..y2.
490 for (s32 loop0 = 2; loop0; --loop0)
491 {
492 if (loop0 == 2)
493 {
494 ya = y0;
495 yb = y1;
496 r3 = i2x(r0);
497 g3 = i2x(g0);
498 b3 = i2x(b0);
499 x3 = i2x(x0);
500 x4 = y0!=y1 ? x3 : i2x(x1);
501 if (dx < 0)
502 {
503 xInv( (y2 - y0), iF, iS);
504 dx3 = xInvMulx( (x2 - x0), iF, iS);
505 dr3 = xInvMulx( (r2 - r0), iF, iS);
506 dg3 = xInvMulx( (g2 - g0), iF, iS);
507 db3 = xInvMulx( (b2 - b0), iF, iS);
508 dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
509 }
510 else
511 {
512 xInv( (y1 - y0), iF, iS);
513 dx3 = xInvMulx( (x1 - x0), iF, iS);
514 dr3 = xInvMulx( (r1 - r0), iF, iS);
515 dg3 = xInvMulx( (g1 - g0), iF, iS);
516 db3 = xInvMulx( (b1 - b0), iF, iS);
517 dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
518 }
519 }
520 else
521 {
522 ya = y1;
523 yb = y2;
524 if (dx < 0)
525 {
// The colour-carrying edge continues with the slopes from the first pass,
// re-evaluated at y1.
526 temp = y1 - y0;
527 r3 = i2x(r0) + (dr3 * temp);
528 g3 = i2x(g0) + (dg3 * temp);
529 b3 = i2x(b0) + (db3 * temp);
530 x3 = i2x(x0) + (dx3 * temp);
531 x4 = i2x(x1);
532 dx4 = xLoDivx((x2 - x1), (y2 - y1));
533 }
534 else
535 {
// The colour-carrying edge restarts at vertex 1 with new slopes.
536 r3 = i2x(r1);
537 g3 = i2x(g1);
538 b3 = i2x(b1);
539 x3 = i2x(x1);
540 x4 = i2x(x0) + (dx4 * (y1 - y0));
541
542 xInv( (y2 - y1), iF, iS);
543 dx3 = xInvMulx( (x2 - x1), iF, iS);
544 dr3 = xInvMulx( (r2 - r1), iF, iS);
545 dg3 = xInvMulx( (g2 - g1), iF, iS);
546 db3 = xInvMulx( (b2 - b1), iF, iS);
547 }
548 }
549
// Vertical clip: advance edges and colours past rows above ymin.
550 temp = ymin - ya;
551 if (temp > 0)
552 {
553 ya = ymin;
554 x3 += dx3*temp; x4 += dx4*temp;
555 r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp;
556 }
557 if (yb > ymax) yb = ymax;
558 if (ya>=yb) continue;
559
// One-off fixed_HALF bias on the edge positions and the colour accumulators
// (presumably for rounding - confirm against gpu_fixedpoint.h).
560 x3+= fixed_HALF; x4+= fixed_HALF;
561 r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF;
562
563 u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
564
565 for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, r3+=dr3, g3+=dg3, b3+=db3)
566 {
567 if (ya&li) continue;
568 if ((ya&pi)==pif) continue;
569 xa = x2i(x3);
570 xb = x2i(x4);
571 if( (xa>xmax) || (xb<xmin) ) continue;
572
// Left clip: step the colour across the pixels skipped left of xmin.
573 temp = xmin - xa;
574 if(temp > 0)
575 {
576 xa = xmin;
577 r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp;
578 }
579 else
580 {
581 r4 = r3; g4 = g3; b4 = b3;
582 }
583 if(xb > xmax) xb = xmax;
584 xb-=xa;
585 if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
586 }
587 }
588}
589
590/*----------------------------------------------------------------------
591GT3
592----------------------------------------------------------------------*/
593
594void gpuDrawGT3(const PP gpuPolySpanDriver)
595{
596 const int li=linesInterlace;
597 const int pi=(progressInterlace?(linesInterlace+1):0);
598 const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1);
599 s32 temp;
600 s32 xa, xb, xmin, xmax;
601 s32 ya, yb, ymin, ymax;
602 s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx;
603 s32 y0, y1, y2;
604 s32 u0, u1, u2, u3, du3=0;
605 s32 v0, v1, v2, v3, dv3=0;
606 s32 r0, r1, r2, r3, dr3=0;
607 s32 g0, g1, g2, g3, dg3=0;
608 s32 b0, b1, b2, b3, db3=0;
609
610 x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] );
611 y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] );
612 x1 = GPU_EXPANDSIGN(PacketBuffer.S2[8] );
613 y1 = GPU_EXPANDSIGN(PacketBuffer.S2[9] );
614 x2 = GPU_EXPANDSIGN(PacketBuffer.S2[14]);
615 y2 = GPU_EXPANDSIGN(PacketBuffer.S2[15]);
616
617 GPU_TESTRANGE3();
618
619 x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0];
620 y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1];
621
622 xmin = DrawingArea[0]; xmax = DrawingArea[2];
623 ymin = DrawingArea[1]; ymax = DrawingArea[3];
624
625 {
626 int rx0 = Max2(xmin,Min3(x0,x1,x2));
627 int ry0 = Max2(ymin,Min3(y0,y1,y2));
628 int rx1 = Min2(xmax,Max3(x0,x1,x2));
629 int ry1 = Min2(ymax,Max3(y0,y1,y2));
630 if( rx0>=rx1 || ry0>=ry1) return;
631 }
632
633 r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2];
634 u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9];
635 r1 = PacketBuffer.U1[12]; g1 = PacketBuffer.U1[13]; b1 = PacketBuffer.U1[14];
636 u1 = PacketBuffer.U1[20]; v1 = PacketBuffer.U1[21];
637 r2 = PacketBuffer.U1[24]; g2 = PacketBuffer.U1[25]; b2 = PacketBuffer.U1[26];
638 u2 = PacketBuffer.U1[32]; v2 = PacketBuffer.U1[33];
639
640 if (y0 >= y1)
641 {
642 if( y0!=y1 || x0>x1 )
643 {
644 GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
645 GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp);
646 GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
647 }
648 }
649 if (y1 >= y2)
650 {
651 if( y1!=y2 || x1>x2 )
652 {
653 GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp);
654 GPU_SWAP(u1, u2, temp); GPU_SWAP(v1, v2, temp);
655 GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp);
656 }
657 }
658 if (y0 >= y1)
659 {
660 if( y0!=y1 || x0>x1 )
661 {
662 GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp);
663 GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp);
664 GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp);
665 }
666 }
667
668 ya = y2 - y0;
669 yb = y2 - y1;
670 dx = (x2 - x1) * ya - (x2 - x0) * yb;
671 du4 = (u2 - u1) * ya - (u2 - u0) * yb;
672 dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
673 dr4 = (r2 - r1) * ya - (r2 - r0) * yb;
674 dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
675 db4 = (b2 - b1) * ya - (b2 - b0) * yb;
676
677 s32 iF,iS;
678
679 xInv( dx, iF, iS);
680 du4 = xInvMulx( du4, iF, iS);
681 dv4 = xInvMulx( dv4, iF, iS);
682 dr4 = xInvMulx( dr4, iF, iS);
683 dg4 = xInvMulx( dg4, iF, iS);
684 db4 = xInvMulx( db4, iF, iS);
685 u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21;
686 u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10;
687 u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0;
688 lInc = db + dg + dr;
689 tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff);
690 tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff;
691
692 for (s32 loop0 = 2; loop0; --loop0)
693 {
694 if (loop0 == 2)
695 {
696 ya = y0;
697 yb = y1;
698 u3 = i2x(u0);
699 v3 = i2x(v0);
700 r3 = i2x(r0);
701 g3 = i2x(g0);
702 b3 = i2x(b0);
703 x3 = i2x(x0);
704 x4 = y0!=y1 ? x3 : i2x(x1);
705 if (dx < 0)
706 {
707 xInv( (y2 - y0), iF, iS);
708 dx3 = xInvMulx( (x2 - x0), iF, iS);
709 du3 = xInvMulx( (u2 - u0), iF, iS);
710 dv3 = xInvMulx( (v2 - v0), iF, iS);
711 dr3 = xInvMulx( (r2 - r0), iF, iS);
712 dg3 = xInvMulx( (g2 - g0), iF, iS);
713 db3 = xInvMulx( (b2 - b0), iF, iS);
714 dx4 = xLoDivx ( (x1 - x0), (y1 - y0));
715 }
716 else
717 {
718 xInv( (y1 - y0), iF, iS);
719 dx3 = xInvMulx( (x1 - x0), iF, iS);
720 du3 = xInvMulx( (u1 - u0), iF, iS);
721 dv3 = xInvMulx( (v1 - v0), iF, iS);
722 dr3 = xInvMulx( (r1 - r0), iF, iS);
723 dg3 = xInvMulx( (g1 - g0), iF, iS);
724 db3 = xInvMulx( (b1 - b0), iF, iS);
725 dx4 = xLoDivx ( (x2 - x0), (y2 - y0));
726 }
727 }
728 else
729 {
730 ya = y1;
731 yb = y2;
732 if (dx < 0)
733 {
734 temp = y1 - y0;
735 u3 = i2x(u0) + (du3 * temp);
736 v3 = i2x(v0) + (dv3 * temp);
737 r3 = i2x(r0) + (dr3 * temp);
738 g3 = i2x(g0) + (dg3 * temp);
739 b3 = i2x(b0) + (db3 * temp);
740 x3 = i2x(x0) + (dx3 * temp);
741 x4 = i2x(x1);
742 dx4 = xLoDivx((x2 - x1), (y2 - y1));
743 }
744 else
745 {
746 u3 = i2x(u1);
747 v3 = i2x(v1);
748 r3 = i2x(r1);
749 g3 = i2x(g1);
750 b3 = i2x(b1);
751 x3 = i2x(x1);
752 x4 = i2x(x0) + (dx4 * (y1 - y0));
753
754 xInv( (y2 - y1), iF, iS);
755 dx3 = xInvMulx( (x2 - x1), iF, iS);
756 du3 = xInvMulx( (u2 - u1), iF, iS);
757 dv3 = xInvMulx( (v2 - v1), iF, iS);
758 dr3 = xInvMulx( (r2 - r1), iF, iS);
759 dg3 = xInvMulx( (g2 - g1), iF, iS);
760 db3 = xInvMulx( (b2 - b1), iF, iS);
761 }
762 }
763
764 temp = ymin - ya;
765 if (temp > 0)
766 {
767 ya = ymin;
768 x3 += dx3*temp; x4 += dx4*temp;
769 u3 += du3*temp; v3 += dv3*temp;
770 r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp;
771 }
772 if (yb > ymax) yb = ymax;
773 if (ya>=yb) continue;
774
775 x3+= fixed_HALF; x4+= fixed_HALF;
776 u3+= fixed_HALF; v4+= fixed_HALF;
777 r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF;
778 u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)];
779
780 for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, u3+=du3, v3+=dv3, r3+=dr3, g3+=dg3, b3+=db3)
781 {
782 if (ya&li) continue;
783 if ((ya&pi)==pif) continue;
784 xa = x2i(x3);
785 xb = x2i(x4);
786 if( (xa>xmax) || (xb<xmin)) continue;
787
788 temp = xmin - xa;
789 if(temp > 0)
790 {
791 xa = xmin;
792 u4 = u3 + du4*temp; v4 = v3 + dv4*temp;
793 r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp;
794 }
795 else
796 {
797 u4 = u3; v4 = v3;
798 r4 = r3; g4 = g3; b4 = b3;
799 }
800 if(xb > xmax) xb = xmax;
801 xb-=xa;
802 if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb);
803 }
804 }
805}
806
807
808//////////////////////////////////////////////////////////////////////////
809//senquack - Original Unai poly routines left here for reference:
810// ( from gpu_inner.h ) NOTE: this uses 16.16, not 22.10 fixed point
811//////////////////////////////////////////////////////////////////////////
// Writes one horizontal span of `count` pixels starting at pDst.
// The variant is chosen by TM, G, L, M, B, MB, BM and BI, which are not
// defined in this listing (presumably compile-time flags derived from the
// template argument CF - confirm in gpu_inner.h).
// Per-span inputs come from globals: PixelData, r4/g4/b4, u4/v4, lInc, tInc,
// tMsk, TBA, CBA and blit_mask.
// Every loop is do { } while (--count), so count must be non-zero on entry;
// the polygon routines in this listing only call the driver with a positive
// width.
// NOTE(review): ((u32)pDst) casts a pointer to a 32-bit integer, which
// truncates (and may fail to compile) on 64-bit targets - verify before
// reusing this code.
812template<const int CF>
813INLINE void gpuPolySpanFn(u16 *pDst, u32 count)
814{
815 if (!TM)
816 {
817 // NO TEXTURE
818 if (!G)
819 {
820 // NO GOURAUD
// One colour for the whole span: either PixelData, or (when L is set) the
// result of gpuLightingRGB on a word packed from b4/g4/r4.
821 u16 data;
822 if (L) { u32 lCol=((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); gpuLightingRGB(data,lCol); }
823 else data=PixelData;
824 if ((!M)&&(!B))
825 {
// Plain fill; MB additionally sets bit 15 of every written pixel.
826 if (MB) { data = data | 0x8000; }
827 do { *pDst++ = data; } while (--count);
828 }
829 else if ((M)&&(!B))
830 {
// Masked fill: destination pixels with bit 15 set are left untouched.
831 if (MB) { data = data | 0x8000; }
832 do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count);
833 }
834 else
835 {
836 u16 uSrc;
837 u16 uDst;
// uMsk and bMsk are only assigned under the same conditions in which they
// are expected to be read; uMsk is not referenced by name below (presumably
// used inside gpuBlending00 - confirm).
838 u32 uMsk; if (BM==0) uMsk=0x7BDE;
839 u32 bMsk; if (BI) bMsk=blit_mask;
840 do
841 {
842 // blit-mask
843 if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endtile; }
844 // masking
845 uDst = *pDst;
846 if(M) { if (uDst&0x8000) goto endtile; }
847 uSrc = data;
848 // blend
849 if (BM==0) gpuBlending00(uSrc, uDst);
850 if (BM==1) gpuBlending01(uSrc, uDst);
851 if (BM==2) gpuBlending02(uSrc, uDst);
852 if (BM==3) gpuBlending03(uSrc, uDst);
853 if (MB) { *pDst = uSrc | 0x8000; }
854 else { *pDst = uSrc; }
855 endtile: pDst++;
856 }
857 while (--count);
858 }
859 }
860 else
861 {
862 // GOURAUD
863 u16 uDst;
864 u16 uSrc;
// lCol is the packed colour at the first pixel; lInc is added after every
// pixel, including pixels skipped by the blit mask or the mask-bit test.
865 u32 linc=lInc;
866 u32 lCol=((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21));
867 u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE;
868 u32 bMsk; if (BI) bMsk=blit_mask;
869 do
870 {
871 // blit-mask
872 if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endgou; }
873 // masking
874 if(M) { uDst = *pDst; if (uDst&0x8000) goto endgou; }
875 // blend
876 if(B)
877 {
878 // light
879 gpuLightingRGB(uSrc,lCol);
880 if(!M) { uDst = *pDst; }
881 if (BM==0) gpuBlending00(uSrc, uDst);
882 if (BM==1) gpuBlending01(uSrc, uDst);
883 if (BM==2) gpuBlending02(uSrc, uDst);
884 if (BM==3) gpuBlending03(uSrc, uDst);
885 }
886 else
887 {
888 // light
889 gpuLightingRGB(uSrc,lCol);
890 }
891 if (MB) { *pDst = uSrc | 0x8000; }
892 else { *pDst = uSrc; }
893 endgou: pDst++; lCol=(lCol+linc);
894 }
895 while (--count);
896 }
897 }
898 else
899 {
900 // TEXTURE
901 u16 uDst;
902 u16 uSrc;
903 u32 linc; if (L&&G) linc=lInc;
904 u32 tinc=tInc;
905 u32 tmsk=tMsk;
// tCor packs both texture coordinates into one word (u4 in the upper half,
// v4 in the lower half) and is masked with tmsk after every step.
906 u32 tCor = ((u32)( u4<<7)&0x7fff0000) | ((u32)( v4>>9)&0x00007fff); tCor&= tmsk;
907 const u16* _TBA=TBA;
908 const u16* _CBA; if (TM!=3) _CBA=CBA;
// Two packings of b4/g4/r4 into lCol, selected by G; the shift amounts
// differ between the two cases.
909 u32 lCol;
910 if(L && !G) { lCol = ((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); }
911 else if(L && G) { lCol = ((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21)); }
912 u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE;
913 u32 bMsk; if (BI) bMsk=blit_mask;
914 do
915 {
916 // blit-mask
917 if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endpoly; }
918 // masking
919 if(M) { uDst = *pDst; if (uDst&0x8000) goto endpoly; }
920 // texture
// TM==1: a byte is read from _TBA, one of its two nibbles is selected by the
// low bit of tu and used as an index into _CBA.
// TM==2: a whole byte from _TBA is the index into _CBA.
// TM==3: the u16 is read directly from _TBA.
// In all three modes a texel value of 0 leaves the destination untouched.
921 if (TM==1) { u32 tu=(tCor>>23); u32 tv=(tCor<<4)&(0xff<<11); u8 rgb=((u8*)_TBA)[tv+(tu>>1)]; uSrc=_CBA[(rgb>>((tu&1)<<2))&0xf]; if(!uSrc) goto endpoly; }
922 if (TM==2) { uSrc = _CBA[(((u8*)_TBA)[(tCor>>23)+((tCor<<4)&(0xff<<11))])]; if(!uSrc) goto endpoly; }
923 if (TM==3) { uSrc = _TBA[(tCor>>23)+((tCor<<3)&(0xff<<10))]; if(!uSrc) goto endpoly; }
924 // blend
925 if(B)
926 {
// Blending is applied only to texels that have bit 15 set.
927 if (uSrc&0x8000)
928 {
929 // light
930 if(L) gpuLightingTXT(uSrc, lCol);
931 if(!M) { uDst = *pDst; }
932 if (BM==0) gpuBlending00(uSrc, uDst);
933 if (BM==1) gpuBlending01(uSrc, uDst);
934 if (BM==2) gpuBlending02(uSrc, uDst);
935 if (BM==3) gpuBlending03(uSrc, uDst);
936 }
937 else
938 {
939 // light
940 if(L) gpuLightingTXT(uSrc, lCol);
941 }
942 }
943 else
944 {
945 // light
946 if(L) { gpuLightingTXT(uSrc, lCol); } else if(!MB) { uSrc&= 0x7fff; }
947 }
948 if (MB) { *pDst = uSrc | 0x8000; }
949 else { *pDst = uSrc; }
// The texture coordinate (and, when L&&G, the colour) advance for every
// pixel, including skipped ones.
950 endpoly: pDst++;
951 tCor=(tCor+tinc)&tmsk;
952 if (L&&G) lCol=(lCol+linc);
953 }
954 while (--count);
955 }
956}