030d1121 |
1 | //NOTE: You can find the set of original Unai poly routines (disabled now) |
2 | // at the bottom end of this file. |
3 | |
4 | //senquack - Original Unai GPU poly routines have been replaced with new |
5 | // ones based on DrHell routines. The original routines suffered from |
6 | // shifted rows, causing many quads to have their first triangle drawn |
7 | // correctly, but the second triangle would randomly have pixels shifted |
8 | // either left or right or entire rows not drawn at all. Furthermore, |
9 | // sometimes entire triangles seemed to be either missing or only |
10 | // partially drawn (most clearly seen in sky/road textures in NFS3, |
11 | // clock tower in beginning of Castlevania SOTN). Pixel gaps were |
12 | // prevalent. |
13 | // |
14 | // Since DrHell GPU didn't seem to exhibit these artifacts at all, I adapted |
15 | // its routines to GPU Unai (Unai was probably already originally based on it). |
16 | // DrHell uses 22.10 fixed point instead of Unai's 16.16, so gpu_fixedpoint.h |
17 | // required modification as well as gpu_inner.h (where gpuPolySpanFn driver |
18 | // functions are). |
19 | // |
20 | // Originally, I tried to patch up original Unai routines and got as far |
21 | // as fixing the shifted rows, but still had other problem of triangles rendered |
22 | // wrong (black triangular gaps in NFS3 sky, clock tower in Castlevania SOTN). |
23 | // I eventually gave up. Even after rewriting/adapting the routines, |
24 | // however, I still had some random pixel dropouts, specifically in |
25 | // NFS3 sky texture. I discovered that gpu_inner.h gpuPolySpanFn function |
26 | // was taking optimizations to an extreme and packing u/v texture coords |
27 | // into one 32-bit word, reducing their accuracy. Only once they were |
28 | // handled in full-accuracy individual words was that problem fixed. |
29 | // |
30 | // NOTE: I also added support for doing divisions using the FPU, either |
31 | // with normal division or multiplication-by-reciprocal. |
32 | // To use float division, GPU_UNAI_USE_FLOATMATH should be defined. |
33 | // To use float mult-by-reciprocal, GPU_UNAI_USE_FLOAT_DIV_MULTINV |
34 | // can be specified (GPU_UNAI_USE_FLOATMATH must also be specified) |
35 | // To use inaccurate fixed-point mult-by-reciprocal, define |
36 | // GPU_UNAI_USE_INT_DIV_MULTINV. This is the default on older |
37 | // ARM devices like Wiz/Caanoo that have neither integer division |
38 | // in hardware or an FPU. It results in some pixel dropouts, |
39 | // texture glitches, but less than the original GPU UNAI code. |
40 | // |
41 | // If nothing is specified, integer division will be used. |
42 | // |
43 | // NOTE 2: Even with MIPS32R2 having FPU recip.s instruction, and it is |
44 | // used when this platform is detected, I found it not to give any |
45 | // noticeable speedup over normal float division (in fact seemed a tiny |
46 | // tiny bit slower). I also found float division to not provide any |
47 | // noticeable speedups versus integer division on MIPS32R2 platform. |
48 | // Granted, the differences were all around .5 FPS or less. |
49 | // |
50 | // TODO: |
51 | // * See if anything can be done about remaining pixel gaps in Gran |
52 | // Turismo car models, track. |
53 | // * Find better way of passing parameters to gpuPolySpanFn functions than |
54 | // through original Unai method of using global variables u4,v4,du4 etc. |
55 | // * Come up with some newer way of drawing rows of pixels than by calling |
56 | // gpuPolySpanFn through function pointer. For every row, at least on |
57 | // MIPS platforms, many registers are having to be pushed/popped from stack |
58 | // on each call, which is strange since MIPS has so many registers. |
59 | // * MIPS MXU/ASM optimized gpuPolySpanFn ? |
60 | |
61 | ////////////////////////////////////////////////////////////////////////// |
62 | //senquack - Disabled original Unai poly routines left here for reference: |
63 | // ( from gpu_raster_polygon.h ) |
64 | ////////////////////////////////////////////////////////////////////////// |
// Range guard for three-vertex primitives.
//
// Expects s32-like variables x0,x1,x2,y0,y1,y2 to be in scope at the point of
// expansion, and must be expanded inside a function returning void because it
// executes a bare `return`. For every vertex whose coordinate is negative, the
// enclosing function returns early when either of the other two vertices lies
// more than CHKMAX_X (horizontally) or CHKMAX_Y (vertically) beyond it.
#define GPU_TESTRANGE3() \
{ \
	if ((x0 < 0) && (((x1 - x0) > CHKMAX_X) || ((x2 - x0) > CHKMAX_X))) return; \
	if ((x1 < 0) && (((x0 - x1) > CHKMAX_X) || ((x2 - x1) > CHKMAX_X))) return; \
	if ((x2 < 0) && (((x0 - x2) > CHKMAX_X) || ((x1 - x2) > CHKMAX_X))) return; \
	if ((y0 < 0) && (((y1 - y0) > CHKMAX_Y) || ((y2 - y0) > CHKMAX_Y))) return; \
	if ((y1 < 0) && (((y0 - y1) > CHKMAX_Y) || ((y2 - y1) > CHKMAX_Y))) return; \
	if ((y2 < 0) && (((y0 - y2) > CHKMAX_Y) || ((y1 - y2) > CHKMAX_Y))) return; \
}
74 | |
75 | /*---------------------------------------------------------------------- |
76 | F3 |
77 | ----------------------------------------------------------------------*/ |
78 | |
79 | void gpuDrawF3(const PP gpuPolySpanDriver) |
80 | { |
81 | const int li=linesInterlace; |
82 | const int pi=(progressInterlace?(linesInterlace+1):0); |
83 | const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1); |
84 | s32 temp; |
85 | s32 xa, xb, xmin, xmax; |
86 | s32 ya, yb, ymin, ymax; |
87 | s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; |
88 | s32 y0, y1, y2; |
89 | |
90 | x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2]); |
91 | y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3]); |
92 | x1 = GPU_EXPANDSIGN(PacketBuffer.S2[4]); |
93 | y1 = GPU_EXPANDSIGN(PacketBuffer.S2[5]); |
94 | x2 = GPU_EXPANDSIGN(PacketBuffer.S2[6]); |
95 | y2 = GPU_EXPANDSIGN(PacketBuffer.S2[7]); |
96 | |
97 | GPU_TESTRANGE3(); |
98 | |
99 | x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; |
100 | y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; |
101 | |
102 | xmin = DrawingArea[0]; xmax = DrawingArea[2]; |
103 | ymin = DrawingArea[1]; ymax = DrawingArea[3]; |
104 | |
105 | { |
106 | int rx0 = Max2(xmin,Min3(x0,x1,x2)); |
107 | int ry0 = Max2(ymin,Min3(y0,y1,y2)); |
108 | int rx1 = Min2(xmax,Max3(x0,x1,x2)); |
109 | int ry1 = Min2(ymax,Max3(y0,y1,y2)); |
110 | if( rx0>=rx1 || ry0>=ry1) return; |
111 | } |
112 | |
113 | PixelData = GPU_RGB16(PacketBuffer.U4[0]); |
114 | |
115 | if (y0 >= y1) |
116 | { |
117 | if( y0!=y1 || x0>x1 ) |
118 | { |
119 | GPU_SWAP(x0, x1, temp); |
120 | GPU_SWAP(y0, y1, temp); |
121 | } |
122 | } |
123 | if (y1 >= y2) |
124 | { |
125 | if( y1!=y2 || x1>x2 ) |
126 | { |
127 | GPU_SWAP(x1, x2, temp); |
128 | GPU_SWAP(y1, y2, temp); |
129 | } |
130 | } |
131 | if (y0 >= y1) |
132 | { |
133 | if( y0!=y1 || x0>x1 ) |
134 | { |
135 | GPU_SWAP(x0, x1, temp); |
136 | GPU_SWAP(y0, y1, temp); |
137 | } |
138 | } |
139 | |
140 | ya = y2 - y0; |
141 | yb = y2 - y1; |
142 | dx =(x2 - x1) * ya - (x2 - x0) * yb; |
143 | |
144 | for (s32 loop0 = 2; loop0; --loop0) |
145 | { |
146 | if (loop0 == 2) |
147 | { |
148 | ya = y0; |
149 | yb = y1; |
150 | x3 = i2x(x0); |
151 | x4 = y0!=y1 ? x3 : i2x(x1); |
152 | if (dx < 0) |
153 | { |
154 | dx3 = xLoDivx((x2 - x0), (y2 - y0)); |
155 | dx4 = xLoDivx((x1 - x0), (y1 - y0)); |
156 | } |
157 | else |
158 | { |
159 | dx3 = xLoDivx((x1 - x0), (y1 - y0)); |
160 | dx4 = xLoDivx((x2 - x0), (y2 - y0)); |
161 | } |
162 | } |
163 | else |
164 | { |
165 | ya = y1; |
166 | yb = y2; |
167 | if (dx < 0) |
168 | { |
169 | x4 = i2x(x1); |
170 | x3 = i2x(x0) + (dx3 * (y1 - y0)); |
171 | dx4 = xLoDivx((x2 - x1), (y2 - y1)); |
172 | } |
173 | else |
174 | { |
175 | x3 = i2x(x1); |
176 | x4 = i2x(x0) + (dx4 * (y1 - y0)); |
177 | dx3 = xLoDivx((x2 - x1), (y2 - y1)); |
178 | } |
179 | } |
180 | |
181 | temp = ymin - ya; |
182 | if (temp > 0) |
183 | { |
184 | ya = ymin; |
185 | x3 += dx3*temp; |
186 | x4 += dx4*temp; |
187 | } |
188 | if (yb > ymax) yb = ymax; |
189 | if (ya>=yb) continue; |
190 | |
191 | x3+= fixed_HALF; |
192 | x4+= fixed_HALF; |
193 | |
194 | u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; |
195 | |
196 | for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4) |
197 | { |
198 | if (ya&li) continue; |
199 | if ((ya&pi)==pif) continue; |
200 | xa = x2i(x3); |
201 | xb = x2i(x4); |
202 | if( (xa>xmax) || (xb<xmin) ) continue; |
203 | if(xa < xmin) xa = xmin; |
204 | if(xb > xmax) xb = xmax; |
205 | xb-=xa; |
206 | if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); |
207 | } |
208 | } |
209 | } |
210 | |
211 | /*---------------------------------------------------------------------- |
212 | FT3 |
213 | ----------------------------------------------------------------------*/ |
214 | |
215 | void gpuDrawFT3(const PP gpuPolySpanDriver) |
216 | { |
217 | const int li=linesInterlace; |
218 | const int pi=(progressInterlace?(linesInterlace+1):0); |
219 | const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1); |
220 | s32 temp; |
221 | s32 xa, xb, xmin, xmax; |
222 | s32 ya, yb, ymin, ymax; |
223 | s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; |
224 | s32 y0, y1, y2; |
225 | s32 u0, u1, u2, u3, du3=0; |
226 | s32 v0, v1, v2, v3, dv3=0; |
227 | |
228 | x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] ); |
229 | y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] ); |
230 | x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] ); |
231 | y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] ); |
232 | x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]); |
233 | y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]); |
234 | |
235 | GPU_TESTRANGE3(); |
236 | |
237 | x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; |
238 | y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; |
239 | |
240 | xmin = DrawingArea[0]; xmax = DrawingArea[2]; |
241 | ymin = DrawingArea[1]; ymax = DrawingArea[3]; |
242 | |
243 | { |
244 | int rx0 = Max2(xmin,Min3(x0,x1,x2)); |
245 | int ry0 = Max2(ymin,Min3(y0,y1,y2)); |
246 | int rx1 = Min2(xmax,Max3(x0,x1,x2)); |
247 | int ry1 = Min2(ymax,Max3(y0,y1,y2)); |
248 | if( rx0>=rx1 || ry0>=ry1) return; |
249 | } |
250 | |
251 | u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9]; |
252 | u1 = PacketBuffer.U1[16]; v1 = PacketBuffer.U1[17]; |
253 | u2 = PacketBuffer.U1[24]; v2 = PacketBuffer.U1[25]; |
254 | |
255 | r4 = s32(PacketBuffer.U1[0]); |
256 | g4 = s32(PacketBuffer.U1[1]); |
257 | b4 = s32(PacketBuffer.U1[2]); |
258 | dr4 = dg4 = db4 = 0; |
259 | |
260 | if (y0 >= y1) |
261 | { |
262 | if( y0!=y1 || x0>x1 ) |
263 | { |
264 | GPU_SWAP(x0, x1, temp); |
265 | GPU_SWAP(y0, y1, temp); |
266 | GPU_SWAP(u0, u1, temp); |
267 | GPU_SWAP(v0, v1, temp); |
268 | } |
269 | } |
270 | if (y1 >= y2) |
271 | { |
272 | if( y1!=y2 || x1>x2 ) |
273 | { |
274 | GPU_SWAP(x1, x2, temp); |
275 | GPU_SWAP(y1, y2, temp); |
276 | GPU_SWAP(u1, u2, temp); |
277 | GPU_SWAP(v1, v2, temp); |
278 | } |
279 | } |
280 | if (y0 >= y1) |
281 | { |
282 | if( y0!=y1 || x0>x1 ) |
283 | { |
284 | GPU_SWAP(x0, x1, temp); |
285 | GPU_SWAP(y0, y1, temp); |
286 | GPU_SWAP(u0, u1, temp); |
287 | GPU_SWAP(v0, v1, temp); |
288 | } |
289 | } |
290 | |
291 | ya = y2 - y0; |
292 | yb = y2 - y1; |
293 | dx = (x2 - x1) * ya - (x2 - x0) * yb; |
294 | du4 = (u2 - u1) * ya - (u2 - u0) * yb; |
295 | dv4 = (v2 - v1) * ya - (v2 - v0) * yb; |
296 | |
297 | s32 iF,iS; |
298 | xInv( dx, iF, iS); |
299 | du4 = xInvMulx( du4, iF, iS); |
300 | dv4 = xInvMulx( dv4, iF, iS); |
301 | tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff); |
302 | tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff; |
303 | |
304 | for (s32 loop0 = 2; loop0; --loop0) |
305 | { |
306 | if (loop0 == 2) |
307 | { |
308 | ya = y0; |
309 | yb = y1; |
310 | u3 = i2x(u0); |
311 | v3 = i2x(v0); |
312 | x3 = i2x(x0); |
313 | x4 = y0!=y1 ? x3 : i2x(x1); |
314 | if (dx < 0) |
315 | { |
316 | xInv( (y2 - y0), iF, iS); |
317 | dx3 = xInvMulx( (x2 - x0), iF, iS); |
318 | du3 = xInvMulx( (u2 - u0), iF, iS); |
319 | dv3 = xInvMulx( (v2 - v0), iF, iS); |
320 | dx4 = xLoDivx ( (x1 - x0), (y1 - y0)); |
321 | } |
322 | else |
323 | { |
324 | xInv( (y1 - y0), iF, iS); |
325 | dx3 = xInvMulx( (x1 - x0), iF, iS); |
326 | du3 = xInvMulx( (u1 - u0), iF, iS); |
327 | dv3 = xInvMulx( (v1 - v0), iF, iS); |
328 | dx4 = xLoDivx ( (x2 - x0), (y2 - y0)); |
329 | } |
330 | } |
331 | else |
332 | { |
333 | ya = y1; |
334 | yb = y2; |
335 | if (dx < 0) |
336 | { |
337 | temp = y1 - y0; |
338 | u3 = i2x(u0) + (du3 * temp); |
339 | v3 = i2x(v0) + (dv3 * temp); |
340 | x3 = i2x(x0) + (dx3 * temp); |
341 | x4 = i2x(x1); |
342 | dx4 = xLoDivx((x2 - x1), (y2 - y1)); |
343 | } |
344 | else |
345 | { |
346 | u3 = i2x(u1); |
347 | v3 = i2x(v1); |
348 | x3 = i2x(x1); |
349 | x4 = i2x(x0) + (dx4 * (y1 - y0)); |
350 | xInv( (y2 - y1), iF, iS); |
351 | dx3 = xInvMulx( (x2 - x1), iF, iS); |
352 | du3 = xInvMulx( (u2 - u1), iF, iS); |
353 | dv3 = xInvMulx( (v2 - v1), iF, iS); |
354 | } |
355 | } |
356 | |
357 | temp = ymin - ya; |
358 | if (temp > 0) |
359 | { |
360 | ya = ymin; |
361 | x3 += dx3*temp; |
362 | x4 += dx4*temp; |
363 | u3 += du3*temp; |
364 | v3 += dv3*temp; |
365 | } |
366 | if (yb > ymax) yb = ymax; |
367 | if (ya>=yb) continue; |
368 | |
369 | x3+= fixed_HALF; |
370 | x4+= fixed_HALF; |
371 | u3+= fixed_HALF; |
372 | v4+= fixed_HALF; |
373 | |
374 | u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; |
375 | |
376 | for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, u3+=du3, v3+=dv3) |
377 | { |
378 | if (ya&li) continue; |
379 | if ((ya&pi)==pif) continue; |
380 | xa = x2i(x3); |
381 | xb = x2i(x4); |
382 | if( (xa>xmax) || (xb<xmin) ) continue; |
383 | |
384 | temp = xmin - xa; |
385 | if(temp > 0) |
386 | { |
387 | xa = xmin; |
388 | u4 = u3 + du4*temp; |
389 | v4 = v3 + dv4*temp; |
390 | } |
391 | else |
392 | { |
393 | u4 = u3; |
394 | v4 = v3; |
395 | } |
396 | if(xb > xmax) xb = xmax; |
397 | xb-=xa; |
398 | if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); |
399 | } |
400 | } |
401 | } |
402 | |
403 | /*---------------------------------------------------------------------- |
404 | G3 |
405 | ----------------------------------------------------------------------*/ |
406 | |
407 | void gpuDrawG3(const PP gpuPolySpanDriver) |
408 | { |
409 | const int li=linesInterlace; |
410 | const int pi=(progressInterlace?(linesInterlace+1):0); |
411 | const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1); |
412 | s32 temp; |
413 | s32 xa, xb, xmin, xmax; |
414 | s32 ya, yb, ymin, ymax; |
415 | s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; |
416 | s32 y0, y1, y2; |
417 | s32 r0, r1, r2, r3, dr3=0; |
418 | s32 g0, g1, g2, g3, dg3=0; |
419 | s32 b0, b1, b2, b3, db3=0; |
420 | |
421 | x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] ); |
422 | y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] ); |
423 | x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] ); |
424 | y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] ); |
425 | x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]); |
426 | y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]); |
427 | |
428 | GPU_TESTRANGE3(); |
429 | |
430 | x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; |
431 | y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; |
432 | |
433 | xmin = DrawingArea[0]; xmax = DrawingArea[2]; |
434 | ymin = DrawingArea[1]; ymax = DrawingArea[3]; |
435 | |
436 | { |
437 | int rx0 = Max2(xmin,Min3(x0,x1,x2)); |
438 | int ry0 = Max2(ymin,Min3(y0,y1,y2)); |
439 | int rx1 = Min2(xmax,Max3(x0,x1,x2)); |
440 | int ry1 = Min2(ymax,Max3(y0,y1,y2)); |
441 | if( rx0>=rx1 || ry0>=ry1) return; |
442 | } |
443 | |
444 | r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2]; |
445 | r1 = PacketBuffer.U1[8]; g1 = PacketBuffer.U1[9]; b1 = PacketBuffer.U1[10]; |
446 | r2 = PacketBuffer.U1[16]; g2 = PacketBuffer.U1[17]; b2 = PacketBuffer.U1[18]; |
447 | |
448 | if (y0 >= y1) |
449 | { |
450 | if( y0!=y1 || x0>x1 ) |
451 | { |
452 | GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); |
453 | GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp); |
454 | } |
455 | } |
456 | if (y1 >= y2) |
457 | { |
458 | if( y1!=y2 || x1>x2 ) |
459 | { |
460 | GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp); |
461 | GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp); |
462 | } |
463 | } |
464 | if (y0 >= y1) |
465 | { |
466 | if( y0!=y1 || x0>x1 ) |
467 | { |
468 | GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); |
469 | GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp); |
470 | } |
471 | } |
472 | |
473 | ya = y2 - y0; |
474 | yb = y2 - y1; |
475 | dx = (x2 - x1) * ya - (x2 - x0) * yb; |
476 | dr4 = (r2 - r1) * ya - (r2 - r0) * yb; |
477 | dg4 = (g2 - g1) * ya - (g2 - g0) * yb; |
478 | db4 = (b2 - b1) * ya - (b2 - b0) * yb; |
479 | |
480 | s32 iF,iS; |
481 | xInv( dx, iF, iS); |
482 | dr4 = xInvMulx( dr4, iF, iS); |
483 | dg4 = xInvMulx( dg4, iF, iS); |
484 | db4 = xInvMulx( db4, iF, iS); |
485 | u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21; |
486 | u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10; |
487 | u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0; |
488 | lInc = db + dg + dr; |
489 | |
490 | for (s32 loop0 = 2; loop0; --loop0) |
491 | { |
492 | if (loop0 == 2) |
493 | { |
494 | ya = y0; |
495 | yb = y1; |
496 | r3 = i2x(r0); |
497 | g3 = i2x(g0); |
498 | b3 = i2x(b0); |
499 | x3 = i2x(x0); |
500 | x4 = y0!=y1 ? x3 : i2x(x1); |
501 | if (dx < 0) |
502 | { |
503 | xInv( (y2 - y0), iF, iS); |
504 | dx3 = xInvMulx( (x2 - x0), iF, iS); |
505 | dr3 = xInvMulx( (r2 - r0), iF, iS); |
506 | dg3 = xInvMulx( (g2 - g0), iF, iS); |
507 | db3 = xInvMulx( (b2 - b0), iF, iS); |
508 | dx4 = xLoDivx ( (x1 - x0), (y1 - y0)); |
509 | } |
510 | else |
511 | { |
512 | xInv( (y1 - y0), iF, iS); |
513 | dx3 = xInvMulx( (x1 - x0), iF, iS); |
514 | dr3 = xInvMulx( (r1 - r0), iF, iS); |
515 | dg3 = xInvMulx( (g1 - g0), iF, iS); |
516 | db3 = xInvMulx( (b1 - b0), iF, iS); |
517 | dx4 = xLoDivx ( (x2 - x0), (y2 - y0)); |
518 | } |
519 | } |
520 | else |
521 | { |
522 | ya = y1; |
523 | yb = y2; |
524 | if (dx < 0) |
525 | { |
526 | temp = y1 - y0; |
527 | r3 = i2x(r0) + (dr3 * temp); |
528 | g3 = i2x(g0) + (dg3 * temp); |
529 | b3 = i2x(b0) + (db3 * temp); |
530 | x3 = i2x(x0) + (dx3 * temp); |
531 | x4 = i2x(x1); |
532 | dx4 = xLoDivx((x2 - x1), (y2 - y1)); |
533 | } |
534 | else |
535 | { |
536 | r3 = i2x(r1); |
537 | g3 = i2x(g1); |
538 | b3 = i2x(b1); |
539 | x3 = i2x(x1); |
540 | x4 = i2x(x0) + (dx4 * (y1 - y0)); |
541 | |
542 | xInv( (y2 - y1), iF, iS); |
543 | dx3 = xInvMulx( (x2 - x1), iF, iS); |
544 | dr3 = xInvMulx( (r2 - r1), iF, iS); |
545 | dg3 = xInvMulx( (g2 - g1), iF, iS); |
546 | db3 = xInvMulx( (b2 - b1), iF, iS); |
547 | } |
548 | } |
549 | |
550 | temp = ymin - ya; |
551 | if (temp > 0) |
552 | { |
553 | ya = ymin; |
554 | x3 += dx3*temp; x4 += dx4*temp; |
555 | r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp; |
556 | } |
557 | if (yb > ymax) yb = ymax; |
558 | if (ya>=yb) continue; |
559 | |
560 | x3+= fixed_HALF; x4+= fixed_HALF; |
561 | r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF; |
562 | |
563 | u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; |
564 | |
565 | for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, r3+=dr3, g3+=dg3, b3+=db3) |
566 | { |
567 | if (ya&li) continue; |
568 | if ((ya&pi)==pif) continue; |
569 | xa = x2i(x3); |
570 | xb = x2i(x4); |
571 | if( (xa>xmax) || (xb<xmin) ) continue; |
572 | |
573 | temp = xmin - xa; |
574 | if(temp > 0) |
575 | { |
576 | xa = xmin; |
577 | r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp; |
578 | } |
579 | else |
580 | { |
581 | r4 = r3; g4 = g3; b4 = b3; |
582 | } |
583 | if(xb > xmax) xb = xmax; |
584 | xb-=xa; |
585 | if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); |
586 | } |
587 | } |
588 | } |
589 | |
590 | /*---------------------------------------------------------------------- |
591 | GT3 |
592 | ----------------------------------------------------------------------*/ |
593 | |
594 | void gpuDrawGT3(const PP gpuPolySpanDriver) |
595 | { |
596 | const int li=linesInterlace; |
597 | const int pi=(progressInterlace?(linesInterlace+1):0); |
598 | const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1); |
599 | s32 temp; |
600 | s32 xa, xb, xmin, xmax; |
601 | s32 ya, yb, ymin, ymax; |
602 | s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; |
603 | s32 y0, y1, y2; |
604 | s32 u0, u1, u2, u3, du3=0; |
605 | s32 v0, v1, v2, v3, dv3=0; |
606 | s32 r0, r1, r2, r3, dr3=0; |
607 | s32 g0, g1, g2, g3, dg3=0; |
608 | s32 b0, b1, b2, b3, db3=0; |
609 | |
610 | x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] ); |
611 | y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] ); |
612 | x1 = GPU_EXPANDSIGN(PacketBuffer.S2[8] ); |
613 | y1 = GPU_EXPANDSIGN(PacketBuffer.S2[9] ); |
614 | x2 = GPU_EXPANDSIGN(PacketBuffer.S2[14]); |
615 | y2 = GPU_EXPANDSIGN(PacketBuffer.S2[15]); |
616 | |
617 | GPU_TESTRANGE3(); |
618 | |
619 | x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; |
620 | y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; |
621 | |
622 | xmin = DrawingArea[0]; xmax = DrawingArea[2]; |
623 | ymin = DrawingArea[1]; ymax = DrawingArea[3]; |
624 | |
625 | { |
626 | int rx0 = Max2(xmin,Min3(x0,x1,x2)); |
627 | int ry0 = Max2(ymin,Min3(y0,y1,y2)); |
628 | int rx1 = Min2(xmax,Max3(x0,x1,x2)); |
629 | int ry1 = Min2(ymax,Max3(y0,y1,y2)); |
630 | if( rx0>=rx1 || ry0>=ry1) return; |
631 | } |
632 | |
633 | r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2]; |
634 | u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9]; |
635 | r1 = PacketBuffer.U1[12]; g1 = PacketBuffer.U1[13]; b1 = PacketBuffer.U1[14]; |
636 | u1 = PacketBuffer.U1[20]; v1 = PacketBuffer.U1[21]; |
637 | r2 = PacketBuffer.U1[24]; g2 = PacketBuffer.U1[25]; b2 = PacketBuffer.U1[26]; |
638 | u2 = PacketBuffer.U1[32]; v2 = PacketBuffer.U1[33]; |
639 | |
640 | if (y0 >= y1) |
641 | { |
642 | if( y0!=y1 || x0>x1 ) |
643 | { |
644 | GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); |
645 | GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp); |
646 | GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp); |
647 | } |
648 | } |
649 | if (y1 >= y2) |
650 | { |
651 | if( y1!=y2 || x1>x2 ) |
652 | { |
653 | GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp); |
654 | GPU_SWAP(u1, u2, temp); GPU_SWAP(v1, v2, temp); |
655 | GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp); |
656 | } |
657 | } |
658 | if (y0 >= y1) |
659 | { |
660 | if( y0!=y1 || x0>x1 ) |
661 | { |
662 | GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); |
663 | GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp); |
664 | GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp); |
665 | } |
666 | } |
667 | |
668 | ya = y2 - y0; |
669 | yb = y2 - y1; |
670 | dx = (x2 - x1) * ya - (x2 - x0) * yb; |
671 | du4 = (u2 - u1) * ya - (u2 - u0) * yb; |
672 | dv4 = (v2 - v1) * ya - (v2 - v0) * yb; |
673 | dr4 = (r2 - r1) * ya - (r2 - r0) * yb; |
674 | dg4 = (g2 - g1) * ya - (g2 - g0) * yb; |
675 | db4 = (b2 - b1) * ya - (b2 - b0) * yb; |
676 | |
677 | s32 iF,iS; |
678 | |
679 | xInv( dx, iF, iS); |
680 | du4 = xInvMulx( du4, iF, iS); |
681 | dv4 = xInvMulx( dv4, iF, iS); |
682 | dr4 = xInvMulx( dr4, iF, iS); |
683 | dg4 = xInvMulx( dg4, iF, iS); |
684 | db4 = xInvMulx( db4, iF, iS); |
685 | u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21; |
686 | u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10; |
687 | u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0; |
688 | lInc = db + dg + dr; |
689 | tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff); |
690 | tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff; |
691 | |
692 | for (s32 loop0 = 2; loop0; --loop0) |
693 | { |
694 | if (loop0 == 2) |
695 | { |
696 | ya = y0; |
697 | yb = y1; |
698 | u3 = i2x(u0); |
699 | v3 = i2x(v0); |
700 | r3 = i2x(r0); |
701 | g3 = i2x(g0); |
702 | b3 = i2x(b0); |
703 | x3 = i2x(x0); |
704 | x4 = y0!=y1 ? x3 : i2x(x1); |
705 | if (dx < 0) |
706 | { |
707 | xInv( (y2 - y0), iF, iS); |
708 | dx3 = xInvMulx( (x2 - x0), iF, iS); |
709 | du3 = xInvMulx( (u2 - u0), iF, iS); |
710 | dv3 = xInvMulx( (v2 - v0), iF, iS); |
711 | dr3 = xInvMulx( (r2 - r0), iF, iS); |
712 | dg3 = xInvMulx( (g2 - g0), iF, iS); |
713 | db3 = xInvMulx( (b2 - b0), iF, iS); |
714 | dx4 = xLoDivx ( (x1 - x0), (y1 - y0)); |
715 | } |
716 | else |
717 | { |
718 | xInv( (y1 - y0), iF, iS); |
719 | dx3 = xInvMulx( (x1 - x0), iF, iS); |
720 | du3 = xInvMulx( (u1 - u0), iF, iS); |
721 | dv3 = xInvMulx( (v1 - v0), iF, iS); |
722 | dr3 = xInvMulx( (r1 - r0), iF, iS); |
723 | dg3 = xInvMulx( (g1 - g0), iF, iS); |
724 | db3 = xInvMulx( (b1 - b0), iF, iS); |
725 | dx4 = xLoDivx ( (x2 - x0), (y2 - y0)); |
726 | } |
727 | } |
728 | else |
729 | { |
730 | ya = y1; |
731 | yb = y2; |
732 | if (dx < 0) |
733 | { |
734 | temp = y1 - y0; |
735 | u3 = i2x(u0) + (du3 * temp); |
736 | v3 = i2x(v0) + (dv3 * temp); |
737 | r3 = i2x(r0) + (dr3 * temp); |
738 | g3 = i2x(g0) + (dg3 * temp); |
739 | b3 = i2x(b0) + (db3 * temp); |
740 | x3 = i2x(x0) + (dx3 * temp); |
741 | x4 = i2x(x1); |
742 | dx4 = xLoDivx((x2 - x1), (y2 - y1)); |
743 | } |
744 | else |
745 | { |
746 | u3 = i2x(u1); |
747 | v3 = i2x(v1); |
748 | r3 = i2x(r1); |
749 | g3 = i2x(g1); |
750 | b3 = i2x(b1); |
751 | x3 = i2x(x1); |
752 | x4 = i2x(x0) + (dx4 * (y1 - y0)); |
753 | |
754 | xInv( (y2 - y1), iF, iS); |
755 | dx3 = xInvMulx( (x2 - x1), iF, iS); |
756 | du3 = xInvMulx( (u2 - u1), iF, iS); |
757 | dv3 = xInvMulx( (v2 - v1), iF, iS); |
758 | dr3 = xInvMulx( (r2 - r1), iF, iS); |
759 | dg3 = xInvMulx( (g2 - g1), iF, iS); |
760 | db3 = xInvMulx( (b2 - b1), iF, iS); |
761 | } |
762 | } |
763 | |
764 | temp = ymin - ya; |
765 | if (temp > 0) |
766 | { |
767 | ya = ymin; |
768 | x3 += dx3*temp; x4 += dx4*temp; |
769 | u3 += du3*temp; v3 += dv3*temp; |
770 | r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp; |
771 | } |
772 | if (yb > ymax) yb = ymax; |
773 | if (ya>=yb) continue; |
774 | |
775 | x3+= fixed_HALF; x4+= fixed_HALF; |
776 | u3+= fixed_HALF; v4+= fixed_HALF; |
777 | r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF; |
778 | u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; |
779 | |
780 | for(;ya<yb;++ya, PixelBase += FRAME_WIDTH, x3+=dx3, x4+=dx4, u3+=du3, v3+=dv3, r3+=dr3, g3+=dg3, b3+=db3) |
781 | { |
782 | if (ya&li) continue; |
783 | if ((ya&pi)==pif) continue; |
784 | xa = x2i(x3); |
785 | xb = x2i(x4); |
786 | if( (xa>xmax) || (xb<xmin)) continue; |
787 | |
788 | temp = xmin - xa; |
789 | if(temp > 0) |
790 | { |
791 | xa = xmin; |
792 | u4 = u3 + du4*temp; v4 = v3 + dv4*temp; |
793 | r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp; |
794 | } |
795 | else |
796 | { |
797 | u4 = u3; v4 = v3; |
798 | r4 = r3; g4 = g3; b4 = b3; |
799 | } |
800 | if(xb > xmax) xb = xmax; |
801 | xb-=xa; |
802 | if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); |
803 | } |
804 | } |
805 | } |
806 | |
807 | |
808 | ////////////////////////////////////////////////////////////////////////// |
809 | //senquack - Original Unai poly routines left here for reference: |
810 | // ( from gpu_inner.h ) NOTE: this uses 16.16, not 22.10 fixed point |
811 | ////////////////////////////////////////////////////////////////////////// |
// Draws one horizontal span of `count` pixels starting at pDst (original
// Unai span routine; per the section header above it uses 16.16 fixed point).
//
// pDst  - first destination pixel (16-bit); advanced once per pixel.
// count - number of pixels. Must be >= 1: every loop below is a
//         do { } while (--count), so a count of 0 would wrap around.
//
// Inputs are taken from globals set up by the triangle routines: PixelData,
// r4/g4/b4, lInc, u4/v4, tInc, tMsk, TBA, CBA and blit_mask.
//
// NOTE(review): CF is not referenced by name in this body. The flags TM, G,
// L, M, B, MB, BM and BI are not defined in the visible source; presumably
// they are compile-time macros derived from CF, which would let the compiler
// discard the untaken branches - confirm against their definitions.
// NOTE(review): the gpuBlending0x / gpuLighting* helpers are not visible
// here. `uMsk` is assigned but never read in this function, so it is
// presumably consumed by name inside a blend macro - do not rename locals
// without checking.
template<const int CF>
INLINE void gpuPolySpanFn(u16 *pDst, u32 count)
{
	if (!TM)
	{
		// NO TEXTURE
		if (!G)
		{
			// NO GOURAUD
			// One colour for the whole span: either the lit colour built
			// from r4/g4/b4 (L set) or the constant PixelData.
			u16 data;
			if (L) { u32 lCol=((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); gpuLightingRGB(data,lCol); }
			else data=PixelData;
			if ((!M)&&(!B))
			{
				// Plain fill; MB additionally sets bit 15 of every pixel written.
				if (MB) { data = data | 0x8000; }
				do { *pDst++ = data; } while (--count);
			}
			else if ((M)&&(!B))
			{
				// Masked fill: destination pixels with bit 15 set are left untouched.
				if (MB) { data = data | 0x8000; }
				do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count);
			}
			else
			{
				// Blended fill (optionally masked and/or blit-masked).
				u16 uSrc;
				u16 uDst;
				u32 uMsk; if (BM==0) uMsk=0x7BDE;
				u32 bMsk; if (BI) bMsk=blit_mask;
				do
				{
					// blit-mask
					// Skips the pixel when the bMsk bit selected by
					// (pDst address >> 1) & 7 is set.
					// NOTE(review): (u32)pDst truncates the pointer if
					// pointers are wider than u32 - confirm targets.
					if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endtile; }
					// masking
					uDst = *pDst;
					if(M) { if (uDst&0x8000) goto endtile; }
					uSrc = data;
					// blend
					if (BM==0) gpuBlending00(uSrc, uDst);
					if (BM==1) gpuBlending01(uSrc, uDst);
					if (BM==2) gpuBlending02(uSrc, uDst);
					if (BM==3) gpuBlending03(uSrc, uDst);
					if (MB) { *pDst = uSrc | 0x8000; }
					else { *pDst = uSrc; }
					// Skipped pixels still advance the destination pointer.
					endtile: pDst++;
				}
				while (--count);
			}
		}
		else
		{
			// GOURAUD
			u16 uDst;
			u16 uSrc;
			// lCol packs b/g/r into one word (b in the low bits, g from
			// bit 10, r from bit 21); linc is added to it after every pixel.
			u32 linc=lInc;
			u32 lCol=((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21));
			u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE;
			u32 bMsk; if (BI) bMsk=blit_mask;
			do
			{
				// blit-mask
				if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endgou; }
				// masking
				if(M) { uDst = *pDst; if (uDst&0x8000) goto endgou; }
				// blend
				if(B)
				{
					// light
					gpuLightingRGB(uSrc,lCol);
					// uDst was already loaded above when masking is on.
					if(!M) { uDst = *pDst; }
					if (BM==0) gpuBlending00(uSrc, uDst);
					if (BM==1) gpuBlending01(uSrc, uDst);
					if (BM==2) gpuBlending02(uSrc, uDst);
					if (BM==3) gpuBlending03(uSrc, uDst);
				}
				else
				{
					// light
					gpuLightingRGB(uSrc,lCol);
				}
				if (MB) { *pDst = uSrc | 0x8000; }
				else { *pDst = uSrc; }
				// Colour keeps stepping even for skipped pixels.
				endgou: pDst++; lCol=(lCol+linc);
			}
			while (--count);
		}
	}
	else
	{
		// TEXTURE
		u16 uDst;
		u16 uSrc;
		u32 linc; if (L&&G) linc=lInc;
		u32 tinc=tInc;
		u32 tmsk=tMsk;
		// tCor packs the texture coordinate into one word: u4 in the upper
		// half-word, v4 in the lower, wrapped by tmsk after every step.
		u32 tCor = ((u32)( u4<<7)&0x7fff0000) | ((u32)( v4>>9)&0x00007fff); tCor&= tmsk;
		const u16* _TBA=TBA;
		const u16* _CBA; if (TM!=3) _CBA=CBA;
		// lCol is only initialised when lighting (L) is enabled; flat and
		// Gouraud lighting pack r4/g4/b4 with different shifts.
		u32 lCol;
		if(L && !G) { lCol = ((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); }
		else if(L && G) { lCol = ((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21)); }
		u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE;
		u32 bMsk; if (BI) bMsk=blit_mask;
		do
		{
			// blit-mask
			if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endpoly; }
			// masking
			if(M) { uDst = *pDst; if (uDst&0x8000) goto endpoly; }
			// texture
			// TM==1: two texels per byte, low/high nibble chosen by the low
			//        bit of u, looked up through _CBA.
			// TM==2: one byte per texel, looked up through _CBA.
			// TM==3: 16-bit texel read directly from _TBA.
			// A texel value of 0 is skipped (nothing is written).
			if (TM==1) { u32 tu=(tCor>>23); u32 tv=(tCor<<4)&(0xff<<11); u8 rgb=((u8*)_TBA)[tv+(tu>>1)]; uSrc=_CBA[(rgb>>((tu&1)<<2))&0xf]; if(!uSrc) goto endpoly; }
			if (TM==2) { uSrc = _CBA[(((u8*)_TBA)[(tCor>>23)+((tCor<<4)&(0xff<<11))])]; if(!uSrc) goto endpoly; }
			if (TM==3) { uSrc = _TBA[(tCor>>23)+((tCor<<3)&(0xff<<10))]; if(!uSrc) goto endpoly; }
			// blend
			if(B)
			{
				// Only texels with bit 15 set are blended with the
				// destination; other texels are lit (if L) and written as-is.
				if (uSrc&0x8000)
				{
					// light
					if(L) gpuLightingTXT(uSrc, lCol);
					if(!M) { uDst = *pDst; }
					if (BM==0) gpuBlending00(uSrc, uDst);
					if (BM==1) gpuBlending01(uSrc, uDst);
					if (BM==2) gpuBlending02(uSrc, uDst);
					if (BM==3) gpuBlending03(uSrc, uDst);
				}
				else
				{
					// light
					if(L) gpuLightingTXT(uSrc, lCol);
				}
			}
			else
			{
				// light
				// Without lighting, bit 15 of the texel is cleared unless
				// MB will set it anyway below.
				if(L) { gpuLightingTXT(uSrc, lCol); } else if(!MB) { uSrc&= 0x7fff; }
			}
			if (MB) { *pDst = uSrc | 0x8000; }
			else { *pDst = uSrc; }
			// Texture coordinate and Gouraud colour keep stepping even for
			// skipped pixels.
			endpoly: pDst++;
			tCor=(tCor+tinc)&tmsk;
			if (L&&G) lCol=(lCol+linc);
		}
		while (--count);
	}
}