86aad47b |
1 | /*************************************************************************** |
2 | * Copyright (C) 2010 PCSX4ALL Team * |
3 | * Copyright (C) 2010 Unai * |
4 | * * |
5 | * This program is free software; you can redistribute it and/or modify * |
6 | * it under the terms of the GNU General Public License as published by * |
7 | * the Free Software Foundation; either version 2 of the License, or * |
8 | * (at your option) any later version. * |
9 | * * |
10 | * This program is distributed in the hope that it will be useful, * |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
13 | * GNU General Public License for more details. * |
14 | * * |
15 | * You should have received a copy of the GNU General Public License * |
16 | * along with this program; if not, write to the * |
17 | * Free Software Foundation, Inc., * |
18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * |
19 | ***************************************************************************/ |
20 | |
030d1121 |
21 | //senquack - NOTE: GPU Unai poly routines have been rewritten/adapted |
22 | // from DrHell routines to fix multiple issues. See README_senquack.txt |
9ed4ca47 |
23 | |
86aad47b |
24 | /////////////////////////////////////////////////////////////////////////////// |
030d1121 |
25 | // Shared poly vertex buffer, able to handle 3 or 4-pt polys of any type. |
26 | /////////////////////////////////////////////////////////////////////////////// |
86aad47b |
27 | |
030d1121 |
28 | struct PolyVertex { |
29 | s32 x, y; // Sign-extended 11-bit X,Y coords |
30 | union { |
31 | struct { u8 u, v, pad[2]; } tex; // Texture coords (if used) |
32 | u32 tex_word; |
33 | }; |
34 | union { |
35 | struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used) |
36 | u32 col_word; |
37 | }; |
38 | }; |
39 | |
// Bit flags naming the optional per-vertex attributes a poly can carry.
// They are OR-ed together to form the PolyType values.
enum PolyAttribute {
	POLYATTR_TEXTURE = 0x01,  // bit 0: vertices carry texture coords
	POLYATTR_GOURAUD = 0x02   // bit 1: vertices carry their own color
};
44 | |
45 | enum PolyType { |
46 | POLYTYPE_F = 0, |
47 | POLYTYPE_FT = (POLYATTR_TEXTURE), |
48 | POLYTYPE_G = (POLYATTR_GOURAUD), |
49 | POLYTYPE_GT = (POLYATTR_TEXTURE | POLYATTR_GOURAUD) |
50 | }; |
51 | |
52 | /////////////////////////////////////////////////////////////////////////////// |
53 | // polyInitVertexBuffer() |
54 | // Fills vbuf[] array with data from any type of poly draw-command packet. |
86aad47b |
55 | /////////////////////////////////////////////////////////////////////////////// |
030d1121 |
56 | static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyType ptype, u32 is_quad) |
86aad47b |
57 | { |
030d1121 |
58 | bool texturing = ptype & POLYATTR_TEXTURE; |
59 | bool gouraud = ptype & POLYATTR_GOURAUD; |
60 | |
61 | int vert_stride = 1; // Stride of vertices in cmd packet, in 32-bit words |
62 | if (texturing) |
63 | vert_stride++; |
64 | if (gouraud) |
65 | vert_stride++; |
66 | |
67 | int num_verts = (is_quad) ? 4 : 3; |
68 | u32 *ptr; |
69 | |
70 | // X,Y coords, adjusted by draw offsets |
71 | s32 x_off = gpu_unai.DrawingOffset[0]; |
72 | s32 y_off = gpu_unai.DrawingOffset[1]; |
73 | ptr = &packet.U4[1]; |
74 | for (int i=0; i < num_verts; ++i, ptr += vert_stride) { |
75 | s16* coord_ptr = (s16*)ptr; |
76 | vbuf[i].x = GPU_EXPANDSIGN(coord_ptr[0]) + x_off; |
77 | vbuf[i].y = GPU_EXPANDSIGN(coord_ptr[1]) + y_off; |
78 | } |
86aad47b |
79 | |
030d1121 |
80 | // U,V texture coords (if applicable) |
81 | if (texturing) { |
82 | ptr = &packet.U4[2]; |
83 | for (int i=0; i < num_verts; ++i, ptr += vert_stride) |
84 | vbuf[i].tex_word = *ptr; |
85 | } |
9ed4ca47 |
86 | |
030d1121 |
87 | // Colors (if applicable) |
88 | if (gouraud) { |
89 | ptr = &packet.U4[0]; |
90 | for (int i=0; i < num_verts; ++i, ptr += vert_stride) |
91 | vbuf[i].col_word = *ptr; |
92 | } |
93 | } |
86aad47b |
94 | |
030d1121 |
95 | /////////////////////////////////////////////////////////////////////////////// |
96 | // Helper functions to determine which vertex in a 2 or 3 vertex array |
97 | // has the highest/lowest X/Y coordinate. |
98 | // Note: the comparison logic is such that, given a set of vertices with |
99 | // identical values for a given coordinate, a different index will be |
100 | // returned from vertIdxOfLeast..() than a call to vertIdxOfHighest..(). |
101 | // This ensures that, during the vertex-ordering phase of rasterization, |
102 | // all three vertices remain unique. |
103 | /////////////////////////////////////////////////////////////////////////////// |
86aad47b |
104 | |
030d1121 |
// Index (0 or 1) of whichever of the first two vertices has the smaller X.
// Index 0 wins a tie.
template<typename T>
static inline int vertIdxOfLeastXCoord2(const T *Tptr)
{
	if (Tptr[0].x <= Tptr[1].x)
		return 0;
	return 1;
}
86aad47b |
110 | |
030d1121 |
// Index (0..2) of whichever of the first three vertices has the smallest X.
// On ties the lowest index wins.
template<typename T>
static inline int vertIdxOfLeastXCoord3(const T *Tptr)
{
	// Settle v0 vs v1 first, then pit the winner against v2
	int best = 0;
	if (!(Tptr[0].x <= Tptr[1].x))
		best = 1;
	if (!(Tptr[best].x <= Tptr[2].x))
		best = 2;
	return best;
}
86aad47b |
117 | |
030d1121 |
// Index (0 or 1) of whichever of the first two vertices has the smaller Y.
// Index 0 wins a tie.
template<typename T>
static inline int vertIdxOfLeastYCoord2(const T *Tptr)
{
	if (Tptr[0].y <= Tptr[1].y)
		return 0;
	return 1;
}
86aad47b |
123 | |
030d1121 |
// Index (0..2) of whichever of the first three vertices has the smallest Y.
// On ties the lowest index wins.
template<typename T>
static inline int vertIdxOfLeastYCoord3(const T *Tptr)
{
	// Settle v0 vs v1 first, then pit the winner against v2
	int best = 0;
	if (!(Tptr[0].y <= Tptr[1].y))
		best = 1;
	if (!(Tptr[best].y <= Tptr[2].y))
		best = 2;
	return best;
}
130 | |
// Index (0 or 1) of whichever of the first two vertices has the larger X.
// Index 1 wins a tie.
template<typename T>
static inline int vertIdxOfHighestXCoord2(const T *Tptr)
{
	if (Tptr[1].x >= Tptr[0].x)
		return 1;
	return 0;
}
136 | |
// Index (0..2) of whichever of the first three vertices has the largest X.
// On ties the highest index wins.
template<typename T>
static inline int vertIdxOfHighestXCoord3(const T *Tptr)
{
	// Settle v0 vs v1 first, then pit the winner against v2
	int best = 0;
	if (Tptr[1].x >= Tptr[0].x)
		best = 1;
	if (Tptr[2].x >= Tptr[best].x)
		best = 2;
	return best;
}
143 | |
// Index (0 or 1) of whichever of the first two vertices has the larger Y.
// Index 1 wins a tie.
template<typename T>
static inline int vertIdxOfHighestYCoord2(const T *Tptr)
{
	if (Tptr[1].y >= Tptr[0].y)
		return 1;
	return 0;
}
149 | |
// Index (0..2) of whichever of the first three vertices has the largest Y.
// On ties the highest index wins.
template<typename T>
static inline int vertIdxOfHighestYCoord3(const T *Tptr)
{
	// Settle v0 vs v1 first, then pit the winner against v2
	int best = 0;
	if (Tptr[1].y >= Tptr[0].y)
		best = 1;
	if (Tptr[2].y >= Tptr[best].y)
		best = 2;
	return best;
}
86aad47b |
156 | |
030d1121 |
157 | /////////////////////////////////////////////////////////////////////////////// |
158 | // polyUseTriangle() |
159 | // Determines if the specified triangle should be rendered. If so, it |
160 | // fills the given array of vertex pointers, vert_ptrs, in order of |
161 | // increasing Y coordinate values, as required by rasterization algorithm. |
162 | // Parameter 'tri_num' is 0 for first triangle (idx 0,1,2 of vbuf[]), |
163 | // or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]). |
164 | // Returns true if triangle should be rendered, false if not. |
165 | /////////////////////////////////////////////////////////////////////////////// |
166 | static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs) |
167 | { |
168 | // Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)? |
169 | const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 0 : 1]; |
170 | |
171 | // Get indices of highest/lowest X,Y coords within triangle |
172 | int idx_lowest_x = vertIdxOfLeastXCoord3(tri_ptr); |
173 | int idx_highest_x = vertIdxOfHighestXCoord3(tri_ptr); |
174 | int idx_lowest_y = vertIdxOfLeastYCoord3(tri_ptr); |
175 | int idx_highest_y = vertIdxOfHighestYCoord3(tri_ptr); |
176 | |
177 | // Maximum absolute distance between any two X coordinates is 1023, |
178 | // and for Y coordinates is 511 (PS1 hardware limitation) |
179 | int lowest_x = tri_ptr[idx_lowest_x].x; |
180 | int highest_x = tri_ptr[idx_highest_x].x; |
181 | int lowest_y = tri_ptr[idx_lowest_y].y; |
182 | int highest_y = tri_ptr[idx_highest_y].y; |
183 | if ((highest_x - lowest_x) >= CHKMAX_X || |
184 | (highest_y - lowest_y) >= CHKMAX_Y) |
185 | return false; |
186 | |
187 | // Determine if triangle is completely outside clipping range |
188 | int xmin, xmax, ymin, ymax; |
189 | xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; |
190 | ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; |
191 | int clipped_lowest_x = Max2(xmin,lowest_x); |
192 | int clipped_lowest_y = Max2(ymin,lowest_y); |
193 | int clipped_highest_x = Min2(xmax,highest_x); |
194 | int clipped_highest_y = Min2(ymax,highest_y); |
195 | if (clipped_lowest_x >= clipped_highest_x || |
196 | clipped_lowest_y >= clipped_highest_y) |
197 | return false; |
198 | |
199 | // Order vertex ptrs by increasing y value (draw routines need this). |
200 | // The middle index is deduced by a binary math trick that depends |
201 | // on index range always being between 0..2 |
202 | vert_ptrs[0] = tri_ptr + idx_lowest_y; |
203 | vert_ptrs[1] = tri_ptr + ((idx_lowest_y + idx_highest_y) ^ 3); |
204 | vert_ptrs[2] = tri_ptr + idx_highest_y; |
205 | return true; |
206 | } |
207 | |
208 | /////////////////////////////////////////////////////////////////////////////// |
209 | // GPU internal polygon drawing functions |
210 | /////////////////////////////////////////////////////////////////////////////// |
211 | |
212 | /*---------------------------------------------------------------------- |
213 | gpuDrawPolyF - Flat-shaded, untextured poly |
214 | ----------------------------------------------------------------------*/ |
215 | void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) |
216 | { |
217 | // Set up bgr555 color to be used across calls in inner driver |
218 | gpu_unai.PixelData = GPU_RGB16(packet.U4[0]); |
219 | |
220 | PolyVertex vbuf[4]; |
221 | polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad); |
222 | |
223 | int total_passes = is_quad ? 2 : 1; |
224 | int cur_pass = 0; |
225 | do |
86aad47b |
226 | { |
030d1121 |
227 | const PolyVertex* vptrs[3]; |
228 | if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) |
229 | continue; |
230 | |
231 | s32 xa, xb, ya, yb; |
232 | s32 x3, dx3, x4, dx4, dx; |
233 | s32 x0, x1, x2, y0, y1, y2; |
234 | |
235 | x0 = vptrs[0]->x; y0 = vptrs[0]->y; |
236 | x1 = vptrs[1]->x; y1 = vptrs[1]->y; |
237 | x2 = vptrs[2]->x; y2 = vptrs[2]->y; |
238 | |
239 | ya = y2 - y0; |
240 | yb = y2 - y1; |
241 | dx = (x2 - x1) * ya - (x2 - x0) * yb; |
242 | |
243 | for (int loop0 = 2; loop0; loop0--) { |
244 | if (loop0 == 2) { |
245 | ya = y0; yb = y1; |
246 | x3 = x4 = i2x(x0); |
247 | if (dx < 0) { |
248 | #ifdef GPU_UNAI_USE_FLOATMATH |
249 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
250 | dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; |
251 | dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; |
252 | #else |
253 | dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0; |
254 | dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0; |
255 | #endif |
256 | #else // Integer Division: |
257 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
258 | dx3 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0; |
259 | dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0; |
260 | #else |
261 | dx3 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0; |
262 | dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0; |
263 | #endif |
264 | #endif |
265 | } else { |
266 | #ifdef GPU_UNAI_USE_FLOATMATH |
267 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
268 | dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; |
269 | dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; |
270 | #else |
271 | dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0; |
272 | dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0; |
273 | #endif |
274 | #else // Integer Division: |
275 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
276 | dx3 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0; |
277 | dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0; |
278 | #else |
279 | dx3 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0; |
280 | dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0; |
281 | #endif |
282 | #endif |
283 | } |
284 | } else { |
285 | //senquack - break out of final loop if nothing to be drawn (1st loop |
286 | // must always be taken to setup dx3/dx4) |
287 | if (y1 == y2) break; |
288 | |
289 | ya = y1; yb = y2; |
290 | |
291 | if (dx < 0) { |
292 | x3 = i2x(x0) + (dx3 * (y1 - y0)); |
293 | x4 = i2x(x1); |
294 | #ifdef GPU_UNAI_USE_FLOATMATH |
295 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
296 | dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; |
297 | #else |
298 | dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; |
299 | #endif |
300 | #else // Integer Division: |
301 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
302 | dx4 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0; |
303 | #else |
304 | dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0; |
305 | #endif |
306 | #endif |
307 | } else { |
308 | x3 = i2x(x1); |
309 | x4 = i2x(x0) + (dx4 * (y1 - y0)); |
310 | #ifdef GPU_UNAI_USE_FLOATMATH |
311 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
312 | dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; |
313 | #else |
314 | dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; |
315 | #endif |
316 | #else // Integer Division: |
317 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
318 | dx3 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0; |
319 | #else |
320 | dx3 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0; |
321 | #endif |
322 | #endif |
323 | } |
86aad47b |
324 | } |
030d1121 |
325 | |
326 | s32 xmin, xmax, ymin, ymax; |
327 | xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; |
328 | ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; |
329 | |
330 | if ((ymin - ya) > 0) { |
331 | x3 += (dx3 * (ymin - ya)); |
332 | x4 += (dx4 * (ymin - ya)); |
333 | ya = ymin; |
86aad47b |
334 | } |
030d1121 |
335 | |
336 | if (yb > ymax) yb = ymax; |
337 | |
338 | int loop1 = yb - ya; |
339 | if (loop1 <= 0) |
340 | continue; |
341 | |
342 | u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)]; |
343 | int li=gpu_unai.ilace_mask; |
344 | int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); |
345 | int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); |
346 | |
347 | for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH, |
348 | x3 += dx3, x4 += dx4 ) |
86aad47b |
349 | { |
030d1121 |
350 | if (ya&li) continue; |
351 | if ((ya&pi)==pif) continue; |
352 | |
353 | xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4); |
354 | if ((xmin - xa) > 0) xa = xmin; |
355 | if (xb > xmax) xb = xmax; |
356 | if ((xb - xa) > 0) |
357 | gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); |
86aad47b |
358 | } |
359 | } |
030d1121 |
360 | } while (++cur_pass < total_passes); |
86aad47b |
361 | } |
362 | |
363 | /*---------------------------------------------------------------------- |
030d1121 |
364 | gpuDrawPolyFT - Flat-shaded, textured poly |
86aad47b |
365 | ----------------------------------------------------------------------*/ |
030d1121 |
366 | void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) |
86aad47b |
367 | { |
030d1121 |
368 | // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light) |
369 | gpu_unai.r8 = packet.U1[0]; |
370 | gpu_unai.g8 = packet.U1[1]; |
371 | gpu_unai.b8 = packet.U1[2]; |
372 | // r5/g5/b5 used if just texture-blending is applied (15-bit light) |
373 | gpu_unai.r5 = packet.U1[0] >> 3; |
374 | gpu_unai.g5 = packet.U1[1] >> 3; |
375 | gpu_unai.b5 = packet.U1[2] >> 3; |
376 | |
377 | PolyVertex vbuf[4]; |
378 | polyInitVertexBuffer(vbuf, packet, POLYTYPE_FT, is_quad); |
379 | |
380 | int total_passes = is_quad ? 2 : 1; |
381 | int cur_pass = 0; |
382 | do |
86aad47b |
383 | { |
030d1121 |
384 | const PolyVertex* vptrs[3]; |
385 | if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) |
386 | continue; |
387 | |
388 | s32 xa, xb, ya, yb; |
389 | s32 x3, dx3, x4, dx4, dx; |
390 | s32 u3, du3, v3, dv3; |
391 | s32 x0, x1, x2, y0, y1, y2; |
392 | s32 u0, u1, u2, v0, v1, v2; |
393 | s32 du4, dv4; |
394 | |
395 | x0 = vptrs[0]->x; y0 = vptrs[0]->y; |
396 | u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v; |
397 | x1 = vptrs[1]->x; y1 = vptrs[1]->y; |
398 | u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v; |
399 | x2 = vptrs[2]->x; y2 = vptrs[2]->y; |
400 | u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v; |
401 | |
402 | ya = y2 - y0; |
403 | yb = y2 - y1; |
404 | dx4 = (x2 - x1) * ya - (x2 - x0) * yb; |
405 | du4 = (u2 - u1) * ya - (u2 - u0) * yb; |
406 | dv4 = (v2 - v1) * ya - (v2 - v0) * yb; |
407 | dx = dx4; |
408 | if (dx4 < 0) { |
409 | dx4 = -dx4; |
410 | du4 = -du4; |
411 | dv4 = -dv4; |
412 | } |
86aad47b |
413 | |
030d1121 |
414 | #ifdef GPU_UNAI_USE_FLOATMATH |
415 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
416 | if (dx4 != 0) { |
417 | float finv = FloatInv(dx4); |
418 | du4 = (fixed)((du4 << FIXED_BITS) * finv); |
419 | dv4 = (fixed)((dv4 << FIXED_BITS) * finv); |
420 | } else { |
421 | du4 = dv4 = 0; |
86aad47b |
422 | } |
030d1121 |
423 | #else |
424 | if (dx4 != 0) { |
425 | float fdiv = dx4; |
426 | du4 = (fixed)((du4 << FIXED_BITS) / fdiv); |
427 | dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv); |
428 | } else { |
429 | du4 = dv4 = 0; |
86aad47b |
430 | } |
030d1121 |
431 | #endif |
432 | #else // Integer Division: |
433 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
434 | if (dx4 != 0) { |
435 | int iF, iS; |
436 | xInv(dx4, iF, iS); |
437 | du4 = xInvMulx(du4, iF, iS); |
438 | dv4 = xInvMulx(dv4, iF, iS); |
439 | } else { |
440 | du4 = dv4 = 0; |
86aad47b |
441 | } |
030d1121 |
442 | #else |
443 | if (dx4 != 0) { |
444 | du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4); |
445 | dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4); |
446 | } else { |
447 | du4 = dv4 = 0; |
448 | } |
449 | #endif |
450 | #endif |
451 | // Set u,v increments for inner driver |
452 | gpu_unai.u_inc = du4; |
453 | gpu_unai.v_inc = dv4; |
454 | |
455 | //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here? |
456 | // (SAME ISSUE ELSEWHERE) |
457 | for (s32 loop0 = 2; loop0; loop0--) { |
458 | if (loop0 == 2) { |
459 | ya = y0; yb = y1; |
460 | x3 = x4 = i2x(x0); |
461 | u3 = i2x(u0); v3 = i2x(v0); |
462 | if (dx < 0) { |
463 | #ifdef GPU_UNAI_USE_FLOATMATH |
464 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
465 | if ((y2 - y0) != 0) { |
466 | float finv = FloatInv(y2 - y0); |
467 | dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv); |
468 | du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv); |
469 | dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv); |
470 | } else { |
471 | dx3 = du3 = dv3 = 0; |
472 | } |
473 | dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; |
474 | #else |
475 | if ((y2 - y0) != 0) { |
476 | float fdiv = y2 - y0; |
477 | dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv); |
478 | du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv); |
479 | dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv); |
480 | } else { |
481 | dx3 = du3 = dv3 = 0; |
482 | } |
483 | dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0; |
484 | #endif |
485 | #else // Integer Division: |
486 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
487 | if ((y2 - y0) != 0) { |
488 | int iF, iS; |
489 | xInv((y2 - y0), iF, iS); |
490 | dx3 = xInvMulx((x2 - x0), iF, iS); |
491 | du3 = xInvMulx((u2 - u0), iF, iS); |
492 | dv3 = xInvMulx((v2 - v0), iF, iS); |
493 | } else { |
494 | dx3 = du3 = dv3 = 0; |
495 | } |
496 | dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0; |
497 | #else |
498 | if ((y2 - y0) != 0) { |
499 | dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)); |
500 | du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0)); |
501 | dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0)); |
502 | } else { |
503 | dx3 = du3 = dv3 = 0; |
504 | } |
505 | dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0; |
506 | #endif |
507 | #endif |
508 | } else { |
509 | #ifdef GPU_UNAI_USE_FLOATMATH |
510 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
511 | if ((y1 - y0) != 0) { |
512 | float finv = FloatInv(y1 - y0); |
513 | dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv); |
514 | du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv); |
515 | dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv); |
516 | } else { |
517 | dx3 = du3 = dv3 = 0; |
518 | } |
519 | dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; |
520 | #else |
521 | if ((y1 - y0) != 0) { |
522 | float fdiv = y1 - y0; |
523 | dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv); |
524 | du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv); |
525 | dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv); |
526 | } else { |
527 | dx3 = du3 = dv3 = 0; |
528 | } |
529 | dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0; |
530 | #endif |
531 | #else // Integer Division: |
532 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
533 | if ((y1 - y0) != 0) { |
534 | int iF, iS; |
535 | xInv((y1 - y0), iF, iS); |
536 | dx3 = xInvMulx((x1 - x0), iF, iS); |
537 | du3 = xInvMulx((u1 - u0), iF, iS); |
538 | dv3 = xInvMulx((v1 - v0), iF, iS); |
539 | } else { |
540 | dx3 = du3 = dv3 = 0; |
541 | } |
542 | dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0; |
543 | #else |
544 | if ((y1 - y0) != 0) { |
545 | dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)); |
546 | du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0)); |
547 | dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0)); |
548 | } else { |
549 | dx3 = du3 = dv3 = 0; |
550 | } |
551 | dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0; |
552 | #endif |
553 | #endif |
554 | } |
555 | } else { |
556 | //senquack - break out of final loop if nothing to be drawn (1st loop |
557 | // must always be taken to setup dx3/dx4) |
558 | if (y1 == y2) break; |
559 | |
560 | ya = y1; yb = y2; |
561 | |
562 | if (dx < 0) { |
563 | x3 = i2x(x0); |
564 | x4 = i2x(x1); |
565 | u3 = i2x(u0); |
566 | v3 = i2x(v0); |
567 | if ((y1 - y0) != 0) { |
568 | x3 += (dx3 * (y1 - y0)); |
569 | u3 += (du3 * (y1 - y0)); |
570 | v3 += (dv3 * (y1 - y0)); |
571 | } |
572 | #ifdef GPU_UNAI_USE_FLOATMATH |
573 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
574 | dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; |
575 | #else |
576 | dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; |
577 | #endif |
578 | #else // Integer Division: |
579 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
580 | dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0; |
581 | #else |
582 | dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0; |
583 | #endif |
584 | #endif |
585 | } else { |
586 | x3 = i2x(x1); |
587 | x4 = i2x(x0) + (dx4 * (y1 - y0)); |
588 | u3 = i2x(u1); |
589 | v3 = i2x(v1); |
590 | #ifdef GPU_UNAI_USE_FLOATMATH |
591 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
592 | if ((y2 - y1) != 0) { |
593 | float finv = FloatInv(y2 - y1); |
594 | dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv); |
595 | du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv); |
596 | dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv); |
597 | } else { |
598 | dx3 = du3 = dv3 = 0; |
599 | } |
600 | #else |
601 | if ((y2 - y1) != 0) { |
602 | float fdiv = y2 - y1; |
603 | dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv); |
604 | du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv); |
605 | dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv); |
606 | } else { |
607 | dx3 = du3 = dv3 = 0; |
608 | } |
609 | #endif |
610 | #else // Integer Division: |
611 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
612 | if ((y2 - y1) != 0) { |
613 | int iF, iS; |
614 | xInv((y2 - y1), iF, iS); |
615 | dx3 = xInvMulx((x2 - x1), iF, iS); |
616 | du3 = xInvMulx((u2 - u1), iF, iS); |
617 | dv3 = xInvMulx((v2 - v1), iF, iS); |
618 | } else { |
619 | dx3 = du3 = dv3 = 0; |
620 | } |
621 | #else |
622 | if ((y2 - y1) != 0) { |
623 | dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)); |
624 | du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1)); |
625 | dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1)); |
626 | } else { |
627 | dx3 = du3 = dv3 = 0; |
628 | } |
629 | #endif |
630 | #endif |
631 | } |
632 | } |
86aad47b |
633 | |
030d1121 |
634 | s32 xmin, xmax, ymin, ymax; |
635 | xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; |
636 | ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; |
86aad47b |
637 | |
030d1121 |
638 | if ((ymin - ya) > 0) { |
639 | x3 += dx3 * (ymin - ya); |
640 | x4 += dx4 * (ymin - ya); |
641 | u3 += du3 * (ymin - ya); |
642 | v3 += dv3 * (ymin - ya); |
643 | ya = ymin; |
86aad47b |
644 | } |
030d1121 |
645 | |
646 | if (yb > ymax) yb = ymax; |
647 | |
648 | int loop1 = yb - ya; |
649 | if (loop1 <= 0) |
650 | continue; |
651 | |
652 | u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)]; |
653 | int li=gpu_unai.ilace_mask; |
654 | int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); |
655 | int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); |
656 | |
657 | for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH, |
658 | x3 += dx3, x4 += dx4, |
659 | u3 += du3, v3 += dv3 ) |
86aad47b |
660 | { |
030d1121 |
661 | if (ya&li) continue; |
662 | if ((ya&pi)==pif) continue; |
86aad47b |
663 | |
030d1121 |
664 | u32 u4, v4; |
86aad47b |
665 | |
030d1121 |
666 | xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4); |
667 | u4 = u3; v4 = v3; |
86aad47b |
668 | |
030d1121 |
669 | fixed itmp = i2x(xa) - x3; |
670 | if (itmp != 0) { |
671 | u4 += (du4 * itmp) >> FIXED_BITS; |
672 | v4 += (dv4 * itmp) >> FIXED_BITS; |
673 | } |
86aad47b |
674 | |
030d1121 |
675 | u4 += fixed_HALF; |
676 | v4 += fixed_HALF; |
53636f15 |
677 | |
030d1121 |
678 | if ((xmin - xa) > 0) { |
679 | u4 += du4 * (xmin - xa); |
680 | v4 += dv4 * (xmin - xa); |
681 | xa = xmin; |
682 | } |
683 | |
684 | // Set u,v coords for inner driver |
685 | gpu_unai.u = u4; |
686 | gpu_unai.v = v4; |
687 | |
688 | if (xb > xmax) xb = xmax; |
689 | if ((xb - xa) > 0) |
690 | gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); |
86aad47b |
691 | } |
692 | } |
030d1121 |
693 | } while (++cur_pass < total_passes); |
86aad47b |
694 | } |
695 | |
696 | /*---------------------------------------------------------------------- |
030d1121 |
697 | gpuDrawPolyG - Gouraud-shaded, untextured poly |
86aad47b |
698 | ----------------------------------------------------------------------*/ |
030d1121 |
699 | void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) |
86aad47b |
700 | { |
030d1121 |
701 | PolyVertex vbuf[4]; |
702 | polyInitVertexBuffer(vbuf, packet, POLYTYPE_G, is_quad); |
86aad47b |
703 | |
030d1121 |
704 | int total_passes = is_quad ? 2 : 1; |
705 | int cur_pass = 0; |
706 | do |
86aad47b |
707 | { |
030d1121 |
708 | const PolyVertex* vptrs[3]; |
709 | if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) |
710 | continue; |
711 | |
712 | s32 xa, xb, ya, yb; |
713 | s32 x3, dx3, x4, dx4, dx; |
714 | s32 r3, dr3, g3, dg3, b3, db3; |
715 | s32 x0, x1, x2, y0, y1, y2; |
716 | s32 r0, r1, r2, g0, g1, g2, b0, b1, b2; |
717 | s32 dr4, dg4, db4; |
718 | |
719 | x0 = vptrs[0]->x; y0 = vptrs[0]->y; |
720 | r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b; |
721 | x1 = vptrs[1]->x; y1 = vptrs[1]->y; |
722 | r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b; |
723 | x2 = vptrs[2]->x; y2 = vptrs[2]->y; |
724 | r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b; |
725 | |
726 | ya = y2 - y0; |
727 | yb = y2 - y1; |
728 | dx4 = (x2 - x1) * ya - (x2 - x0) * yb; |
729 | dr4 = (r2 - r1) * ya - (r2 - r0) * yb; |
730 | dg4 = (g2 - g1) * ya - (g2 - g0) * yb; |
731 | db4 = (b2 - b1) * ya - (b2 - b0) * yb; |
732 | dx = dx4; |
733 | if (dx4 < 0) { |
734 | dx4 = -dx4; |
735 | dr4 = -dr4; |
736 | dg4 = -dg4; |
737 | db4 = -db4; |
738 | } |
86aad47b |
739 | |
030d1121 |
740 | #ifdef GPU_UNAI_USE_FLOATMATH |
741 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
742 | if (dx4 != 0) { |
743 | float finv = FloatInv(dx4); |
744 | dr4 = (fixed)((dr4 << FIXED_BITS) * finv); |
745 | dg4 = (fixed)((dg4 << FIXED_BITS) * finv); |
746 | db4 = (fixed)((db4 << FIXED_BITS) * finv); |
747 | } else { |
748 | dr4 = dg4 = db4 = 0; |
86aad47b |
749 | } |
030d1121 |
750 | #else |
751 | if (dx4 != 0) { |
752 | float fdiv = dx4; |
753 | dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv); |
754 | dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv); |
755 | db4 = (fixed)((db4 << FIXED_BITS) / fdiv); |
756 | } else { |
757 | dr4 = dg4 = db4 = 0; |
86aad47b |
758 | } |
030d1121 |
759 | #endif |
760 | #else // Integer Division: |
761 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
762 | if (dx4 != 0) { |
763 | int iF, iS; |
764 | xInv(dx4, iF, iS); |
765 | dr4 = xInvMulx(dr4, iF, iS); |
766 | dg4 = xInvMulx(dg4, iF, iS); |
767 | db4 = xInvMulx(db4, iF, iS); |
768 | } else { |
769 | dr4 = dg4 = db4 = 0; |
86aad47b |
770 | } |
030d1121 |
771 | #else |
772 | if (dx4 != 0) { |
773 | dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4); |
774 | dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4); |
775 | db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4); |
776 | } else { |
777 | dr4 = dg4 = db4 = 0; |
86aad47b |
778 | } |
030d1121 |
779 | #endif |
780 | #endif |
781 | // Setup packed Gouraud increment for inner driver |
782 | gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4); |
783 | |
784 | for (s32 loop0 = 2; loop0; loop0--) { |
785 | if (loop0 == 2) { |
786 | ya = y0; |
787 | yb = y1; |
788 | x3 = x4 = i2x(x0); |
789 | r3 = i2x(r0); |
790 | g3 = i2x(g0); |
791 | b3 = i2x(b0); |
792 | if (dx < 0) { |
793 | #ifdef GPU_UNAI_USE_FLOATMATH |
794 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
795 | if ((y2 - y0) != 0) { |
796 | float finv = FloatInv(y2 - y0); |
797 | dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv); |
798 | dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv); |
799 | dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv); |
800 | db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv); |
801 | } else { |
802 | dx3 = dr3 = dg3 = db3 = 0; |
803 | } |
804 | dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; |
805 | #else |
806 | if ((y2 - y0) != 0) { |
807 | float fdiv = y2 - y0; |
808 | dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv); |
809 | dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv); |
810 | dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv); |
811 | db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv); |
812 | } else { |
813 | dx3 = dr3 = dg3 = db3 = 0; |
814 | } |
815 | dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0; |
816 | #endif |
817 | #else // Integer Division: |
818 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
819 | if ((y2 - y0) != 0) { |
820 | int iF, iS; |
821 | xInv((y2 - y0), iF, iS); |
822 | dx3 = xInvMulx((x2 - x0), iF, iS); |
823 | dr3 = xInvMulx((r2 - r0), iF, iS); |
824 | dg3 = xInvMulx((g2 - g0), iF, iS); |
825 | db3 = xInvMulx((b2 - b0), iF, iS); |
826 | } else { |
827 | dx3 = dr3 = dg3 = db3 = 0; |
828 | } |
829 | dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0; |
830 | #else |
831 | if ((y2 - y0) != 0) { |
832 | dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)); |
833 | dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0)); |
834 | dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0)); |
835 | db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0)); |
836 | } else { |
837 | dx3 = dr3 = dg3 = db3 = 0; |
838 | } |
839 | dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0; |
840 | #endif |
841 | #endif |
842 | } else { |
843 | #ifdef GPU_UNAI_USE_FLOATMATH |
844 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
845 | if ((y1 - y0) != 0) { |
846 | float finv = FloatInv(y1 - y0); |
847 | dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv); |
848 | dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv); |
849 | dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv); |
850 | db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv); |
851 | } else { |
852 | dx3 = dr3 = dg3 = db3 = 0; |
853 | } |
854 | dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; |
855 | #else |
856 | if ((y1 - y0) != 0) { |
857 | float fdiv = y1 - y0; |
858 | dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv); |
859 | dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv); |
860 | dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv); |
861 | db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv); |
862 | } else { |
863 | dx3 = dr3 = dg3 = db3 = 0; |
864 | } |
865 | dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0; |
866 | #endif |
867 | #else // Integer Division: |
868 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
869 | if ((y1 - y0) != 0) { |
870 | int iF, iS; |
871 | xInv((y1 - y0), iF, iS); |
872 | dx3 = xInvMulx((x1 - x0), iF, iS); |
873 | dr3 = xInvMulx((r1 - r0), iF, iS); |
874 | dg3 = xInvMulx((g1 - g0), iF, iS); |
875 | db3 = xInvMulx((b1 - b0), iF, iS); |
876 | } else { |
877 | dx3 = dr3 = dg3 = db3 = 0; |
878 | } |
879 | dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0; |
880 | #else |
881 | if ((y1 - y0) != 0) { |
882 | dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)); |
883 | dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0)); |
884 | dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0)); |
885 | db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0)); |
886 | } else { |
887 | dx3 = dr3 = dg3 = db3 = 0; |
888 | } |
889 | dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0; |
890 | #endif |
891 | #endif |
892 | } |
893 | } else { |
894 | //senquack - break out of final loop if nothing to be drawn (1st loop |
895 | // must always be taken to setup dx3/dx4) |
896 | if (y1 == y2) break; |
897 | |
898 | ya = y1; yb = y2; |
899 | |
900 | if (dx < 0) { |
901 | x3 = i2x(x0); x4 = i2x(x1); |
902 | r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0); |
903 | |
904 | if ((y1 - y0) != 0) { |
905 | x3 += (dx3 * (y1 - y0)); |
906 | r3 += (dr3 * (y1 - y0)); |
907 | g3 += (dg3 * (y1 - y0)); |
908 | b3 += (db3 * (y1 - y0)); |
909 | } |
910 | |
911 | #ifdef GPU_UNAI_USE_FLOATMATH |
912 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
913 | dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; |
914 | #else |
915 | dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; |
916 | #endif |
917 | #else // Integer Division: |
918 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
919 | dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0; |
920 | #else |
921 | dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0; |
922 | #endif |
923 | #endif |
924 | } else { |
925 | x3 = i2x(x1); |
926 | x4 = i2x(x0) + (dx4 * (y1 - y0)); |
927 | |
928 | r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1); |
929 | |
930 | #ifdef GPU_UNAI_USE_FLOATMATH |
931 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
932 | if ((y2 - y1) != 0) { |
933 | float finv = FloatInv(y2 - y1); |
934 | dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv); |
935 | dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv); |
936 | dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv); |
937 | db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv); |
938 | } else { |
939 | dx3 = dr3 = dg3 = db3 = 0; |
940 | } |
941 | #else |
942 | if ((y2 - y1) != 0) { |
943 | float fdiv = y2 - y1; |
944 | dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv); |
945 | dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv); |
946 | dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv); |
947 | db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv); |
948 | } else { |
949 | dx3 = dr3 = dg3 = db3 = 0; |
950 | } |
951 | #endif |
952 | #else // Integer Division: |
953 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
954 | if ((y2 - y1) != 0) { |
955 | int iF, iS; |
956 | xInv((y2 - y1), iF, iS); |
957 | dx3 = xInvMulx((x2 - x1), iF, iS); |
958 | dr3 = xInvMulx((r2 - r1), iF, iS); |
959 | dg3 = xInvMulx((g2 - g1), iF, iS); |
960 | db3 = xInvMulx((b2 - b1), iF, iS); |
961 | } else { |
962 | dx3 = dr3 = dg3 = db3 = 0; |
963 | } |
964 | #else |
965 | if ((y2 - y1) != 0) { |
966 | dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)); |
967 | dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1)); |
968 | dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1)); |
969 | db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1)); |
970 | } else { |
971 | dx3 = dr3 = dg3 = db3 = 0; |
972 | } |
973 | #endif |
974 | #endif |
975 | } |
86aad47b |
976 | } |
86aad47b |
977 | |
030d1121 |
978 | s32 xmin, xmax, ymin, ymax; |
979 | xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; |
980 | ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; |
981 | |
982 | if ((ymin - ya) > 0) { |
983 | x3 += (dx3 * (ymin - ya)); |
984 | x4 += (dx4 * (ymin - ya)); |
985 | r3 += (dr3 * (ymin - ya)); |
986 | g3 += (dg3 * (ymin - ya)); |
987 | b3 += (db3 * (ymin - ya)); |
988 | ya = ymin; |
86aad47b |
989 | } |
030d1121 |
990 | |
991 | if (yb > ymax) yb = ymax; |
992 | |
993 | int loop1 = yb - ya; |
994 | if (loop1 <= 0) |
995 | continue; |
996 | |
997 | u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)]; |
998 | int li=gpu_unai.ilace_mask; |
999 | int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); |
1000 | int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); |
1001 | |
1002 | for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH, |
1003 | x3 += dx3, x4 += dx4, |
1004 | r3 += dr3, g3 += dg3, b3 += db3 ) |
86aad47b |
1005 | { |
030d1121 |
1006 | if (ya&li) continue; |
1007 | if ((ya&pi)==pif) continue; |
1008 | |
1009 | u32 r4, g4, b4; |
1010 | |
1011 | xa = FixedCeilToInt(x3); |
1012 | xb = FixedCeilToInt(x4); |
53636f15 |
1013 | r4 = r3; g4 = g3; b4 = b3; |
030d1121 |
1014 | |
1015 | fixed itmp = i2x(xa) - x3; |
1016 | if (itmp != 0) { |
1017 | r4 += (dr4 * itmp) >> FIXED_BITS; |
1018 | g4 += (dg4 * itmp) >> FIXED_BITS; |
1019 | b4 += (db4 * itmp) >> FIXED_BITS; |
1020 | } |
1021 | |
1022 | r4 += fixed_HALF; |
1023 | g4 += fixed_HALF; |
1024 | b4 += fixed_HALF; |
1025 | |
1026 | if ((xmin - xa) > 0) { |
1027 | r4 += (dr4 * (xmin - xa)); |
1028 | g4 += (dg4 * (xmin - xa)); |
1029 | b4 += (db4 * (xmin - xa)); |
1030 | xa = xmin; |
1031 | } |
1032 | |
1033 | // Setup packed Gouraud color for inner driver |
1034 | gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4); |
1035 | |
1036 | if (xb > xmax) xb = xmax; |
1037 | if ((xb - xa) > 0) |
1038 | gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); |
86aad47b |
1039 | } |
1040 | } |
030d1121 |
1041 | } while (++cur_pass < total_passes); |
86aad47b |
1042 | } |
1043 | |
1044 | /*---------------------------------------------------------------------- |
030d1121 |
1045 | gpuDrawPolyGT - Gouraud-shaded, textured poly |
86aad47b |
1046 | ----------------------------------------------------------------------*/ |
030d1121 |
// gpuDrawPolyGT: rasterize a Gouraud-shaded, textured 3- or 4-point polygon.
//
//   packet            - source data handed to polyInitVertexBuffer() to fill vbuf
//   gpuPolySpanDriver - inner driver, called once per visible horizontal span as
//                       (gpu_unai, pointer to first pixel, pixel count)
//   is_quad           - non-zero runs two triangle passes over vbuf, zero runs one
//
// Values for the span driver are passed through gpu_unai: u_inc, v_inc and gInc
// are written once per triangle pass; u, v and gCol once per drawn scanline.
1047 | void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) |
86aad47b |
1048 | { |
030d1121 |
1049 | PolyVertex vbuf[4]; |
1050 | polyInitVertexBuffer(vbuf, packet, POLYTYPE_GT, is_quad); |
86aad47b |
1051 | |
030d1121 |
1052 | int total_passes = is_quad ? 2 : 1; |
1053 | int cur_pass = 0; |
1054 | do |
86aad47b |
1055 | { |
030d1121 |
// polyUseTriangle() fills vptrs with the three vertices for this pass; a false
// return skips the pass. 'continue' inside do/while jumps to the loop condition,
// so cur_pass is still incremented.
1056 | const PolyVertex* vptrs[3]; |
1057 | if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) |
1058 | continue; |
1059 | |
// Naming used below:
//   *0,*1,*2 - attributes of the three vertices (x,y position, u,v texture, r,g,b color)
//   *3 / d*3 - fixed-point values on the span-start edge and their per-scanline steps
//   x4 / dx4 - fixed-point x on the span-end edge and its per-scanline step
//   du4..db4 - become the per-pixel (horizontal) attribute increments
1060 | s32 xa, xb, ya, yb; |
1061 | s32 x3, dx3, x4, dx4, dx; |
1062 | s32 u3, du3, v3, dv3; |
1063 | s32 r3, dr3, g3, dg3, b3, db3; |
1064 | s32 x0, x1, x2, y0, y1, y2; |
1065 | s32 u0, u1, u2, v0, v1, v2; |
1066 | s32 r0, r1, r2, g0, g1, g2, b0, b1, b2; |
1067 | s32 du4, dv4; |
1068 | s32 dr4, dg4, db4; |
1069 | |
1070 | x0 = vptrs[0]->x; y0 = vptrs[0]->y; |
1071 | u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v; |
1072 | r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b; |
1073 | x1 = vptrs[1]->x; y1 = vptrs[1]->y; |
1074 | u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v; |
1075 | r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b; |
1076 | x2 = vptrs[2]->x; y2 = vptrs[2]->y; |
1077 | u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v; |
1078 | r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b; |
1079 | |
// ya/yb are used here as temporaries holding the y extents (y2-y0) and (y2-y1).
// dx4..db4 are cross-product style combinations of each attribute's deltas with
// those extents. dx keeps the signed dx4: its sign later selects which edges
// feed the x3 side. When dx4 is negative all six values are negated together,
// so the divisor dx4 used below is never negative.
1080 | ya = y2 - y0; |
1081 | yb = y2 - y1; |
1082 | dx4 = (x2 - x1) * ya - (x2 - x0) * yb; |
1083 | du4 = (u2 - u1) * ya - (u2 - u0) * yb; |
1084 | dv4 = (v2 - v1) * ya - (v2 - v0) * yb; |
1085 | dr4 = (r2 - r1) * ya - (r2 - r0) * yb; |
1086 | dg4 = (g2 - g1) * ya - (g2 - g0) * yb; |
1087 | db4 = (b2 - b1) * ya - (b2 - b0) * yb; |
1088 | dx = dx4; |
1089 | if (dx4 < 0) { |
1090 | dx4 = -dx4; |
1091 | du4 = -du4; |
1092 | dv4 = -dv4; |
1093 | dr4 = -dr4; |
1094 | dg4 = -dg4; |
1095 | db4 = -db4; |
1096 | } |
86aad47b |
1097 | |
030d1121 |
// Divide du4..db4 by dx4 to obtain the horizontal increments, using whichever
// division back-end the build selects (float multiply-by-inverse, float divide,
// integer multiply-by-inverse, or GPU_FAST_DIV). dx4 == 0 zeroes every
// increment instead of dividing.
1098 | #ifdef GPU_UNAI_USE_FLOATMATH |
1099 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
1100 | if (dx4 != 0) { |
1101 | float finv = FloatInv(dx4); |
1102 | du4 = (fixed)((du4 << FIXED_BITS) * finv); |
1103 | dv4 = (fixed)((dv4 << FIXED_BITS) * finv); |
1104 | dr4 = (fixed)((dr4 << FIXED_BITS) * finv); |
1105 | dg4 = (fixed)((dg4 << FIXED_BITS) * finv); |
1106 | db4 = (fixed)((db4 << FIXED_BITS) * finv); |
1107 | } else { |
1108 | du4 = dv4 = dr4 = dg4 = db4 = 0; |
86aad47b |
1109 | } |
030d1121 |
1110 | #else |
1111 | if (dx4 != 0) { |
1112 | float fdiv = dx4; |
1113 | du4 = (fixed)((du4 << FIXED_BITS) / fdiv); |
1114 | dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv); |
1115 | dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv); |
1116 | dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv); |
1117 | db4 = (fixed)((db4 << FIXED_BITS) / fdiv); |
1118 | } else { |
1119 | du4 = dv4 = dr4 = dg4 = db4 = 0; |
86aad47b |
1120 | } |
030d1121 |
1121 | #endif |
1122 | #else // Integer Division: |
1123 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
1124 | if (dx4 != 0) { |
1125 | int iF, iS; |
1126 | xInv(dx4, iF, iS); |
1127 | du4 = xInvMulx(du4, iF, iS); |
1128 | dv4 = xInvMulx(dv4, iF, iS); |
1129 | dr4 = xInvMulx(dr4, iF, iS); |
1130 | dg4 = xInvMulx(dg4, iF, iS); |
1131 | db4 = xInvMulx(db4, iF, iS); |
1132 | } else { |
1133 | du4 = dv4 = dr4 = dg4 = db4 = 0; |
86aad47b |
1134 | } |
030d1121 |
1135 | #else |
1136 | if (dx4 != 0) { |
1137 | du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4); |
1138 | dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4); |
1139 | dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4); |
1140 | dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4); |
1141 | db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4); |
1142 | } else { |
1143 | du4 = dv4 = dr4 = dg4 = db4 = 0; |
86aad47b |
1144 | } |
030d1121 |
1145 | #endif |
1146 | #endif |
1147 | // Set u,v increments and packed Gouraud increment for inner driver |
1148 | gpu_unai.u_inc = du4; |
1149 | gpu_unai.v_inc = dv4; |
1150 | gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4); |
1151 | |
// The triangle is drawn in two halves: loop0 == 2 covers scanlines y0..y1 and
// loop0 == 1 covers y1..y2.
// NOTE(review): this relies on y0 <= y1 <= y2; presumably polyUseTriangle()
// returns the vertices sorted by y - confirm against its definition.
1152 | for (s32 loop0 = 2; loop0; loop0--) { |
1153 | if (loop0 == 2) { |
1154 | ya = y0; yb = y1; |
1155 | x3 = x4 = i2x(x0); |
1156 | u3 = i2x(u0); v3 = i2x(v0); |
1157 | r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0); |
// dx < 0: the x3 side (carrying u,v,r,g,b) follows edge 0->2 and x4 follows
// edge 0->1. Otherwise the x3 side follows edge 0->1 and x4 follows edge 0->2.
// Every slope is guarded against a zero-height edge.
1158 | if (dx < 0) { |
1159 | #ifdef GPU_UNAI_USE_FLOATMATH |
1160 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
1161 | if ((y2 - y0) != 0) { |
1162 | float finv = FloatInv(y2 - y0); |
1163 | dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv); |
1164 | du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv); |
1165 | dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv); |
1166 | dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv); |
1167 | dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv); |
1168 | db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv); |
1169 | } else { |
1170 | dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; |
1171 | } |
1172 | dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; |
1173 | #else |
1174 | if ((y2 - y0) != 0) { |
1175 | float fdiv = y2 - y0; |
1176 | dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv); |
1177 | du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv); |
1178 | dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv); |
1179 | dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv); |
1180 | dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv); |
1181 | db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv); |
1182 | } else { |
1183 | dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; |
1184 | } |
1185 | dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0; |
1186 | #endif |
1187 | #else // Integer Division: |
1188 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
1189 | if ((y2 - y0) != 0) { |
1190 | int iF, iS; |
1191 | xInv((y2 - y0), iF, iS); |
1192 | dx3 = xInvMulx((x2 - x0), iF, iS); |
1193 | du3 = xInvMulx((u2 - u0), iF, iS); |
1194 | dv3 = xInvMulx((v2 - v0), iF, iS); |
1195 | dr3 = xInvMulx((r2 - r0), iF, iS); |
1196 | dg3 = xInvMulx((g2 - g0), iF, iS); |
1197 | db3 = xInvMulx((b2 - b0), iF, iS); |
1198 | } else { |
1199 | dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; |
1200 | } |
1201 | dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0; |
1202 | #else |
1203 | if ((y2 - y0) != 0) { |
1204 | dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)); |
1205 | du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0)); |
1206 | dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0)); |
1207 | dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0)); |
1208 | dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0)); |
1209 | db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0)); |
1210 | } else { |
1211 | dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; |
1212 | } |
1213 | dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0; |
1214 | #endif |
1215 | #endif |
1216 | } else { |
1217 | #ifdef GPU_UNAI_USE_FLOATMATH |
1218 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
1219 | if ((y1 - y0) != 0) { |
1220 | float finv = FloatInv(y1 - y0); |
1221 | dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv); |
1222 | du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv); |
1223 | dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv); |
1224 | dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv); |
1225 | dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv); |
1226 | db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv); |
1227 | } else { |
1228 | dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; |
1229 | } |
1230 | dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; |
1231 | #else |
1232 | if ((y1 - y0) != 0) { |
1233 | float fdiv = y1 - y0; |
1234 | dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv); |
1235 | du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv); |
1236 | dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv); |
1237 | dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv); |
1238 | dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv); |
1239 | db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv); |
1240 | } else { |
1241 | dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; |
1242 | } |
1243 | dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / float(y2 - y0)) : 0; |
1244 | #endif |
1245 | #else // Integer Division: |
1246 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
1247 | if ((y1 - y0) != 0) { |
1248 | int iF, iS; |
1249 | xInv((y1 - y0), iF, iS); |
1250 | dx3 = xInvMulx((x1 - x0), iF, iS); |
1251 | du3 = xInvMulx((u1 - u0), iF, iS); |
1252 | dv3 = xInvMulx((v1 - v0), iF, iS); |
1253 | dr3 = xInvMulx((r1 - r0), iF, iS); |
1254 | dg3 = xInvMulx((g1 - g0), iF, iS); |
1255 | db3 = xInvMulx((b1 - b0), iF, iS); |
1256 | } else { |
1257 | dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; |
1258 | } |
1259 | dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0; |
1260 | #else |
1261 | if ((y1 - y0) != 0) { |
1262 | dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)); |
1263 | du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0)); |
1264 | dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0)); |
1265 | dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0)); |
1266 | dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0)); |
1267 | db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0)); |
1268 | } else { |
1269 | dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; |
1270 | } |
1271 | dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0; |
1272 | #endif |
1273 | #endif |
1274 | } |
1275 | } else { |
1276 | //senquack - break out of final loop if nothing to be drawn (1st loop |
1277 | // must always be taken to setup dx3/dx4) |
1278 | if (y1 == y2) break; |
1279 | |
1280 | ya = y1; yb = y2; |
1281 | |
// dx < 0: the x3 side keeps the edge 0->2 slopes from the first half, so its
// values at y1 are re-derived from vertex 0; x4 restarts at vertex 1 with the
// slope of edge 1->2.
1282 | if (dx < 0) { |
1283 | x3 = i2x(x0); x4 = i2x(x1); |
1284 | u3 = i2x(u0); v3 = i2x(v0); |
1285 | r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0); |
1286 | |
1287 | if ((y1 - y0) != 0) { |
1288 | x3 += (dx3 * (y1 - y0)); |
1289 | u3 += (du3 * (y1 - y0)); |
1290 | v3 += (dv3 * (y1 - y0)); |
1291 | r3 += (dr3 * (y1 - y0)); |
1292 | g3 += (dg3 * (y1 - y0)); |
1293 | b3 += (db3 * (y1 - y0)); |
1294 | } |
1295 | |
1296 | #ifdef GPU_UNAI_USE_FLOATMATH |
1297 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
1298 | dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; |
1299 | #else |
1300 | dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; |
1301 | #endif |
1302 | #else // Integer Division: |
1303 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
1304 | dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0; |
1305 | #else |
1306 | dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0; |
1307 | #endif |
1308 | #endif |
1309 | } else { |
// dx >= 0: x4 keeps its edge 0->2 slope from the first half (position
// re-derived at y1 from vertex 0); the x3 side restarts at vertex 1 with new
// slopes for edge 1->2.
1310 | x3 = i2x(x1); |
1311 | x4 = i2x(x0) + (dx4 * (y1 - y0)); |
1312 | |
1313 | u3 = i2x(u1); v3 = i2x(v1); |
1314 | r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1); |
1315 | #ifdef GPU_UNAI_USE_FLOATMATH |
1316 | #ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV |
1317 | if ((y2 - y1) != 0) { |
1318 | float finv = FloatInv(y2 - y1); |
1319 | dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv); |
1320 | du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv); |
1321 | dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv); |
1322 | dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv); |
1323 | dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv); |
1324 | db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv); |
1325 | } else { |
1326 | dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; |
1327 | } |
1328 | #else |
1329 | if ((y2 - y1) != 0) { |
1330 | float fdiv = y2 - y1; |
1331 | dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv); |
1332 | du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv); |
1333 | dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv); |
1334 | dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv); |
1335 | dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv); |
1336 | db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv); |
1337 | } else { |
1338 | dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; |
1339 | } |
1340 | #endif |
1341 | #else // Integer Division: |
1342 | #ifdef GPU_UNAI_USE_INT_DIV_MULTINV |
1343 | if ((y2 - y1) != 0) { |
1344 | int iF, iS; |
1345 | xInv((y2 - y1), iF, iS); |
1346 | dx3 = xInvMulx((x2 - x1), iF, iS); |
1347 | du3 = xInvMulx((u2 - u1), iF, iS); |
1348 | dv3 = xInvMulx((v2 - v1), iF, iS); |
1349 | dr3 = xInvMulx((r2 - r1), iF, iS); |
1350 | dg3 = xInvMulx((g2 - g1), iF, iS); |
1351 | db3 = xInvMulx((b2 - b1), iF, iS); |
1352 | } else { |
1353 | dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; |
1354 | } |
1355 | #else |
1356 | if ((y2 - y1) != 0) { |
1357 | dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)); |
1358 | du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1)); |
1359 | dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1)); |
1360 | dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1)); |
1361 | dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1)); |
1362 | db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1)); |
1363 | } else { |
1364 | dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; |
1365 | } |
1366 | #endif |
1367 | #endif |
1368 | } |
86aad47b |
1369 | } |
86aad47b |
1370 | |
030d1121 |
// Clip to the drawing area. Rows above ymin are skipped by advancing every
// edge interpolant by the number of skipped rows; yb is capped at ymax, and
// since the row loop runs yb - ya times starting at ya, row yb itself is not
// drawn.
1371 | s32 xmin, xmax, ymin, ymax; |
1372 | xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; |
1373 | ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; |
1374 | |
1375 | if ((ymin - ya) > 0) { |
1376 | x3 += (dx3 * (ymin - ya)); |
1377 | x4 += (dx4 * (ymin - ya)); |
1378 | u3 += (du3 * (ymin - ya)); |
1379 | v3 += (dv3 * (ymin - ya)); |
1380 | r3 += (dr3 * (ymin - ya)); |
1381 | g3 += (dg3 * (ymin - ya)); |
1382 | b3 += (db3 * (ymin - ya)); |
1383 | ya = ymin; |
86aad47b |
1384 | } |
030d1121 |
1385 | |
1386 | if (yb > ymax) yb = ymax; |
1387 | |
// No visible rows in this half: go on to the next half, if any.
1388 | int loop1 = yb - ya; |
1389 | if (loop1 <= 0) |
1390 | continue; |
1391 | |
// Scanline skipping: a row is skipped when (ya & li) is non-zero, or when
// (ya & pi) == pif. With progressive interlace disabled pi is 0 and pif is 1,
// so the second test can never match.
1392 | u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)]; |
1393 | int li=gpu_unai.ilace_mask; |
1394 | int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); |
1395 | int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); |
1396 | |
// One iteration per row. The edge interpolants are stepped in the loop header,
// so they still advance on rows that are skipped with 'continue'.
1397 | for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH, |
1398 | x3 += dx3, x4 += dx4, |
1399 | u3 += du3, v3 += dv3, |
1400 | r3 += dr3, g3 += dg3, b3 += db3 ) |
86aad47b |
1401 | { |
030d1121 |
1402 | if (ya&li) continue; |
1403 | if ((ya&pi)==pif) continue; |
1404 | |
1405 | u32 u4, v4; |
1406 | u32 r4, g4, b4; |
1407 | |
// xa/xb are the integer span bounds derived from the fixed-point edges x3/x4.
// u4..b4 start from the x3-edge values and are then corrected by the
// fixed-point distance (itmp) between x3 and the first pixel xa.
1408 | xa = FixedCeilToInt(x3); |
1409 | xb = FixedCeilToInt(x4); |
53636f15 |
1410 | u4 = u3; v4 = v3; |
1411 | r4 = r3; g4 = g3; b4 = b3; |
030d1121 |
1412 | |
1413 | fixed itmp = i2x(xa) - x3; |
1414 | if (itmp != 0) { |
1415 | u4 += (du4 * itmp) >> FIXED_BITS; |
1416 | v4 += (dv4 * itmp) >> FIXED_BITS; |
1417 | r4 += (dr4 * itmp) >> FIXED_BITS; |
1418 | g4 += (dg4 * itmp) >> FIXED_BITS; |
1419 | b4 += (db4 * itmp) >> FIXED_BITS; |
1420 | } |
1421 | |
// NOTE(review): the fixed_HALF bias presumably rounds to nearest when the
// driver later drops the fractional bits - confirm against the span drivers.
1422 | u4 += fixed_HALF; |
1423 | v4 += fixed_HALF; |
1424 | r4 += fixed_HALF; |
1425 | g4 += fixed_HALF; |
1426 | b4 += fixed_HALF; |
1427 | |
// Left clip: step the attributes forward by the whole pixels skipped up to xmin.
1428 | if ((xmin - xa) > 0) { |
1429 | u4 += du4 * (xmin - xa); |
1430 | v4 += dv4 * (xmin - xa); |
1431 | r4 += dr4 * (xmin - xa); |
1432 | g4 += dg4 * (xmin - xa); |
1433 | b4 += db4 * (xmin - xa); |
1434 | xa = xmin; |
1435 | } |
1436 | |
1437 | // Set packed Gouraud color and u,v coords for inner driver |
1438 | gpu_unai.u = u4; |
1439 | gpu_unai.v = v4; |
1440 | gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4); |
1441 | |
// Right clip, then draw xb - xa pixels starting at xa (pixel xb is not drawn).
1442 | if (xb > xmax) xb = xmax; |
1443 | if ((xb - xa) > 0) |
1444 | gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); |
86aad47b |
1445 | } |
1446 | } |
030d1121 |
1447 | } while (++cur_pass < total_passes); |
86aad47b |
1448 | } |