86aad47b |
1 | /*************************************************************************** |
2 | * Copyright (C) 2010 PCSX4ALL Team * |
3 | * Copyright (C) 2010 Unai * |
030d1121 |
4 | * Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com) * |
86aad47b |
5 | * * |
6 | * This program is free software; you can redistribute it and/or modify * |
7 | * it under the terms of the GNU General Public License as published by * |
8 | * the Free Software Foundation; either version 2 of the License, or * |
9 | * (at your option) any later version. * |
10 | * * |
11 | * This program is distributed in the hope that it will be useful, * |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
14 | * GNU General Public License for more details. * |
15 | * * |
16 | * You should have received a copy of the GNU General Public License * |
17 | * along with this program; if not, write to the * |
18 | * Free Software Foundation, Inc., * |
19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * |
20 | ***************************************************************************/ |
21 | |
22 | /////////////////////////////////////////////////////////////////////////////// |
23 | // GPU internal line drawing functions |
030d1121 |
24 | // |
25 | // Rewritten October 2016 by senquack: |
26 | // Instead of one pixel at a time, lines are now drawn in runs of pixels, |
27 | // whether vertical, horizontal, or diagonal. A new inner driver |
28 | // 'gpuPixelSpanFn' is used, as well as an enhanced Bresenham run-slice |
29 | // algorithm. For more information, see the following: |
30 | // |
31 | // Michael Abrash - Graphics Programming Black Book |
32 | // Chapters 35 - 36 (does not implement diagonal runs) |
33 | // http://www.drdobbs.com/parallel/graphics-programming-black-book/184404919 |
34 | // http://www.jagregory.com/abrash-black-book/ |
35 | // |
36 | // Article by Andrew Delong (does not implement diagonal runs) |
37 | // http://timetraces.ca/nw/drawline.htm |
38 | // |
39 | // 'Run-Based Multi-Point Line Drawing' by Eun Jae Lee & Larry F. Hodges |
40 | // https://smartech.gatech.edu/bitstream/handle/1853/3632/93-22.pdf |
41 | // Provided the idea of doing a half-octant transform allowing lines with |
42 | // slopes between 0.5 and 2.0 (diagonal runs of pixels) to be handled |
43 | // identically to the traditional horizontal/vertical run-slice method. |
86aad47b |
44 | |
030d1121 |
45 | // Use 16.16 fixed point precision for line math. |
46 | // NOTE: Gouraud colors used by gpuPixelSpanFn can use a different precision. |
47 | #define GPU_LINE_FIXED_BITS 16 |
86aad47b |
48 | |
030d1121 |
49 | // If defined, Gouraud lines will use fixed-point multiply-by-inverse to |
50 | // do most divisions. With enough accuracy, this should be OK. |
51 | #define USE_LINES_ALL_FIXED_PT_MATH |
86aad47b |
52 | |
030d1121 |
53 | ////////////////////// |
54 | // Flat-shaded line // |
55 | ////////////////////// |
56 | void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver) |
86aad47b |
57 | { |
030d1121 |
58 | int x0, y0, x1, y1; |
59 | int dx, dy; |
60 | |
61 | // All three of these variables should be signed (so multiplication works) |
62 | ptrdiff_t sx; // Sign of x delta, positive when x0 < x1 |
63 | const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel |
64 | const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line |
65 | |
66 | // Clip region: xmax/ymax seem to normally be one *past* the rightmost/ |
67 | // bottommost pixels of the draw area. Since we render every pixel between |
68 | // and including both line endpoints, subtract one from xmax/ymax. |
69 | const int xmin = gpu_unai.DrawingArea[0]; |
70 | const int ymin = gpu_unai.DrawingArea[1]; |
71 | const int xmax = gpu_unai.DrawingArea[2] - 1; |
72 | const int ymax = gpu_unai.DrawingArea[3] - 1; |
73 | |
74 | x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_unai.DrawingOffset[0]; |
75 | y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_unai.DrawingOffset[1]; |
76 | x1 = GPU_EXPANDSIGN(packet.S2[4]) + gpu_unai.DrawingOffset[0]; |
77 | y1 = GPU_EXPANDSIGN(packet.S2[5]) + gpu_unai.DrawingOffset[1]; |
78 | |
79 | // Always draw top to bottom, so ensure y0 <= y1 |
80 | if (y0 > y1) { |
81 | SwapValues(y0, y1); |
82 | SwapValues(x0, x1); |
83 | } |
84 | |
85 | // Is line totally outside Y clipping range? |
86 | if (y0 > ymax || y1 < ymin) return; |
87 | |
88 | dx = x1 - x0; |
89 | dy = y1 - y0; |
90 | |
91 | // X-axis range check : max distance between any two X coords is 1023 |
92 | // (PSX hardware will not render anything violating this rule) |
93 | // NOTE: We'll check y coord range further below |
94 | if (dx >= CHKMAX_X || dx <= -CHKMAX_X) |
95 | return; |
96 | |
97 | // Y-axis range check and clipping |
98 | if (dy) { |
99 | // Y-axis range check : max distance between any two Y coords is 511 |
100 | // (PSX hardware will not render anything violating this rule) |
101 | if (dy >= CHKMAX_Y) |
102 | return; |
103 | |
104 | // We already know y0 < y1 |
105 | if (y0 < ymin) { |
106 | x0 += GPU_FAST_DIV(((ymin - y0) * dx), dy); |
107 | y0 = ymin; |
86aad47b |
108 | } |
030d1121 |
109 | if (y1 > ymax) { |
110 | x1 += GPU_FAST_DIV(((ymax - y1) * dx), dy); |
111 | y1 = ymax; |
86aad47b |
112 | } |
030d1121 |
113 | |
114 | // Recompute in case clipping occurred: |
115 | dx = x1 - x0; |
116 | dy = y1 - y0; |
117 | } |
118 | |
119 | // Check X clipping range, set 'sx' x-direction variable |
120 | if (dx == 0) { |
121 | // Is vertical line totally outside X clipping range? |
122 | if (x0 < xmin || x0 > xmax) |
123 | return; |
124 | sx = 0; |
125 | } else { |
126 | if (dx > 0) { |
127 | // x0 is leftmost coordinate |
128 | if (x0 > xmax) return; // Both points outside X clip range |
129 | |
130 | if (x0 < xmin) { |
131 | if (x1 < xmin) return; // Both points outside X clip range |
132 | y0 += GPU_FAST_DIV(((xmin - x0) * dy), dx); |
133 | x0 = xmin; |
134 | } |
135 | |
136 | if (x1 > xmax) { |
137 | y1 += GPU_FAST_DIV(((xmax - x1) * dy), dx); |
138 | x1 = xmax; |
139 | } |
140 | |
141 | sx = +1; |
142 | dx = x1 - x0; // Get final value, which should also be absolute value |
143 | } else { |
144 | // x1 is leftmost coordinate |
145 | if (x1 > xmax) return; // Both points outside X clip range |
146 | |
147 | if (x1 < xmin) { |
148 | if (x0 < xmin) return; // Both points outside X clip range |
149 | |
150 | y1 += GPU_FAST_DIV(((xmin - x1) * dy), dx); |
151 | x1 = xmin; |
86aad47b |
152 | } |
030d1121 |
153 | |
154 | if (x0 > xmax) { |
155 | y0 += GPU_FAST_DIV(((xmax - x0) * dy), dx); |
156 | x0 = xmax; |
157 | } |
158 | |
159 | sx = -1; |
160 | dx = x0 - x1; // Get final value, which should also be absolute value |
161 | } |
162 | |
163 | // Recompute in case clipping occurred: |
164 | dy = y1 - y0; |
165 | } |
166 | |
167 | // IMPORTANT: dx,dy should now contain their absolute values |
168 | |
169 | int min_length, // Minimum length of a pixel run |
170 | start_length, // Length of first run |
171 | end_length, // Length of last run |
172 | err_term, // Cumulative error to determine when to draw longer run |
173 | err_adjup, // Increment to err_term for each run drawn |
174 | err_adjdown; // Subract this from err_term after drawing longer run |
175 | |
176 | // Color to draw with (16 bits, highest of which is unset mask bit) |
177 | uintptr_t col16 = GPU_RGB16(packet.U4[0]); |
178 | |
179 | // We use u8 pointers even though PS1 has u16 framebuffer. |
180 | // This allows pixel-drawing functions to increment dst pointer |
181 | // directly by the passed 'incr' value, not having to shift it first. |
182 | u8 *dst = (u8*)gpu_unai.vram + y0 * dst_stride + x0 * dst_depth; |
183 | |
184 | // SPECIAL CASE: Vertical line |
185 | if (dx == 0) { |
186 | gpuPixelSpanDriver(dst, col16, dst_stride, dy+1); |
187 | return; |
188 | } |
189 | |
190 | // SPECIAL CASE: Horizontal line |
191 | if (dy == 0) { |
192 | gpuPixelSpanDriver(dst, col16, sx * dst_depth, dx+1); |
193 | return; |
194 | } |
195 | |
196 | // SPECIAL CASE: Diagonal line |
197 | if (dx == dy) { |
198 | gpuPixelSpanDriver(dst, col16, dst_stride + (sx * dst_depth), dy+1); |
199 | return; |
200 | } |
201 | |
202 | int major, minor; // Major axis, minor axis |
203 | ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis |
204 | |
205 | if (dx > dy) { |
206 | major = dx; |
207 | minor = dy; |
208 | } else { |
209 | major = dy; |
210 | minor = dx; |
211 | } |
212 | |
213 | // Determine if diagonal or horizontal runs |
214 | if (major < (2 * minor)) { |
215 | // Diagonal runs, so perform half-octant transformation |
216 | minor = major - minor; |
217 | |
218 | // Advance diagonally when drawing runs |
219 | incr_major = dst_stride + (sx * dst_depth); |
220 | |
221 | // After drawing each run, correct for over-advance along minor axis |
222 | if (dx > dy) |
223 | incr_minor = -dst_stride; |
224 | else |
225 | incr_minor = -sx * dst_depth; |
226 | } else { |
227 | // Horizontal or vertical runs |
228 | if (dx > dy) { |
229 | incr_major = sx * dst_depth; |
230 | incr_minor = dst_stride; |
231 | } else { |
232 | incr_major = dst_stride; |
233 | incr_minor = sx * dst_depth; |
86aad47b |
234 | } |
030d1121 |
235 | } |
236 | |
237 | if (minor > 1) { |
238 | // Minimum number of pixels each run |
239 | min_length = major / minor; |
240 | |
241 | // Initial error term; reflects an initial step of 0.5 along minor axis |
242 | err_term = (major % minor) - (minor * 2); |
243 | |
244 | // Increment err_term this much each step along minor axis; when |
245 | // err_term crosses zero, draw longer pixel run. |
246 | err_adjup = (major % minor) * 2; |
247 | } else { |
248 | min_length = major; |
249 | err_term = 0; |
250 | err_adjup = 0; |
251 | } |
252 | |
253 | // Error term adjustment when err_term turns over; used to factor |
254 | // out the major-axis step made at that time |
255 | err_adjdown = minor * 2; |
256 | |
257 | // The initial and last runs are partial, because minor axis advances |
258 | // only 0.5 for these runs, rather than 1. Each is half a full run, |
259 | // plus the initial pixel. |
260 | start_length = end_length = (min_length / 2) + 1; |
261 | |
262 | if (min_length & 1) { |
263 | // If there're an odd number of pixels per run, we have 1 pixel that |
264 | // can't be allocated to either the initial or last partial run, so |
265 | // we'll add 0.5 to err_term so that this pixel will be handled |
266 | // by the normal full-run loop |
267 | err_term += minor; |
268 | } else { |
269 | // If the minimum run length is even and there's no fractional advance, |
270 | // we have one pixel that could go to either the initial or last |
271 | // partial run, which we arbitrarily allocate to the last run |
272 | if (err_adjup == 0) |
273 | start_length--; // Leave out the extra pixel at the start |
274 | } |
275 | |
276 | // First run of pixels |
277 | dst = gpuPixelSpanDriver(dst, col16, incr_major, start_length); |
278 | dst += incr_minor; |
279 | |
280 | // Middle runs of pixels |
281 | while (--minor > 0) { |
282 | int run_length = min_length; |
283 | err_term += err_adjup; |
284 | |
285 | // If err_term passed 0, reset it and draw longer run |
286 | if (err_term > 0) { |
287 | err_term -= err_adjdown; |
288 | run_length++; |
86aad47b |
289 | } |
030d1121 |
290 | |
291 | dst = gpuPixelSpanDriver(dst, col16, incr_major, run_length); |
292 | dst += incr_minor; |
293 | } |
294 | |
295 | // Final run of pixels |
296 | gpuPixelSpanDriver(dst, col16, incr_major, end_length); |
297 | } |
298 | |
299 | ///////////////////////// |
300 | // Gouraud-shaded line // |
301 | ///////////////////////// |
302 | void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver) |
303 | { |
304 | int x0, y0, x1, y1; |
305 | int dx, dy, dr, dg, db; |
306 | u32 r0, g0, b0, r1, g1, b1; |
307 | |
308 | // All three of these variables should be signed (so multiplication works) |
309 | ptrdiff_t sx; // Sign of x delta, positive when x0 < x1 |
310 | const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel |
311 | const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line |
312 | |
313 | // Clip region: xmax/ymax seem to normally be one *past* the rightmost/ |
314 | // bottommost pixels of the draw area. We'll render every pixel between |
315 | // and including both line endpoints, so subtract one from xmax/ymax. |
316 | const int xmin = gpu_unai.DrawingArea[0]; |
317 | const int ymin = gpu_unai.DrawingArea[1]; |
318 | const int xmax = gpu_unai.DrawingArea[2] - 1; |
319 | const int ymax = gpu_unai.DrawingArea[3] - 1; |
320 | |
321 | x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_unai.DrawingOffset[0]; |
322 | y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_unai.DrawingOffset[1]; |
323 | x1 = GPU_EXPANDSIGN(packet.S2[6]) + gpu_unai.DrawingOffset[0]; |
324 | y1 = GPU_EXPANDSIGN(packet.S2[7]) + gpu_unai.DrawingOffset[1]; |
325 | |
326 | u32 col0 = packet.U4[0]; |
327 | u32 col1 = packet.U4[2]; |
328 | |
329 | // Always draw top to bottom, so ensure y0 <= y1 |
330 | if (y0 > y1) { |
331 | SwapValues(y0, y1); |
332 | SwapValues(x0, x1); |
333 | SwapValues(col0, col1); |
334 | } |
335 | |
336 | // Is line totally outside Y clipping range? |
337 | if (y0 > ymax || y1 < ymin) return; |
338 | |
339 | // If defined, Gouraud colors are fixed-point 5.11, otherwise they are 8.16 |
340 | // (This is only beneficial if using SIMD-optimized pixel driver) |
341 | #ifdef GPU_GOURAUD_LOW_PRECISION |
342 | r0 = (col0 >> 3) & 0x1f; g0 = (col0 >> 11) & 0x1f; b0 = (col0 >> 19) & 0x1f; |
343 | r1 = (col1 >> 3) & 0x1f; g1 = (col1 >> 11) & 0x1f; b1 = (col1 >> 19) & 0x1f; |
344 | #else |
345 | r0 = col0 & 0xff; g0 = (col0 >> 8) & 0xff; b0 = (col0 >> 16) & 0xff; |
346 | r1 = col1 & 0xff; g1 = (col1 >> 8) & 0xff; b1 = (col1 >> 16) & 0xff; |
347 | #endif |
348 | |
349 | dx = x1 - x0; |
350 | dy = y1 - y0; |
351 | dr = r1 - r0; |
352 | dg = g1 - g0; |
353 | db = b1 - b0; |
354 | |
355 | // X-axis range check : max distance between any two X coords is 1023 |
356 | // (PSX hardware will not render anything violating this rule) |
357 | // NOTE: We'll check y coord range further below |
358 | if (dx >= CHKMAX_X || dx <= -CHKMAX_X) |
359 | return; |
360 | |
361 | // Y-axis range check and clipping |
362 | if (dy) { |
363 | // Y-axis range check : max distance between any two Y coords is 511 |
364 | // (PSX hardware will not render anything violating this rule) |
365 | if (dy >= CHKMAX_Y) |
366 | return; |
367 | |
368 | // We already know y0 < y1 |
369 | if (y0 < ymin) { |
370 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
371 | s32 factor = GPU_FAST_DIV(((ymin - y0) << GPU_LINE_FIXED_BITS), dy); |
372 | x0 += (dx * factor) >> GPU_LINE_FIXED_BITS; |
373 | r0 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
374 | g0 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
375 | b0 += (db * factor) >> GPU_LINE_FIXED_BITS; |
376 | #else |
377 | x0 += (ymin - y0) * dx / dy; |
378 | r0 += (ymin - y0) * dr / dy; |
379 | g0 += (ymin - y0) * dg / dy; |
380 | b0 += (ymin - y0) * db / dy; |
381 | #endif |
86aad47b |
382 | y0 = ymin; |
86aad47b |
383 | } |
030d1121 |
384 | |
385 | if (y1 > ymax) { |
386 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
387 | s32 factor = GPU_FAST_DIV(((ymax - y1) << GPU_LINE_FIXED_BITS), dy); |
388 | x1 += (dx * factor) >> GPU_LINE_FIXED_BITS; |
389 | r1 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
390 | g1 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
391 | b1 += (db * factor) >> GPU_LINE_FIXED_BITS; |
392 | #else |
393 | x1 += (ymax - y1) * dx / dy; |
394 | r1 += (ymax - y1) * dr / dy; |
395 | g1 += (ymax - y1) * dg / dy; |
396 | b1 += (ymax - y1) * db / dy; |
397 | #endif |
398 | y1 = ymax; |
86aad47b |
399 | } |
030d1121 |
400 | |
401 | // Recompute in case clipping occurred: |
402 | dx = x1 - x0; |
403 | dy = y1 - y0; |
404 | dr = r1 - r0; |
405 | dg = g1 - g0; |
406 | db = b1 - b0; |
407 | } |
408 | |
409 | // Check X clipping range, set 'sx' x-direction variable |
410 | if (dx == 0) { |
411 | // Is vertical line totally outside X clipping range? |
412 | if (x0 < xmin || x0 > xmax) |
413 | return; |
414 | sx = 0; |
86aad47b |
415 | } else { |
030d1121 |
416 | if (dx > 0) { |
417 | // x0 is leftmost coordinate |
418 | if (x0 > xmax) return; // Both points outside X clip range |
419 | |
420 | if (x0 < xmin) { |
421 | if (x1 < xmin) return; // Both points outside X clip range |
422 | |
423 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
424 | s32 factor = GPU_FAST_DIV(((xmin - x0) << GPU_LINE_FIXED_BITS), dx); |
425 | y0 += (dy * factor) >> GPU_LINE_FIXED_BITS; |
426 | r0 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
427 | g0 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
428 | b0 += (db * factor) >> GPU_LINE_FIXED_BITS; |
429 | #else |
430 | y0 += (xmin - x0) * dy / dx; |
431 | r0 += (xmin - x0) * dr / dx; |
432 | g0 += (xmin - x0) * dg / dx; |
433 | b0 += (xmin - x0) * db / dx; |
434 | #endif |
435 | x0 = xmin; |
86aad47b |
436 | } |
030d1121 |
437 | |
438 | if (x1 > xmax) { |
439 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
440 | s32 factor = GPU_FAST_DIV(((xmax - x1) << GPU_LINE_FIXED_BITS), dx); |
441 | y1 += (dy * factor) >> GPU_LINE_FIXED_BITS; |
442 | r1 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
443 | g1 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
444 | b1 += (db * factor) >> GPU_LINE_FIXED_BITS; |
445 | #else |
446 | y1 += (xmax - x1) * dy / dx; |
447 | r1 += (xmax - x1) * dr / dx; |
448 | g1 += (xmax - x1) * dg / dx; |
449 | b1 += (xmax - x1) * db / dx; |
450 | #endif |
451 | x1 = xmax; |
452 | } |
453 | |
454 | sx = +1; |
455 | dx = x1 - x0; // Get final value, which should also be absolute value |
456 | } else { |
457 | // x1 is leftmost coordinate |
458 | if (x1 > xmax) return; // Both points outside X clip range |
459 | |
460 | if (x1 < xmin) { |
461 | if (x0 < xmin) return; // Both points outside X clip range |
462 | |
463 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
464 | s32 factor = GPU_FAST_DIV(((xmin - x1) << GPU_LINE_FIXED_BITS), dx); |
465 | y1 += (dy * factor) >> GPU_LINE_FIXED_BITS; |
466 | r1 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
467 | g1 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
468 | b1 += (db * factor) >> GPU_LINE_FIXED_BITS; |
469 | #else |
470 | y1 += (xmin - x1) * dy / dx; |
471 | r1 += (xmin - x1) * dr / dx; |
472 | g1 += (xmin - x1) * dg / dx; |
473 | b1 += (xmin - x1) * db / dx; |
474 | #endif |
475 | x1 = xmin; |
476 | } |
477 | |
478 | if (x0 > xmax) { |
479 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
480 | s32 factor = GPU_FAST_DIV(((xmax - x0) << GPU_LINE_FIXED_BITS), dx); |
481 | y0 += (dy * factor) >> GPU_LINE_FIXED_BITS; |
482 | r0 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
483 | g0 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
484 | b0 += (db * factor) >> GPU_LINE_FIXED_BITS; |
485 | #else |
486 | y0 += (xmax - x0) * dy / dx; |
487 | r0 += (xmax - x0) * dr / dx; |
488 | g0 += (xmax - x0) * dg / dx; |
489 | b0 += (xmax - x0) * db / dx; |
490 | #endif |
491 | x0 = xmax; |
492 | } |
493 | |
494 | sx = -1; |
495 | dx = x0 - x1; // Get final value, which should also be absolute value |
86aad47b |
496 | } |
030d1121 |
497 | |
498 | // Recompute in case clipping occurred: |
499 | dy = y1 - y0; |
500 | dr = r1 - r0; |
501 | dg = g1 - g0; |
502 | db = b1 - b0; |
86aad47b |
503 | } |
86aad47b |
504 | |
030d1121 |
505 | // IMPORTANT: dx,dy should now contain their absolute values |
86aad47b |
506 | |
030d1121 |
507 | int min_length, // Minimum length of a pixel run |
508 | start_length, // Length of first run |
509 | end_length, // Length of last run |
510 | err_term, // Cumulative error to determine when to draw longer run |
511 | err_adjup, // Increment to err_term for each run drawn |
512 | err_adjdown; // Subract this from err_term after drawing longer run |
513 | |
514 | GouraudColor gcol; |
515 | gcol.r = r0 << GPU_GOURAUD_FIXED_BITS; |
516 | gcol.g = g0 << GPU_GOURAUD_FIXED_BITS; |
517 | gcol.b = b0 << GPU_GOURAUD_FIXED_BITS; |
518 | |
519 | // We use u8 pointers even though PS1 has u16 framebuffer. |
520 | // This allows pixel-drawing functions to increment dst pointer |
521 | // directly by the passed 'incr' value, not having to shift it first. |
522 | u8 *dst = (u8*)gpu_unai.vram + y0 * dst_stride + x0 * dst_depth; |
523 | |
524 | // SPECIAL CASE: Vertical line |
525 | if (dx == 0) { |
526 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
527 | // Get dy fixed-point inverse |
528 | s32 inv_factor = 1 << GPU_GOURAUD_FIXED_BITS; |
529 | if (dy > 1) inv_factor = GPU_FAST_DIV(inv_factor, dy); |
530 | |
531 | // Simultaneously divide and convert integer to Gouraud fixed point: |
532 | gcol.r_incr = dr * inv_factor; |
533 | gcol.g_incr = dg * inv_factor; |
534 | gcol.b_incr = db * inv_factor; |
535 | #else |
536 | // First, convert to Gouraud fixed point |
537 | gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS; |
538 | gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS; |
539 | gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS; |
540 | |
541 | if (dy > 1) { |
542 | if (dr) gcol.r_incr /= dy; |
543 | if (dg) gcol.g_incr /= dy; |
544 | if (db) gcol.b_incr /= dy; |
86aad47b |
545 | } |
030d1121 |
546 | #endif |
86aad47b |
547 | |
030d1121 |
548 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride, dy+1); |
549 | return; |
550 | } |
551 | |
552 | // SPECIAL CASE: Horizontal line |
553 | if (dy == 0) { |
554 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
555 | // Get dx fixed-point inverse |
556 | s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS); |
557 | if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx); |
558 | |
559 | // Simultaneously divide and convert integer to Gouraud fixed point: |
560 | gcol.r_incr = dr * inv_factor; |
561 | gcol.g_incr = dg * inv_factor; |
562 | gcol.b_incr = db * inv_factor; |
563 | #else |
564 | gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS; |
565 | gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS; |
566 | gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS; |
567 | |
568 | if (dx > 1) { |
569 | if (dr) gcol.r_incr /= dx; |
570 | if (dg) gcol.g_incr /= dx; |
571 | if (db) gcol.b_incr /= dx; |
86aad47b |
572 | } |
030d1121 |
573 | #endif |
574 | |
575 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, sx * dst_depth, dx+1); |
576 | return; |
577 | } |
578 | |
579 | // SPECIAL CASE: Diagonal line |
580 | if (dx == dy) { |
581 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
582 | // Get dx fixed-point inverse |
583 | s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS); |
584 | if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx); |
585 | |
586 | // Simultaneously divide and convert integer to Gouraud fixed point: |
587 | gcol.r_incr = dr * inv_factor; |
588 | gcol.g_incr = dg * inv_factor; |
589 | gcol.b_incr = db * inv_factor; |
590 | #else |
591 | // First, convert to Gouraud fixed point |
592 | gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS; |
593 | gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS; |
594 | gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS; |
595 | |
596 | if (dx > 1) { |
597 | if (dr) gcol.r_incr /= dx; |
598 | if (dg) gcol.g_incr /= dx; |
599 | if (db) gcol.b_incr /= dx; |
86aad47b |
600 | } |
030d1121 |
601 | #endif |
602 | |
603 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride + (sx * dst_depth), dy+1); |
604 | return; |
605 | } |
606 | |
607 | int major, minor; // Absolute val of major,minor axis delta |
608 | ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis |
609 | |
610 | if (dx > dy) { |
611 | major = dx; |
612 | minor = dy; |
613 | } else { |
614 | major = dy; |
615 | minor = dx; |
616 | } |
617 | |
618 | // Determine if diagonal or horizontal runs |
619 | if (major < (2 * minor)) { |
620 | // Diagonal runs, so perform half-octant transformation |
621 | minor = major - minor; |
622 | |
623 | // Advance diagonally when drawing runs |
624 | incr_major = dst_stride + (sx * dst_depth); |
625 | |
626 | // After drawing each run, correct for over-advance along minor axis |
627 | if (dx > dy) |
628 | incr_minor = -dst_stride; |
629 | else |
630 | incr_minor = -sx * dst_depth; |
631 | } else { |
632 | // Horizontal or vertical runs |
633 | if (dx > dy) { |
634 | incr_major = sx * dst_depth; |
635 | incr_minor = dst_stride; |
636 | } else { |
637 | incr_major = dst_stride; |
638 | incr_minor = sx * dst_depth; |
86aad47b |
639 | } |
030d1121 |
640 | } |
641 | |
642 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
643 | s32 major_inv = GPU_FAST_DIV((1 << GPU_GOURAUD_FIXED_BITS), major); |
644 | |
645 | // Simultaneously divide and convert from integer to Gouraud fixed point: |
646 | gcol.r_incr = dr * major_inv; |
647 | gcol.g_incr = dg * major_inv; |
648 | gcol.b_incr = db * major_inv; |
649 | #else |
650 | gcol.r_incr = dr ? ((dr << GPU_GOURAUD_FIXED_BITS) / major) : 0; |
651 | gcol.g_incr = dg ? ((dg << GPU_GOURAUD_FIXED_BITS) / major) : 0; |
652 | gcol.b_incr = db ? ((db << GPU_GOURAUD_FIXED_BITS) / major) : 0; |
653 | #endif |
654 | |
655 | if (minor > 1) { |
656 | // Minimum number of pixels each run |
657 | min_length = major / minor; |
658 | |
659 | // Initial error term; reflects an initial step of 0.5 along minor axis |
660 | err_term = (major % minor) - (minor * 2); |
661 | |
662 | // Increment err_term this much each step along minor axis; when |
663 | // err_term crosses zero, draw longer pixel run. |
664 | err_adjup = (major % minor) * 2; |
86aad47b |
665 | } else { |
030d1121 |
666 | min_length = major; |
667 | err_term = 0; |
668 | err_adjup = 0; |
669 | } |
670 | |
671 | // Error term adjustment when err_term turns over; used to factor |
672 | // out the major-axis step made at that time |
673 | err_adjdown = minor * 2; |
674 | |
675 | // The initial and last runs are partial, because minor axis advances |
676 | // only 0.5 for these runs, rather than 1. Each is half a full run, |
677 | // plus the initial pixel. |
678 | start_length = end_length = (min_length / 2) + 1; |
679 | |
680 | if (min_length & 1) { |
681 | // If there're an odd number of pixels per run, we have 1 pixel that |
682 | // can't be allocated to either the initial or last partial run, so |
683 | // we'll add 0.5 to err_term so that this pixel will be handled |
684 | // by the normal full-run loop |
685 | err_term += minor; |
686 | } else { |
687 | // If the minimum run length is even and there's no fractional advance, |
688 | // we have one pixel that could go to either the initial or last |
689 | // partial run, which we'll arbitrarily allocate to the last run |
690 | if (err_adjup == 0) |
691 | start_length--; // Leave out the extra pixel at the start |
692 | } |
693 | |
694 | // First run of pixels |
695 | dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, start_length); |
696 | dst += incr_minor; |
697 | |
698 | // Middle runs of pixels |
699 | while (--minor > 0) { |
700 | int run_length = min_length; |
701 | err_term += err_adjup; |
702 | |
703 | // If err_term passed 0, reset it and draw longer run |
704 | if (err_term > 0) { |
705 | err_term -= err_adjdown; |
706 | run_length++; |
86aad47b |
707 | } |
030d1121 |
708 | |
709 | dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, run_length); |
710 | dst += incr_minor; |
86aad47b |
711 | } |
030d1121 |
712 | |
713 | // Final run of pixels |
714 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, end_length); |
86aad47b |
715 | } |