Commit | Line | Data |
---|---|---|
86aad47b | 1 | /*************************************************************************** |
2 | * Copyright (C) 2010 PCSX4ALL Team * | |
3 | * Copyright (C) 2010 Unai * | |
030d1121 | 4 | * Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com) * |
86aad47b | 5 | * * |
6 | * This program is free software; you can redistribute it and/or modify * | |
7 | * it under the terms of the GNU General Public License as published by * | |
8 | * the Free Software Foundation; either version 2 of the License, or * | |
9 | * (at your option) any later version. * | |
10 | * * | |
11 | * This program is distributed in the hope that it will be useful, * | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * | |
14 | * GNU General Public License for more details. * | |
15 | * * | |
16 | * You should have received a copy of the GNU General Public License * | |
17 | * along with this program; if not, write to the * | |
18 | * Free Software Foundation, Inc., * | |
19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * | |
20 | ***************************************************************************/ | |
21 | ||
908e426c | 22 | #ifndef __GPU_UNAI_GPU_RASTER_LINE_H__ |
23 | #define __GPU_UNAI_GPU_RASTER_LINE_H__ | |
24 | ||
86aad47b | 25 | /////////////////////////////////////////////////////////////////////////////// |
26 | // GPU internal line drawing functions | |
030d1121 | 27 | // |
28 | // Rewritten October 2016 by senquack: | |
29 | // Instead of one pixel at a time, lines are now drawn in runs of pixels, | |
30 | // whether vertical, horizontal, or diagonal. A new inner driver | |
31 | // 'gpuPixelSpanFn' is used, as well as an enhanced Bresenham run-slice | |
32 | // algorithm. For more information, see the following: | |
33 | // | |
34 | // Michael Abrash - Graphics Programming Black Book | |
35 | // Chapters 35 - 36 (does not implement diagonal runs) | |
36 | // http://www.drdobbs.com/parallel/graphics-programming-black-book/184404919 | |
37 | // http://www.jagregory.com/abrash-black-book/ | |
38 | // | |
39 | // Article by Andrew Delong (does not implement diagonal runs) | |
40 | // http://timetraces.ca/nw/drawline.htm | |
41 | // | |
42 | // 'Run-Based Multi-Point Line Drawing' by Eun Jae Lee & Larry F. Hodges | |
43 | // https://smartech.gatech.edu/bitstream/handle/1853/3632/93-22.pdf | |
44 | // Provided the idea of doing a half-octant transform allowing lines with | |
45 | // slopes between 0.5 and 2.0 (diagonal runs of pixels) to be handled | |
46 | // identically to the traditional horizontal/vertical run-slice method. | |
86aad47b | 47 | |
030d1121 | 48 | // Use 16.16 fixed point precision for line math. |
49 | // NOTE: Gouraud colors used by gpuPixelSpanFn can use a different precision. | |
50 | #define GPU_LINE_FIXED_BITS 16 | |
86aad47b | 51 | |
030d1121 | 52 | // If defined, Gouraud lines will use fixed-point multiply-by-inverse to |
53 | // do most divisions. With enough accuracy, this should be OK. | |
54 | #define USE_LINES_ALL_FIXED_PT_MATH | |
86aad47b | 55 | |
030d1121 | 56 | ////////////////////// |
57 | // Flat-shaded line // | |
58 | ////////////////////// | |
59 | void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver) | |
86aad47b | 60 | { |
030d1121 | 61 | int x0, y0, x1, y1; |
62 | int dx, dy; | |
63 | ||
64 | // All three of these variables should be signed (so multiplication works) | |
65 | ptrdiff_t sx; // Sign of x delta, positive when x0 < x1 | |
66 | const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel | |
67 | const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line | |
68 | ||
69 | // Clip region: xmax/ymax seem to normally be one *past* the rightmost/ | |
70 | // bottommost pixels of the draw area. Since we render every pixel between | |
71 | // and including both line endpoints, subtract one from xmax/ymax. | |
72 | const int xmin = gpu_unai.DrawingArea[0]; | |
73 | const int ymin = gpu_unai.DrawingArea[1]; | |
74 | const int xmax = gpu_unai.DrawingArea[2] - 1; | |
75 | const int ymax = gpu_unai.DrawingArea[3] - 1; | |
76 | ||
4949d4ff PC |
77 | x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2])) + gpu_unai.DrawingOffset[0]; |
78 | y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3])) + gpu_unai.DrawingOffset[1]; | |
79 | x1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[4])) + gpu_unai.DrawingOffset[0]; | |
80 | y1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[5])) + gpu_unai.DrawingOffset[1]; | |
030d1121 | 81 | |
82 | // Always draw top to bottom, so ensure y0 <= y1 | |
83 | if (y0 > y1) { | |
84 | SwapValues(y0, y1); | |
85 | SwapValues(x0, x1); | |
86 | } | |
87 | ||
88 | // Is line totally outside Y clipping range? | |
89 | if (y0 > ymax || y1 < ymin) return; | |
90 | ||
91 | dx = x1 - x0; | |
92 | dy = y1 - y0; | |
93 | ||
94 | // X-axis range check : max distance between any two X coords is 1023 | |
95 | // (PSX hardware will not render anything violating this rule) | |
96 | // NOTE: We'll check y coord range further below | |
97 | if (dx >= CHKMAX_X || dx <= -CHKMAX_X) | |
98 | return; | |
99 | ||
100 | // Y-axis range check and clipping | |
101 | if (dy) { | |
102 | // Y-axis range check : max distance between any two Y coords is 511 | |
103 | // (PSX hardware will not render anything violating this rule) | |
104 | if (dy >= CHKMAX_Y) | |
105 | return; | |
106 | ||
107 | // We already know y0 < y1 | |
108 | if (y0 < ymin) { | |
109 | x0 += GPU_FAST_DIV(((ymin - y0) * dx), dy); | |
110 | y0 = ymin; | |
86aad47b | 111 | } |
030d1121 | 112 | if (y1 > ymax) { |
113 | x1 += GPU_FAST_DIV(((ymax - y1) * dx), dy); | |
114 | y1 = ymax; | |
86aad47b | 115 | } |
030d1121 | 116 | |
117 | // Recompute in case clipping occurred: | |
118 | dx = x1 - x0; | |
119 | dy = y1 - y0; | |
120 | } | |
121 | ||
122 | // Check X clipping range, set 'sx' x-direction variable | |
123 | if (dx == 0) { | |
124 | // Is vertical line totally outside X clipping range? | |
125 | if (x0 < xmin || x0 > xmax) | |
126 | return; | |
127 | sx = 0; | |
128 | } else { | |
129 | if (dx > 0) { | |
130 | // x0 is leftmost coordinate | |
131 | if (x0 > xmax) return; // Both points outside X clip range | |
132 | ||
133 | if (x0 < xmin) { | |
134 | if (x1 < xmin) return; // Both points outside X clip range | |
135 | y0 += GPU_FAST_DIV(((xmin - x0) * dy), dx); | |
136 | x0 = xmin; | |
137 | } | |
138 | ||
139 | if (x1 > xmax) { | |
140 | y1 += GPU_FAST_DIV(((xmax - x1) * dy), dx); | |
141 | x1 = xmax; | |
142 | } | |
143 | ||
144 | sx = +1; | |
145 | dx = x1 - x0; // Get final value, which should also be absolute value | |
146 | } else { | |
147 | // x1 is leftmost coordinate | |
148 | if (x1 > xmax) return; // Both points outside X clip range | |
149 | ||
150 | if (x1 < xmin) { | |
151 | if (x0 < xmin) return; // Both points outside X clip range | |
152 | ||
153 | y1 += GPU_FAST_DIV(((xmin - x1) * dy), dx); | |
154 | x1 = xmin; | |
86aad47b | 155 | } |
030d1121 | 156 | |
157 | if (x0 > xmax) { | |
158 | y0 += GPU_FAST_DIV(((xmax - x0) * dy), dx); | |
159 | x0 = xmax; | |
160 | } | |
161 | ||
162 | sx = -1; | |
163 | dx = x0 - x1; // Get final value, which should also be absolute value | |
164 | } | |
165 | ||
166 | // Recompute in case clipping occurred: | |
167 | dy = y1 - y0; | |
168 | } | |
169 | ||
170 | // IMPORTANT: dx,dy should now contain their absolute values | |
171 | ||
172 | int min_length, // Minimum length of a pixel run | |
173 | start_length, // Length of first run | |
174 | end_length, // Length of last run | |
175 | err_term, // Cumulative error to determine when to draw longer run | |
176 | err_adjup, // Increment to err_term for each run drawn | |
177 | err_adjdown; // Subract this from err_term after drawing longer run | |
178 | ||
179 | // Color to draw with (16 bits, highest of which is unset mask bit) | |
4949d4ff | 180 | uintptr_t col16 = GPU_RGB16(le32_to_u32(packet.U4[0])); |
030d1121 | 181 | |
4949d4ff | 182 | le16_t *dst = gpu_unai.vram + (y0 * dst_stride + x0 * dst_depth) / FRAME_BYTES_PER_PIXEL; |
030d1121 | 183 | |
184 | // SPECIAL CASE: Vertical line | |
185 | if (dx == 0) { | |
186 | gpuPixelSpanDriver(dst, col16, dst_stride, dy+1); | |
187 | return; | |
188 | } | |
189 | ||
190 | // SPECIAL CASE: Horizontal line | |
191 | if (dy == 0) { | |
192 | gpuPixelSpanDriver(dst, col16, sx * dst_depth, dx+1); | |
193 | return; | |
194 | } | |
195 | ||
196 | // SPECIAL CASE: Diagonal line | |
197 | if (dx == dy) { | |
198 | gpuPixelSpanDriver(dst, col16, dst_stride + (sx * dst_depth), dy+1); | |
199 | return; | |
200 | } | |
201 | ||
202 | int major, minor; // Major axis, minor axis | |
203 | ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis | |
204 | ||
205 | if (dx > dy) { | |
206 | major = dx; | |
207 | minor = dy; | |
208 | } else { | |
209 | major = dy; | |
210 | minor = dx; | |
211 | } | |
212 | ||
213 | // Determine if diagonal or horizontal runs | |
214 | if (major < (2 * minor)) { | |
215 | // Diagonal runs, so perform half-octant transformation | |
216 | minor = major - minor; | |
217 | ||
218 | // Advance diagonally when drawing runs | |
219 | incr_major = dst_stride + (sx * dst_depth); | |
220 | ||
221 | // After drawing each run, correct for over-advance along minor axis | |
222 | if (dx > dy) | |
223 | incr_minor = -dst_stride; | |
224 | else | |
225 | incr_minor = -sx * dst_depth; | |
226 | } else { | |
227 | // Horizontal or vertical runs | |
228 | if (dx > dy) { | |
229 | incr_major = sx * dst_depth; | |
230 | incr_minor = dst_stride; | |
231 | } else { | |
232 | incr_major = dst_stride; | |
233 | incr_minor = sx * dst_depth; | |
86aad47b | 234 | } |
030d1121 | 235 | } |
236 | ||
237 | if (minor > 1) { | |
238 | // Minimum number of pixels each run | |
239 | min_length = major / minor; | |
240 | ||
241 | // Initial error term; reflects an initial step of 0.5 along minor axis | |
242 | err_term = (major % minor) - (minor * 2); | |
243 | ||
244 | // Increment err_term this much each step along minor axis; when | |
245 | // err_term crosses zero, draw longer pixel run. | |
246 | err_adjup = (major % minor) * 2; | |
247 | } else { | |
248 | min_length = major; | |
249 | err_term = 0; | |
250 | err_adjup = 0; | |
251 | } | |
252 | ||
253 | // Error term adjustment when err_term turns over; used to factor | |
254 | // out the major-axis step made at that time | |
255 | err_adjdown = minor * 2; | |
256 | ||
257 | // The initial and last runs are partial, because minor axis advances | |
258 | // only 0.5 for these runs, rather than 1. Each is half a full run, | |
259 | // plus the initial pixel. | |
260 | start_length = end_length = (min_length / 2) + 1; | |
261 | ||
262 | if (min_length & 1) { | |
263 | // If there're an odd number of pixels per run, we have 1 pixel that | |
264 | // can't be allocated to either the initial or last partial run, so | |
265 | // we'll add 0.5 to err_term so that this pixel will be handled | |
266 | // by the normal full-run loop | |
267 | err_term += minor; | |
268 | } else { | |
269 | // If the minimum run length is even and there's no fractional advance, | |
270 | // we have one pixel that could go to either the initial or last | |
271 | // partial run, which we arbitrarily allocate to the last run | |
272 | if (err_adjup == 0) | |
273 | start_length--; // Leave out the extra pixel at the start | |
274 | } | |
275 | ||
276 | // First run of pixels | |
277 | dst = gpuPixelSpanDriver(dst, col16, incr_major, start_length); | |
4949d4ff | 278 | dst += incr_minor / 2; |
030d1121 | 279 | |
280 | // Middle runs of pixels | |
281 | while (--minor > 0) { | |
282 | int run_length = min_length; | |
283 | err_term += err_adjup; | |
284 | ||
285 | // If err_term passed 0, reset it and draw longer run | |
286 | if (err_term > 0) { | |
287 | err_term -= err_adjdown; | |
288 | run_length++; | |
86aad47b | 289 | } |
030d1121 | 290 | |
291 | dst = gpuPixelSpanDriver(dst, col16, incr_major, run_length); | |
4949d4ff | 292 | dst += incr_minor / 2; |
030d1121 | 293 | } |
294 | ||
295 | // Final run of pixels | |
296 | gpuPixelSpanDriver(dst, col16, incr_major, end_length); | |
297 | } | |
298 | ||
299 | ///////////////////////// | |
300 | // Gouraud-shaded line // | |
301 | ///////////////////////// | |
302 | void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver) | |
303 | { | |
304 | int x0, y0, x1, y1; | |
305 | int dx, dy, dr, dg, db; | |
306 | u32 r0, g0, b0, r1, g1, b1; | |
307 | ||
308 | // All three of these variables should be signed (so multiplication works) | |
309 | ptrdiff_t sx; // Sign of x delta, positive when x0 < x1 | |
310 | const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel | |
311 | const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line | |
312 | ||
313 | // Clip region: xmax/ymax seem to normally be one *past* the rightmost/ | |
314 | // bottommost pixels of the draw area. We'll render every pixel between | |
315 | // and including both line endpoints, so subtract one from xmax/ymax. | |
316 | const int xmin = gpu_unai.DrawingArea[0]; | |
317 | const int ymin = gpu_unai.DrawingArea[1]; | |
318 | const int xmax = gpu_unai.DrawingArea[2] - 1; | |
319 | const int ymax = gpu_unai.DrawingArea[3] - 1; | |
320 | ||
4949d4ff PC |
321 | x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2])) + gpu_unai.DrawingOffset[0]; |
322 | y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3])) + gpu_unai.DrawingOffset[1]; | |
323 | x1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[6])) + gpu_unai.DrawingOffset[0]; | |
324 | y1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[7])) + gpu_unai.DrawingOffset[1]; | |
030d1121 | 325 | |
4949d4ff PC |
326 | u32 col0 = le32_to_u32(packet.U4[0]); |
327 | u32 col1 = le32_to_u32(packet.U4[2]); | |
030d1121 | 328 | |
329 | // Always draw top to bottom, so ensure y0 <= y1 | |
330 | if (y0 > y1) { | |
331 | SwapValues(y0, y1); | |
332 | SwapValues(x0, x1); | |
333 | SwapValues(col0, col1); | |
334 | } | |
335 | ||
336 | // Is line totally outside Y clipping range? | |
337 | if (y0 > ymax || y1 < ymin) return; | |
338 | ||
339 | // If defined, Gouraud colors are fixed-point 5.11, otherwise they are 8.16 | |
340 | // (This is only beneficial if using SIMD-optimized pixel driver) | |
341 | #ifdef GPU_GOURAUD_LOW_PRECISION | |
342 | r0 = (col0 >> 3) & 0x1f; g0 = (col0 >> 11) & 0x1f; b0 = (col0 >> 19) & 0x1f; | |
343 | r1 = (col1 >> 3) & 0x1f; g1 = (col1 >> 11) & 0x1f; b1 = (col1 >> 19) & 0x1f; | |
344 | #else | |
345 | r0 = col0 & 0xff; g0 = (col0 >> 8) & 0xff; b0 = (col0 >> 16) & 0xff; | |
346 | r1 = col1 & 0xff; g1 = (col1 >> 8) & 0xff; b1 = (col1 >> 16) & 0xff; | |
347 | #endif | |
348 | ||
349 | dx = x1 - x0; | |
350 | dy = y1 - y0; | |
351 | dr = r1 - r0; | |
352 | dg = g1 - g0; | |
353 | db = b1 - b0; | |
354 | ||
355 | // X-axis range check : max distance between any two X coords is 1023 | |
356 | // (PSX hardware will not render anything violating this rule) | |
357 | // NOTE: We'll check y coord range further below | |
358 | if (dx >= CHKMAX_X || dx <= -CHKMAX_X) | |
359 | return; | |
360 | ||
361 | // Y-axis range check and clipping | |
362 | if (dy) { | |
363 | // Y-axis range check : max distance between any two Y coords is 511 | |
364 | // (PSX hardware will not render anything violating this rule) | |
365 | if (dy >= CHKMAX_Y) | |
366 | return; | |
367 | ||
368 | // We already know y0 < y1 | |
369 | if (y0 < ymin) { | |
370 | #ifdef USE_LINES_ALL_FIXED_PT_MATH | |
371 | s32 factor = GPU_FAST_DIV(((ymin - y0) << GPU_LINE_FIXED_BITS), dy); | |
372 | x0 += (dx * factor) >> GPU_LINE_FIXED_BITS; | |
373 | r0 += (dr * factor) >> GPU_LINE_FIXED_BITS; | |
374 | g0 += (dg * factor) >> GPU_LINE_FIXED_BITS; | |
375 | b0 += (db * factor) >> GPU_LINE_FIXED_BITS; | |
376 | #else | |
377 | x0 += (ymin - y0) * dx / dy; | |
378 | r0 += (ymin - y0) * dr / dy; | |
379 | g0 += (ymin - y0) * dg / dy; | |
380 | b0 += (ymin - y0) * db / dy; | |
381 | #endif | |
86aad47b | 382 | y0 = ymin; |
86aad47b | 383 | } |
030d1121 | 384 | |
385 | if (y1 > ymax) { | |
386 | #ifdef USE_LINES_ALL_FIXED_PT_MATH | |
387 | s32 factor = GPU_FAST_DIV(((ymax - y1) << GPU_LINE_FIXED_BITS), dy); | |
388 | x1 += (dx * factor) >> GPU_LINE_FIXED_BITS; | |
389 | r1 += (dr * factor) >> GPU_LINE_FIXED_BITS; | |
390 | g1 += (dg * factor) >> GPU_LINE_FIXED_BITS; | |
391 | b1 += (db * factor) >> GPU_LINE_FIXED_BITS; | |
392 | #else | |
393 | x1 += (ymax - y1) * dx / dy; | |
394 | r1 += (ymax - y1) * dr / dy; | |
395 | g1 += (ymax - y1) * dg / dy; | |
396 | b1 += (ymax - y1) * db / dy; | |
397 | #endif | |
398 | y1 = ymax; | |
86aad47b | 399 | } |
030d1121 | 400 | |
401 | // Recompute in case clipping occurred: | |
402 | dx = x1 - x0; | |
403 | dy = y1 - y0; | |
404 | dr = r1 - r0; | |
405 | dg = g1 - g0; | |
406 | db = b1 - b0; | |
407 | } | |
408 | ||
409 | // Check X clipping range, set 'sx' x-direction variable | |
410 | if (dx == 0) { | |
411 | // Is vertical line totally outside X clipping range? | |
412 | if (x0 < xmin || x0 > xmax) | |
413 | return; | |
414 | sx = 0; | |
86aad47b | 415 | } else { |
030d1121 | 416 | if (dx > 0) { |
417 | // x0 is leftmost coordinate | |
418 | if (x0 > xmax) return; // Both points outside X clip range | |
419 | ||
420 | if (x0 < xmin) { | |
421 | if (x1 < xmin) return; // Both points outside X clip range | |
422 | ||
423 | #ifdef USE_LINES_ALL_FIXED_PT_MATH | |
424 | s32 factor = GPU_FAST_DIV(((xmin - x0) << GPU_LINE_FIXED_BITS), dx); | |
425 | y0 += (dy * factor) >> GPU_LINE_FIXED_BITS; | |
426 | r0 += (dr * factor) >> GPU_LINE_FIXED_BITS; | |
427 | g0 += (dg * factor) >> GPU_LINE_FIXED_BITS; | |
428 | b0 += (db * factor) >> GPU_LINE_FIXED_BITS; | |
429 | #else | |
430 | y0 += (xmin - x0) * dy / dx; | |
431 | r0 += (xmin - x0) * dr / dx; | |
432 | g0 += (xmin - x0) * dg / dx; | |
433 | b0 += (xmin - x0) * db / dx; | |
434 | #endif | |
435 | x0 = xmin; | |
86aad47b | 436 | } |
030d1121 | 437 | |
438 | if (x1 > xmax) { | |
439 | #ifdef USE_LINES_ALL_FIXED_PT_MATH | |
440 | s32 factor = GPU_FAST_DIV(((xmax - x1) << GPU_LINE_FIXED_BITS), dx); | |
441 | y1 += (dy * factor) >> GPU_LINE_FIXED_BITS; | |
442 | r1 += (dr * factor) >> GPU_LINE_FIXED_BITS; | |
443 | g1 += (dg * factor) >> GPU_LINE_FIXED_BITS; | |
444 | b1 += (db * factor) >> GPU_LINE_FIXED_BITS; | |
445 | #else | |
446 | y1 += (xmax - x1) * dy / dx; | |
447 | r1 += (xmax - x1) * dr / dx; | |
448 | g1 += (xmax - x1) * dg / dx; | |
449 | b1 += (xmax - x1) * db / dx; | |
450 | #endif | |
451 | x1 = xmax; | |
452 | } | |
453 | ||
454 | sx = +1; | |
455 | dx = x1 - x0; // Get final value, which should also be absolute value | |
456 | } else { | |
457 | // x1 is leftmost coordinate | |
458 | if (x1 > xmax) return; // Both points outside X clip range | |
459 | ||
460 | if (x1 < xmin) { | |
461 | if (x0 < xmin) return; // Both points outside X clip range | |
462 | ||
463 | #ifdef USE_LINES_ALL_FIXED_PT_MATH | |
464 | s32 factor = GPU_FAST_DIV(((xmin - x1) << GPU_LINE_FIXED_BITS), dx); | |
465 | y1 += (dy * factor) >> GPU_LINE_FIXED_BITS; | |
466 | r1 += (dr * factor) >> GPU_LINE_FIXED_BITS; | |
467 | g1 += (dg * factor) >> GPU_LINE_FIXED_BITS; | |
468 | b1 += (db * factor) >> GPU_LINE_FIXED_BITS; | |
469 | #else | |
470 | y1 += (xmin - x1) * dy / dx; | |
471 | r1 += (xmin - x1) * dr / dx; | |
472 | g1 += (xmin - x1) * dg / dx; | |
473 | b1 += (xmin - x1) * db / dx; | |
474 | #endif | |
475 | x1 = xmin; | |
476 | } | |
477 | ||
478 | if (x0 > xmax) { | |
479 | #ifdef USE_LINES_ALL_FIXED_PT_MATH | |
480 | s32 factor = GPU_FAST_DIV(((xmax - x0) << GPU_LINE_FIXED_BITS), dx); | |
481 | y0 += (dy * factor) >> GPU_LINE_FIXED_BITS; | |
482 | r0 += (dr * factor) >> GPU_LINE_FIXED_BITS; | |
483 | g0 += (dg * factor) >> GPU_LINE_FIXED_BITS; | |
484 | b0 += (db * factor) >> GPU_LINE_FIXED_BITS; | |
485 | #else | |
486 | y0 += (xmax - x0) * dy / dx; | |
487 | r0 += (xmax - x0) * dr / dx; | |
488 | g0 += (xmax - x0) * dg / dx; | |
489 | b0 += (xmax - x0) * db / dx; | |
490 | #endif | |
491 | x0 = xmax; | |
492 | } | |
493 | ||
494 | sx = -1; | |
495 | dx = x0 - x1; // Get final value, which should also be absolute value | |
86aad47b | 496 | } |
030d1121 | 497 | |
498 | // Recompute in case clipping occurred: | |
499 | dy = y1 - y0; | |
500 | dr = r1 - r0; | |
501 | dg = g1 - g0; | |
502 | db = b1 - b0; | |
86aad47b | 503 | } |
86aad47b | 504 | |
030d1121 | 505 | // IMPORTANT: dx,dy should now contain their absolute values |
86aad47b | 506 | |
030d1121 | 507 | int min_length, // Minimum length of a pixel run |
508 | start_length, // Length of first run | |
509 | end_length, // Length of last run | |
510 | err_term, // Cumulative error to determine when to draw longer run | |
511 | err_adjup, // Increment to err_term for each run drawn | |
512 | err_adjdown; // Subract this from err_term after drawing longer run | |
513 | ||
514 | GouraudColor gcol; | |
515 | gcol.r = r0 << GPU_GOURAUD_FIXED_BITS; | |
516 | gcol.g = g0 << GPU_GOURAUD_FIXED_BITS; | |
517 | gcol.b = b0 << GPU_GOURAUD_FIXED_BITS; | |
518 | ||
4949d4ff | 519 | le16_t *dst = gpu_unai.vram + (y0 * dst_stride + x0 * dst_depth) / FRAME_BYTES_PER_PIXEL; |
030d1121 | 520 | |
521 | // SPECIAL CASE: Vertical line | |
522 | if (dx == 0) { | |
523 | #ifdef USE_LINES_ALL_FIXED_PT_MATH | |
524 | // Get dy fixed-point inverse | |
525 | s32 inv_factor = 1 << GPU_GOURAUD_FIXED_BITS; | |
526 | if (dy > 1) inv_factor = GPU_FAST_DIV(inv_factor, dy); | |
527 | ||
528 | // Simultaneously divide and convert integer to Gouraud fixed point: | |
529 | gcol.r_incr = dr * inv_factor; | |
530 | gcol.g_incr = dg * inv_factor; | |
531 | gcol.b_incr = db * inv_factor; | |
532 | #else | |
533 | // First, convert to Gouraud fixed point | |
534 | gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS; | |
535 | gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS; | |
536 | gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS; | |
537 | ||
538 | if (dy > 1) { | |
539 | if (dr) gcol.r_incr /= dy; | |
540 | if (dg) gcol.g_incr /= dy; | |
541 | if (db) gcol.b_incr /= dy; | |
86aad47b | 542 | } |
030d1121 | 543 | #endif |
4949d4ff | 544 | |
030d1121 | 545 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride, dy+1); |
546 | return; | |
547 | } | |
548 | ||
549 | // SPECIAL CASE: Horizontal line | |
550 | if (dy == 0) { | |
551 | #ifdef USE_LINES_ALL_FIXED_PT_MATH | |
552 | // Get dx fixed-point inverse | |
553 | s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS); | |
554 | if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx); | |
555 | ||
556 | // Simultaneously divide and convert integer to Gouraud fixed point: | |
557 | gcol.r_incr = dr * inv_factor; | |
558 | gcol.g_incr = dg * inv_factor; | |
559 | gcol.b_incr = db * inv_factor; | |
560 | #else | |
561 | gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS; | |
562 | gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS; | |
563 | gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS; | |
564 | ||
565 | if (dx > 1) { | |
566 | if (dr) gcol.r_incr /= dx; | |
567 | if (dg) gcol.g_incr /= dx; | |
568 | if (db) gcol.b_incr /= dx; | |
86aad47b | 569 | } |
030d1121 | 570 | #endif |
571 | ||
572 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, sx * dst_depth, dx+1); | |
573 | return; | |
574 | } | |
575 | ||
576 | // SPECIAL CASE: Diagonal line | |
577 | if (dx == dy) { | |
578 | #ifdef USE_LINES_ALL_FIXED_PT_MATH | |
579 | // Get dx fixed-point inverse | |
580 | s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS); | |
581 | if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx); | |
582 | ||
583 | // Simultaneously divide and convert integer to Gouraud fixed point: | |
584 | gcol.r_incr = dr * inv_factor; | |
585 | gcol.g_incr = dg * inv_factor; | |
586 | gcol.b_incr = db * inv_factor; | |
587 | #else | |
588 | // First, convert to Gouraud fixed point | |
589 | gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS; | |
590 | gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS; | |
591 | gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS; | |
592 | ||
593 | if (dx > 1) { | |
594 | if (dr) gcol.r_incr /= dx; | |
595 | if (dg) gcol.g_incr /= dx; | |
596 | if (db) gcol.b_incr /= dx; | |
86aad47b | 597 | } |
030d1121 | 598 | #endif |
599 | ||
600 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride + (sx * dst_depth), dy+1); | |
601 | return; | |
602 | } | |
603 | ||
604 | int major, minor; // Absolute val of major,minor axis delta | |
605 | ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis | |
606 | ||
607 | if (dx > dy) { | |
608 | major = dx; | |
609 | minor = dy; | |
610 | } else { | |
611 | major = dy; | |
612 | minor = dx; | |
613 | } | |
614 | ||
615 | // Determine if diagonal or horizontal runs | |
616 | if (major < (2 * minor)) { | |
617 | // Diagonal runs, so perform half-octant transformation | |
618 | minor = major - minor; | |
619 | ||
620 | // Advance diagonally when drawing runs | |
621 | incr_major = dst_stride + (sx * dst_depth); | |
622 | ||
623 | // After drawing each run, correct for over-advance along minor axis | |
624 | if (dx > dy) | |
625 | incr_minor = -dst_stride; | |
626 | else | |
627 | incr_minor = -sx * dst_depth; | |
628 | } else { | |
629 | // Horizontal or vertical runs | |
630 | if (dx > dy) { | |
631 | incr_major = sx * dst_depth; | |
632 | incr_minor = dst_stride; | |
633 | } else { | |
634 | incr_major = dst_stride; | |
635 | incr_minor = sx * dst_depth; | |
86aad47b | 636 | } |
030d1121 | 637 | } |
638 | ||
639 | #ifdef USE_LINES_ALL_FIXED_PT_MATH | |
640 | s32 major_inv = GPU_FAST_DIV((1 << GPU_GOURAUD_FIXED_BITS), major); | |
641 | ||
642 | // Simultaneously divide and convert from integer to Gouraud fixed point: | |
643 | gcol.r_incr = dr * major_inv; | |
644 | gcol.g_incr = dg * major_inv; | |
645 | gcol.b_incr = db * major_inv; | |
646 | #else | |
647 | gcol.r_incr = dr ? ((dr << GPU_GOURAUD_FIXED_BITS) / major) : 0; | |
648 | gcol.g_incr = dg ? ((dg << GPU_GOURAUD_FIXED_BITS) / major) : 0; | |
649 | gcol.b_incr = db ? ((db << GPU_GOURAUD_FIXED_BITS) / major) : 0; | |
650 | #endif | |
651 | ||
652 | if (minor > 1) { | |
653 | // Minimum number of pixels each run | |
654 | min_length = major / minor; | |
655 | ||
656 | // Initial error term; reflects an initial step of 0.5 along minor axis | |
657 | err_term = (major % minor) - (minor * 2); | |
658 | ||
659 | // Increment err_term this much each step along minor axis; when | |
660 | // err_term crosses zero, draw longer pixel run. | |
661 | err_adjup = (major % minor) * 2; | |
86aad47b | 662 | } else { |
030d1121 | 663 | min_length = major; |
664 | err_term = 0; | |
665 | err_adjup = 0; | |
666 | } | |
667 | ||
668 | // Error term adjustment when err_term turns over; used to factor | |
669 | // out the major-axis step made at that time | |
670 | err_adjdown = minor * 2; | |
671 | ||
672 | // The initial and last runs are partial, because minor axis advances | |
673 | // only 0.5 for these runs, rather than 1. Each is half a full run, | |
674 | // plus the initial pixel. | |
675 | start_length = end_length = (min_length / 2) + 1; | |
676 | ||
677 | if (min_length & 1) { | |
678 | // If there're an odd number of pixels per run, we have 1 pixel that | |
679 | // can't be allocated to either the initial or last partial run, so | |
680 | // we'll add 0.5 to err_term so that this pixel will be handled | |
681 | // by the normal full-run loop | |
682 | err_term += minor; | |
683 | } else { | |
684 | // If the minimum run length is even and there's no fractional advance, | |
685 | // we have one pixel that could go to either the initial or last | |
686 | // partial run, which we'll arbitrarily allocate to the last run | |
687 | if (err_adjup == 0) | |
688 | start_length--; // Leave out the extra pixel at the start | |
689 | } | |
690 | ||
691 | // First run of pixels | |
692 | dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, start_length); | |
4949d4ff | 693 | dst += incr_minor / 2; |
030d1121 | 694 | |
695 | // Middle runs of pixels | |
696 | while (--minor > 0) { | |
697 | int run_length = min_length; | |
698 | err_term += err_adjup; | |
699 | ||
700 | // If err_term passed 0, reset it and draw longer run | |
701 | if (err_term > 0) { | |
702 | err_term -= err_adjdown; | |
703 | run_length++; | |
86aad47b | 704 | } |
030d1121 | 705 | |
706 | dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, run_length); | |
4949d4ff | 707 | dst += incr_minor / 2; |
86aad47b | 708 | } |
030d1121 | 709 | |
710 | // Final run of pixels | |
711 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, end_length); | |
86aad47b | 712 | } |
908e426c | 713 | |
714 | #endif /* __GPU_UNAI_GPU_RASTER_LINE_H__ */ |