// pcsx_rearmed.git: plugins/gpu_unai/gpu_raster_line.h
// (Backport GPU Unai plugin from PCSX4ALL)
/***************************************************************************
*   Copyright (C) 2010 PCSX4ALL Team                                       *
*   Copyright (C) 2010 Unai                                                *
*   Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com)           *
*                                                                          *
*   This program is free software; you can redistribute it and/or modify   *
*   it under the terms of the GNU General Public License as published by   *
*   the Free Software Foundation; either version 2 of the License, or      *
*   (at your option) any later version.                                    *
*                                                                          *
*   This program is distributed in the hope that it will be useful,        *
*   but WITHOUT ANY WARRANTY; without even the implied warranty of         *
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           *
*   GNU General Public License for more details.                           *
*                                                                          *
*   You should have received a copy of the GNU General Public License      *
*   along with this program; if not, write to the                          *
*   Free Software Foundation, Inc.,                                        *
*   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.            *
***************************************************************************/

///////////////////////////////////////////////////////////////////////////////
//  GPU internal line drawing functions
//
//  Rewritten October 2016 by senquack:
//   Instead of one pixel at a time, lines are now drawn in runs of pixels,
//   whether vertical, horizontal, or diagonal. A new inner driver
//   'gpuPixelSpanFn' is used, as well as an enhanced Bresenham run-slice
//   algorithm. For more information, see the following:
//
//   Michael Abrash - Graphics Programming Black Book
//   Chapters 35 - 36 (does not implement diagonal runs)
//   http://www.drdobbs.com/parallel/graphics-programming-black-book/184404919
//   http://www.jagregory.com/abrash-black-book/
//
//   Article by Andrew Delong (does not implement diagonal runs)
//   http://timetraces.ca/nw/drawline.htm
//
//   'Run-Based Multi-Point Line Drawing' by Eun Jae Lee & Larry F. Hodges
//   https://smartech.gatech.edu/bitstream/handle/1853/3632/93-22.pdf
//   Provided the idea of doing a half-octant transform allowing lines with
//   slopes between 0.5 and 2.0 (diagonal runs of pixels) to be handled
//   identically to the traditional horizontal/vertical run-slice method.

// Use 16.16 fixed point precision for line math.
// NOTE: Gouraud colors used by gpuPixelSpanFn can use a different precision.
#define GPU_LINE_FIXED_BITS 16

// If defined, Gouraud lines will use fixed-point multiply-by-inverse to
// do most divisions. With enough accuracy, this should be OK.
#define USE_LINES_ALL_FIXED_PT_MATH

030d1121 53//////////////////////
54// Flat-shaded line //
55//////////////////////
56void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver)
86aad47b 57{
030d1121 58 int x0, y0, x1, y1;
59 int dx, dy;
60
61 // All three of these variables should be signed (so multiplication works)
62 ptrdiff_t sx; // Sign of x delta, positive when x0 < x1
63 const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel
64 const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line
65
66 // Clip region: xmax/ymax seem to normally be one *past* the rightmost/
67 // bottommost pixels of the draw area. Since we render every pixel between
68 // and including both line endpoints, subtract one from xmax/ymax.
69 const int xmin = gpu_unai.DrawingArea[0];
70 const int ymin = gpu_unai.DrawingArea[1];
71 const int xmax = gpu_unai.DrawingArea[2] - 1;
72 const int ymax = gpu_unai.DrawingArea[3] - 1;
73
74 x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_unai.DrawingOffset[0];
75 y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_unai.DrawingOffset[1];
76 x1 = GPU_EXPANDSIGN(packet.S2[4]) + gpu_unai.DrawingOffset[0];
77 y1 = GPU_EXPANDSIGN(packet.S2[5]) + gpu_unai.DrawingOffset[1];
78
79 // Always draw top to bottom, so ensure y0 <= y1
80 if (y0 > y1) {
81 SwapValues(y0, y1);
82 SwapValues(x0, x1);
83 }
84
85 // Is line totally outside Y clipping range?
86 if (y0 > ymax || y1 < ymin) return;
87
88 dx = x1 - x0;
89 dy = y1 - y0;
90
91 // X-axis range check : max distance between any two X coords is 1023
92 // (PSX hardware will not render anything violating this rule)
93 // NOTE: We'll check y coord range further below
94 if (dx >= CHKMAX_X || dx <= -CHKMAX_X)
95 return;
96
97 // Y-axis range check and clipping
98 if (dy) {
99 // Y-axis range check : max distance between any two Y coords is 511
100 // (PSX hardware will not render anything violating this rule)
101 if (dy >= CHKMAX_Y)
102 return;
103
104 // We already know y0 < y1
105 if (y0 < ymin) {
106 x0 += GPU_FAST_DIV(((ymin - y0) * dx), dy);
107 y0 = ymin;
86aad47b 108 }
030d1121 109 if (y1 > ymax) {
110 x1 += GPU_FAST_DIV(((ymax - y1) * dx), dy);
111 y1 = ymax;
86aad47b 112 }
030d1121 113
114 // Recompute in case clipping occurred:
115 dx = x1 - x0;
116 dy = y1 - y0;
117 }
118
119 // Check X clipping range, set 'sx' x-direction variable
120 if (dx == 0) {
121 // Is vertical line totally outside X clipping range?
122 if (x0 < xmin || x0 > xmax)
123 return;
124 sx = 0;
125 } else {
126 if (dx > 0) {
127 // x0 is leftmost coordinate
128 if (x0 > xmax) return; // Both points outside X clip range
129
130 if (x0 < xmin) {
131 if (x1 < xmin) return; // Both points outside X clip range
132 y0 += GPU_FAST_DIV(((xmin - x0) * dy), dx);
133 x0 = xmin;
134 }
135
136 if (x1 > xmax) {
137 y1 += GPU_FAST_DIV(((xmax - x1) * dy), dx);
138 x1 = xmax;
139 }
140
141 sx = +1;
142 dx = x1 - x0; // Get final value, which should also be absolute value
143 } else {
144 // x1 is leftmost coordinate
145 if (x1 > xmax) return; // Both points outside X clip range
146
147 if (x1 < xmin) {
148 if (x0 < xmin) return; // Both points outside X clip range
149
150 y1 += GPU_FAST_DIV(((xmin - x1) * dy), dx);
151 x1 = xmin;
86aad47b 152 }
030d1121 153
154 if (x0 > xmax) {
155 y0 += GPU_FAST_DIV(((xmax - x0) * dy), dx);
156 x0 = xmax;
157 }
158
159 sx = -1;
160 dx = x0 - x1; // Get final value, which should also be absolute value
161 }
162
163 // Recompute in case clipping occurred:
164 dy = y1 - y0;
165 }
166
167 // IMPORTANT: dx,dy should now contain their absolute values
168
169 int min_length, // Minimum length of a pixel run
170 start_length, // Length of first run
171 end_length, // Length of last run
172 err_term, // Cumulative error to determine when to draw longer run
173 err_adjup, // Increment to err_term for each run drawn
174 err_adjdown; // Subract this from err_term after drawing longer run
175
176 // Color to draw with (16 bits, highest of which is unset mask bit)
177 uintptr_t col16 = GPU_RGB16(packet.U4[0]);
178
179 // We use u8 pointers even though PS1 has u16 framebuffer.
180 // This allows pixel-drawing functions to increment dst pointer
181 // directly by the passed 'incr' value, not having to shift it first.
182 u8 *dst = (u8*)gpu_unai.vram + y0 * dst_stride + x0 * dst_depth;
183
184 // SPECIAL CASE: Vertical line
185 if (dx == 0) {
186 gpuPixelSpanDriver(dst, col16, dst_stride, dy+1);
187 return;
188 }
189
190 // SPECIAL CASE: Horizontal line
191 if (dy == 0) {
192 gpuPixelSpanDriver(dst, col16, sx * dst_depth, dx+1);
193 return;
194 }
195
196 // SPECIAL CASE: Diagonal line
197 if (dx == dy) {
198 gpuPixelSpanDriver(dst, col16, dst_stride + (sx * dst_depth), dy+1);
199 return;
200 }
201
202 int major, minor; // Major axis, minor axis
203 ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis
204
205 if (dx > dy) {
206 major = dx;
207 minor = dy;
208 } else {
209 major = dy;
210 minor = dx;
211 }
212
213 // Determine if diagonal or horizontal runs
214 if (major < (2 * minor)) {
215 // Diagonal runs, so perform half-octant transformation
216 minor = major - minor;
217
218 // Advance diagonally when drawing runs
219 incr_major = dst_stride + (sx * dst_depth);
220
221 // After drawing each run, correct for over-advance along minor axis
222 if (dx > dy)
223 incr_minor = -dst_stride;
224 else
225 incr_minor = -sx * dst_depth;
226 } else {
227 // Horizontal or vertical runs
228 if (dx > dy) {
229 incr_major = sx * dst_depth;
230 incr_minor = dst_stride;
231 } else {
232 incr_major = dst_stride;
233 incr_minor = sx * dst_depth;
86aad47b 234 }
030d1121 235 }
236
237 if (minor > 1) {
238 // Minimum number of pixels each run
239 min_length = major / minor;
240
241 // Initial error term; reflects an initial step of 0.5 along minor axis
242 err_term = (major % minor) - (minor * 2);
243
244 // Increment err_term this much each step along minor axis; when
245 // err_term crosses zero, draw longer pixel run.
246 err_adjup = (major % minor) * 2;
247 } else {
248 min_length = major;
249 err_term = 0;
250 err_adjup = 0;
251 }
252
253 // Error term adjustment when err_term turns over; used to factor
254 // out the major-axis step made at that time
255 err_adjdown = minor * 2;
256
257 // The initial and last runs are partial, because minor axis advances
258 // only 0.5 for these runs, rather than 1. Each is half a full run,
259 // plus the initial pixel.
260 start_length = end_length = (min_length / 2) + 1;
261
262 if (min_length & 1) {
263 // If there're an odd number of pixels per run, we have 1 pixel that
264 // can't be allocated to either the initial or last partial run, so
265 // we'll add 0.5 to err_term so that this pixel will be handled
266 // by the normal full-run loop
267 err_term += minor;
268 } else {
269 // If the minimum run length is even and there's no fractional advance,
270 // we have one pixel that could go to either the initial or last
271 // partial run, which we arbitrarily allocate to the last run
272 if (err_adjup == 0)
273 start_length--; // Leave out the extra pixel at the start
274 }
275
276 // First run of pixels
277 dst = gpuPixelSpanDriver(dst, col16, incr_major, start_length);
278 dst += incr_minor;
279
280 // Middle runs of pixels
281 while (--minor > 0) {
282 int run_length = min_length;
283 err_term += err_adjup;
284
285 // If err_term passed 0, reset it and draw longer run
286 if (err_term > 0) {
287 err_term -= err_adjdown;
288 run_length++;
86aad47b 289 }
030d1121 290
291 dst = gpuPixelSpanDriver(dst, col16, incr_major, run_length);
292 dst += incr_minor;
293 }
294
295 // Final run of pixels
296 gpuPixelSpanDriver(dst, col16, incr_major, end_length);
297}
298
299/////////////////////////
300// Gouraud-shaded line //
301/////////////////////////
302void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver)
303{
304 int x0, y0, x1, y1;
305 int dx, dy, dr, dg, db;
306 u32 r0, g0, b0, r1, g1, b1;
307
308 // All three of these variables should be signed (so multiplication works)
309 ptrdiff_t sx; // Sign of x delta, positive when x0 < x1
310 const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel
311 const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line
312
313 // Clip region: xmax/ymax seem to normally be one *past* the rightmost/
314 // bottommost pixels of the draw area. We'll render every pixel between
315 // and including both line endpoints, so subtract one from xmax/ymax.
316 const int xmin = gpu_unai.DrawingArea[0];
317 const int ymin = gpu_unai.DrawingArea[1];
318 const int xmax = gpu_unai.DrawingArea[2] - 1;
319 const int ymax = gpu_unai.DrawingArea[3] - 1;
320
321 x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_unai.DrawingOffset[0];
322 y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_unai.DrawingOffset[1];
323 x1 = GPU_EXPANDSIGN(packet.S2[6]) + gpu_unai.DrawingOffset[0];
324 y1 = GPU_EXPANDSIGN(packet.S2[7]) + gpu_unai.DrawingOffset[1];
325
326 u32 col0 = packet.U4[0];
327 u32 col1 = packet.U4[2];
328
329 // Always draw top to bottom, so ensure y0 <= y1
330 if (y0 > y1) {
331 SwapValues(y0, y1);
332 SwapValues(x0, x1);
333 SwapValues(col0, col1);
334 }
335
336 // Is line totally outside Y clipping range?
337 if (y0 > ymax || y1 < ymin) return;
338
339 // If defined, Gouraud colors are fixed-point 5.11, otherwise they are 8.16
340 // (This is only beneficial if using SIMD-optimized pixel driver)
341#ifdef GPU_GOURAUD_LOW_PRECISION
342 r0 = (col0 >> 3) & 0x1f; g0 = (col0 >> 11) & 0x1f; b0 = (col0 >> 19) & 0x1f;
343 r1 = (col1 >> 3) & 0x1f; g1 = (col1 >> 11) & 0x1f; b1 = (col1 >> 19) & 0x1f;
344#else
345 r0 = col0 & 0xff; g0 = (col0 >> 8) & 0xff; b0 = (col0 >> 16) & 0xff;
346 r1 = col1 & 0xff; g1 = (col1 >> 8) & 0xff; b1 = (col1 >> 16) & 0xff;
347#endif
348
349 dx = x1 - x0;
350 dy = y1 - y0;
351 dr = r1 - r0;
352 dg = g1 - g0;
353 db = b1 - b0;
354
355 // X-axis range check : max distance between any two X coords is 1023
356 // (PSX hardware will not render anything violating this rule)
357 // NOTE: We'll check y coord range further below
358 if (dx >= CHKMAX_X || dx <= -CHKMAX_X)
359 return;
360
361 // Y-axis range check and clipping
362 if (dy) {
363 // Y-axis range check : max distance between any two Y coords is 511
364 // (PSX hardware will not render anything violating this rule)
365 if (dy >= CHKMAX_Y)
366 return;
367
368 // We already know y0 < y1
369 if (y0 < ymin) {
370#ifdef USE_LINES_ALL_FIXED_PT_MATH
371 s32 factor = GPU_FAST_DIV(((ymin - y0) << GPU_LINE_FIXED_BITS), dy);
372 x0 += (dx * factor) >> GPU_LINE_FIXED_BITS;
373 r0 += (dr * factor) >> GPU_LINE_FIXED_BITS;
374 g0 += (dg * factor) >> GPU_LINE_FIXED_BITS;
375 b0 += (db * factor) >> GPU_LINE_FIXED_BITS;
376#else
377 x0 += (ymin - y0) * dx / dy;
378 r0 += (ymin - y0) * dr / dy;
379 g0 += (ymin - y0) * dg / dy;
380 b0 += (ymin - y0) * db / dy;
381#endif
86aad47b 382 y0 = ymin;
86aad47b 383 }
030d1121 384
385 if (y1 > ymax) {
386#ifdef USE_LINES_ALL_FIXED_PT_MATH
387 s32 factor = GPU_FAST_DIV(((ymax - y1) << GPU_LINE_FIXED_BITS), dy);
388 x1 += (dx * factor) >> GPU_LINE_FIXED_BITS;
389 r1 += (dr * factor) >> GPU_LINE_FIXED_BITS;
390 g1 += (dg * factor) >> GPU_LINE_FIXED_BITS;
391 b1 += (db * factor) >> GPU_LINE_FIXED_BITS;
392#else
393 x1 += (ymax - y1) * dx / dy;
394 r1 += (ymax - y1) * dr / dy;
395 g1 += (ymax - y1) * dg / dy;
396 b1 += (ymax - y1) * db / dy;
397#endif
398 y1 = ymax;
86aad47b 399 }
030d1121 400
401 // Recompute in case clipping occurred:
402 dx = x1 - x0;
403 dy = y1 - y0;
404 dr = r1 - r0;
405 dg = g1 - g0;
406 db = b1 - b0;
407 }
408
409 // Check X clipping range, set 'sx' x-direction variable
410 if (dx == 0) {
411 // Is vertical line totally outside X clipping range?
412 if (x0 < xmin || x0 > xmax)
413 return;
414 sx = 0;
86aad47b 415 } else {
030d1121 416 if (dx > 0) {
417 // x0 is leftmost coordinate
418 if (x0 > xmax) return; // Both points outside X clip range
419
420 if (x0 < xmin) {
421 if (x1 < xmin) return; // Both points outside X clip range
422
423#ifdef USE_LINES_ALL_FIXED_PT_MATH
424 s32 factor = GPU_FAST_DIV(((xmin - x0) << GPU_LINE_FIXED_BITS), dx);
425 y0 += (dy * factor) >> GPU_LINE_FIXED_BITS;
426 r0 += (dr * factor) >> GPU_LINE_FIXED_BITS;
427 g0 += (dg * factor) >> GPU_LINE_FIXED_BITS;
428 b0 += (db * factor) >> GPU_LINE_FIXED_BITS;
429#else
430 y0 += (xmin - x0) * dy / dx;
431 r0 += (xmin - x0) * dr / dx;
432 g0 += (xmin - x0) * dg / dx;
433 b0 += (xmin - x0) * db / dx;
434#endif
435 x0 = xmin;
86aad47b 436 }
030d1121 437
438 if (x1 > xmax) {
439#ifdef USE_LINES_ALL_FIXED_PT_MATH
440 s32 factor = GPU_FAST_DIV(((xmax - x1) << GPU_LINE_FIXED_BITS), dx);
441 y1 += (dy * factor) >> GPU_LINE_FIXED_BITS;
442 r1 += (dr * factor) >> GPU_LINE_FIXED_BITS;
443 g1 += (dg * factor) >> GPU_LINE_FIXED_BITS;
444 b1 += (db * factor) >> GPU_LINE_FIXED_BITS;
445#else
446 y1 += (xmax - x1) * dy / dx;
447 r1 += (xmax - x1) * dr / dx;
448 g1 += (xmax - x1) * dg / dx;
449 b1 += (xmax - x1) * db / dx;
450#endif
451 x1 = xmax;
452 }
453
454 sx = +1;
455 dx = x1 - x0; // Get final value, which should also be absolute value
456 } else {
457 // x1 is leftmost coordinate
458 if (x1 > xmax) return; // Both points outside X clip range
459
460 if (x1 < xmin) {
461 if (x0 < xmin) return; // Both points outside X clip range
462
463#ifdef USE_LINES_ALL_FIXED_PT_MATH
464 s32 factor = GPU_FAST_DIV(((xmin - x1) << GPU_LINE_FIXED_BITS), dx);
465 y1 += (dy * factor) >> GPU_LINE_FIXED_BITS;
466 r1 += (dr * factor) >> GPU_LINE_FIXED_BITS;
467 g1 += (dg * factor) >> GPU_LINE_FIXED_BITS;
468 b1 += (db * factor) >> GPU_LINE_FIXED_BITS;
469#else
470 y1 += (xmin - x1) * dy / dx;
471 r1 += (xmin - x1) * dr / dx;
472 g1 += (xmin - x1) * dg / dx;
473 b1 += (xmin - x1) * db / dx;
474#endif
475 x1 = xmin;
476 }
477
478 if (x0 > xmax) {
479#ifdef USE_LINES_ALL_FIXED_PT_MATH
480 s32 factor = GPU_FAST_DIV(((xmax - x0) << GPU_LINE_FIXED_BITS), dx);
481 y0 += (dy * factor) >> GPU_LINE_FIXED_BITS;
482 r0 += (dr * factor) >> GPU_LINE_FIXED_BITS;
483 g0 += (dg * factor) >> GPU_LINE_FIXED_BITS;
484 b0 += (db * factor) >> GPU_LINE_FIXED_BITS;
485#else
486 y0 += (xmax - x0) * dy / dx;
487 r0 += (xmax - x0) * dr / dx;
488 g0 += (xmax - x0) * dg / dx;
489 b0 += (xmax - x0) * db / dx;
490#endif
491 x0 = xmax;
492 }
493
494 sx = -1;
495 dx = x0 - x1; // Get final value, which should also be absolute value
86aad47b 496 }
030d1121 497
498 // Recompute in case clipping occurred:
499 dy = y1 - y0;
500 dr = r1 - r0;
501 dg = g1 - g0;
502 db = b1 - b0;
86aad47b 503 }
86aad47b 504
030d1121 505 // IMPORTANT: dx,dy should now contain their absolute values
86aad47b 506
030d1121 507 int min_length, // Minimum length of a pixel run
508 start_length, // Length of first run
509 end_length, // Length of last run
510 err_term, // Cumulative error to determine when to draw longer run
511 err_adjup, // Increment to err_term for each run drawn
512 err_adjdown; // Subract this from err_term after drawing longer run
513
514 GouraudColor gcol;
515 gcol.r = r0 << GPU_GOURAUD_FIXED_BITS;
516 gcol.g = g0 << GPU_GOURAUD_FIXED_BITS;
517 gcol.b = b0 << GPU_GOURAUD_FIXED_BITS;
518
519 // We use u8 pointers even though PS1 has u16 framebuffer.
520 // This allows pixel-drawing functions to increment dst pointer
521 // directly by the passed 'incr' value, not having to shift it first.
522 u8 *dst = (u8*)gpu_unai.vram + y0 * dst_stride + x0 * dst_depth;
523
524 // SPECIAL CASE: Vertical line
525 if (dx == 0) {
526#ifdef USE_LINES_ALL_FIXED_PT_MATH
527 // Get dy fixed-point inverse
528 s32 inv_factor = 1 << GPU_GOURAUD_FIXED_BITS;
529 if (dy > 1) inv_factor = GPU_FAST_DIV(inv_factor, dy);
530
531 // Simultaneously divide and convert integer to Gouraud fixed point:
532 gcol.r_incr = dr * inv_factor;
533 gcol.g_incr = dg * inv_factor;
534 gcol.b_incr = db * inv_factor;
535#else
536 // First, convert to Gouraud fixed point
537 gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS;
538 gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS;
539 gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS;
540
541 if (dy > 1) {
542 if (dr) gcol.r_incr /= dy;
543 if (dg) gcol.g_incr /= dy;
544 if (db) gcol.b_incr /= dy;
86aad47b 545 }
030d1121 546#endif
86aad47b 547
030d1121 548 gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride, dy+1);
549 return;
550 }
551
552 // SPECIAL CASE: Horizontal line
553 if (dy == 0) {
554#ifdef USE_LINES_ALL_FIXED_PT_MATH
555 // Get dx fixed-point inverse
556 s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS);
557 if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx);
558
559 // Simultaneously divide and convert integer to Gouraud fixed point:
560 gcol.r_incr = dr * inv_factor;
561 gcol.g_incr = dg * inv_factor;
562 gcol.b_incr = db * inv_factor;
563#else
564 gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS;
565 gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS;
566 gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS;
567
568 if (dx > 1) {
569 if (dr) gcol.r_incr /= dx;
570 if (dg) gcol.g_incr /= dx;
571 if (db) gcol.b_incr /= dx;
86aad47b 572 }
030d1121 573#endif
574
575 gpuPixelSpanDriver(dst, (uintptr_t)&gcol, sx * dst_depth, dx+1);
576 return;
577 }
578
579 // SPECIAL CASE: Diagonal line
580 if (dx == dy) {
581#ifdef USE_LINES_ALL_FIXED_PT_MATH
582 // Get dx fixed-point inverse
583 s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS);
584 if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx);
585
586 // Simultaneously divide and convert integer to Gouraud fixed point:
587 gcol.r_incr = dr * inv_factor;
588 gcol.g_incr = dg * inv_factor;
589 gcol.b_incr = db * inv_factor;
590#else
591 // First, convert to Gouraud fixed point
592 gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS;
593 gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS;
594 gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS;
595
596 if (dx > 1) {
597 if (dr) gcol.r_incr /= dx;
598 if (dg) gcol.g_incr /= dx;
599 if (db) gcol.b_incr /= dx;
86aad47b 600 }
030d1121 601#endif
602
603 gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride + (sx * dst_depth), dy+1);
604 return;
605 }
606
607 int major, minor; // Absolute val of major,minor axis delta
608 ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis
609
610 if (dx > dy) {
611 major = dx;
612 minor = dy;
613 } else {
614 major = dy;
615 minor = dx;
616 }
617
618 // Determine if diagonal or horizontal runs
619 if (major < (2 * minor)) {
620 // Diagonal runs, so perform half-octant transformation
621 minor = major - minor;
622
623 // Advance diagonally when drawing runs
624 incr_major = dst_stride + (sx * dst_depth);
625
626 // After drawing each run, correct for over-advance along minor axis
627 if (dx > dy)
628 incr_minor = -dst_stride;
629 else
630 incr_minor = -sx * dst_depth;
631 } else {
632 // Horizontal or vertical runs
633 if (dx > dy) {
634 incr_major = sx * dst_depth;
635 incr_minor = dst_stride;
636 } else {
637 incr_major = dst_stride;
638 incr_minor = sx * dst_depth;
86aad47b 639 }
030d1121 640 }
641
642#ifdef USE_LINES_ALL_FIXED_PT_MATH
643 s32 major_inv = GPU_FAST_DIV((1 << GPU_GOURAUD_FIXED_BITS), major);
644
645 // Simultaneously divide and convert from integer to Gouraud fixed point:
646 gcol.r_incr = dr * major_inv;
647 gcol.g_incr = dg * major_inv;
648 gcol.b_incr = db * major_inv;
649#else
650 gcol.r_incr = dr ? ((dr << GPU_GOURAUD_FIXED_BITS) / major) : 0;
651 gcol.g_incr = dg ? ((dg << GPU_GOURAUD_FIXED_BITS) / major) : 0;
652 gcol.b_incr = db ? ((db << GPU_GOURAUD_FIXED_BITS) / major) : 0;
653#endif
654
655 if (minor > 1) {
656 // Minimum number of pixels each run
657 min_length = major / minor;
658
659 // Initial error term; reflects an initial step of 0.5 along minor axis
660 err_term = (major % minor) - (minor * 2);
661
662 // Increment err_term this much each step along minor axis; when
663 // err_term crosses zero, draw longer pixel run.
664 err_adjup = (major % minor) * 2;
86aad47b 665 } else {
030d1121 666 min_length = major;
667 err_term = 0;
668 err_adjup = 0;
669 }
670
671 // Error term adjustment when err_term turns over; used to factor
672 // out the major-axis step made at that time
673 err_adjdown = minor * 2;
674
675 // The initial and last runs are partial, because minor axis advances
676 // only 0.5 for these runs, rather than 1. Each is half a full run,
677 // plus the initial pixel.
678 start_length = end_length = (min_length / 2) + 1;
679
680 if (min_length & 1) {
681 // If there're an odd number of pixels per run, we have 1 pixel that
682 // can't be allocated to either the initial or last partial run, so
683 // we'll add 0.5 to err_term so that this pixel will be handled
684 // by the normal full-run loop
685 err_term += minor;
686 } else {
687 // If the minimum run length is even and there's no fractional advance,
688 // we have one pixel that could go to either the initial or last
689 // partial run, which we'll arbitrarily allocate to the last run
690 if (err_adjup == 0)
691 start_length--; // Leave out the extra pixel at the start
692 }
693
694 // First run of pixels
695 dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, start_length);
696 dst += incr_minor;
697
698 // Middle runs of pixels
699 while (--minor > 0) {
700 int run_length = min_length;
701 err_term += err_adjup;
702
703 // If err_term passed 0, reset it and draw longer run
704 if (err_term > 0) {
705 err_term -= err_adjdown;
706 run_length++;
86aad47b 707 }
030d1121 708
709 dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, run_length);
710 dst += incr_minor;
86aad47b 711 }
030d1121 712
713 // Final run of pixels
714 gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, end_length);
86aad47b 715}