0bfe8d59 |
1 | /*************************************************************************** |
2 | * Copyright (C) 2010 PCSX4ALL Team * |
3 | * Copyright (C) 2010 Unai * |
4 | * Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com) * |
5 | * * |
6 | * This program is free software; you can redistribute it and/or modify * |
7 | * it under the terms of the GNU General Public License as published by * |
8 | * the Free Software Foundation; either version 2 of the License, or * |
9 | * (at your option) any later version. * |
10 | * * |
11 | * This program is distributed in the hope that it will be useful, * |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
14 | * GNU General Public License for more details. * |
15 | * * |
16 | * You should have received a copy of the GNU General Public License * |
17 | * along with this program; if not, write to the * |
18 | * Free Software Foundation, Inc., * |
19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * |
20 | ***************************************************************************/ |
21 | |
22 | #ifndef __GPU_UNAI_GPU_RASTER_LINE_H__ |
23 | #define __GPU_UNAI_GPU_RASTER_LINE_H__ |
24 | |
25 | /////////////////////////////////////////////////////////////////////////////// |
26 | // GPU internal line drawing functions |
27 | // |
28 | // Rewritten October 2016 by senquack: |
29 | // Instead of one pixel at a time, lines are now drawn in runs of pixels, |
30 | // whether vertical, horizontal, or diagonal. A new inner driver |
31 | // 'gpuPixelSpanFn' is used, as well as an enhanced Bresenham run-slice |
32 | // algorithm. For more information, see the following: |
33 | // |
34 | // Michael Abrash - Graphics Programming Black Book |
35 | // Chapters 35 - 36 (does not implement diagonal runs) |
36 | // http://www.drdobbs.com/parallel/graphics-programming-black-book/184404919 |
37 | // http://www.jagregory.com/abrash-black-book/ |
38 | // |
39 | // Article by Andrew Delong (does not implement diagonal runs) |
40 | // http://timetraces.ca/nw/drawline.htm |
41 | // |
42 | // 'Run-Based Multi-Point Line Drawing' by Eun Jae Lee & Larry F. Hodges |
43 | // https://smartech.gatech.edu/bitstream/handle/1853/3632/93-22.pdf |
44 | // Provided the idea of doing a half-octant transform allowing lines with |
45 | // slopes between 0.5 and 2.0 (diagonal runs of pixels) to be handled |
46 | // identically to the traditional horizontal/vertical run-slice method. |
47 | |
48 | // Fraction bits (16.16 fixed point) of the interpolation factors that gpuDrawLineG computes when clipping a line endpoint. |
49 | // NOTE: Gouraud colors handed to gpuPixelSpanFn are scaled by GPU_GOURAUD_FIXED_BITS instead, which can be a different precision. |
50 | #define GPU_LINE_FIXED_BITS 16 |
51 | |
52 | // If defined, Gouraud lines will use fixed-point multiply-by-inverse to |
53 | // do most divisions (the #ifdef branches in gpuDrawLineG); if not defined, the #else branches use plain integer division. With enough accuracy, the fixed-point form should be OK. |
54 | #define USE_LINES_ALL_FIXED_PT_MATH |
55 | |
56 | ////////////////////// |
57 | // Flat-shaded line // |
58 | ////////////////////// |
59 | void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver) |
60 | { |
61 | int x0, y0, x1, y1; |
62 | int dx, dy; |
63 | |
64 | // All three of these variables should be signed (so multiplication works) |
65 | ptrdiff_t sx; // Sign of x delta, positive when x0 < x1 |
66 | const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel |
67 | const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line |
68 | |
69 | // Clip region: xmax/ymax seem to normally be one *past* the rightmost/ |
70 | // bottommost pixels of the draw area. Since we render every pixel between |
71 | // and including both line endpoints, subtract one from xmax/ymax. |
72 | const int xmin = gpu_senquack.DrawingArea[0]; |
73 | const int ymin = gpu_senquack.DrawingArea[1]; |
74 | const int xmax = gpu_senquack.DrawingArea[2] - 1; |
75 | const int ymax = gpu_senquack.DrawingArea[3] - 1; |
76 | |
77 | x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_senquack.DrawingOffset[0]; |
78 | y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_senquack.DrawingOffset[1]; |
79 | x1 = GPU_EXPANDSIGN(packet.S2[4]) + gpu_senquack.DrawingOffset[0]; |
80 | y1 = GPU_EXPANDSIGN(packet.S2[5]) + gpu_senquack.DrawingOffset[1]; |
81 | |
82 | // Always draw top to bottom, so ensure y0 <= y1 |
83 | if (y0 > y1) { |
84 | SwapValues(y0, y1); |
85 | SwapValues(x0, x1); |
86 | } |
87 | |
88 | // Is line totally outside Y clipping range? |
89 | if (y0 > ymax || y1 < ymin) return; |
90 | |
91 | dx = x1 - x0; |
92 | dy = y1 - y0; |
93 | |
94 | // X-axis range check : max distance between any two X coords is 1023 |
95 | // (PSX hardware will not render anything violating this rule) |
96 | // NOTE: We'll check y coord range further below |
97 | if (dx >= CHKMAX_X || dx <= -CHKMAX_X) |
98 | return; |
99 | |
100 | // Y-axis range check and clipping |
101 | if (dy) { |
102 | // Y-axis range check : max distance between any two Y coords is 511 |
103 | // (PSX hardware will not render anything violating this rule) |
104 | if (dy >= CHKMAX_Y) |
105 | return; |
106 | |
107 | // We already know y0 < y1 |
108 | if (y0 < ymin) { |
109 | x0 += GPU_FAST_DIV(((ymin - y0) * dx), dy); |
110 | y0 = ymin; |
111 | } |
112 | if (y1 > ymax) { |
113 | x1 += GPU_FAST_DIV(((ymax - y1) * dx), dy); |
114 | y1 = ymax; |
115 | } |
116 | |
117 | // Recompute in case clipping occurred: |
118 | dx = x1 - x0; |
119 | dy = y1 - y0; |
120 | } |
121 | |
122 | // Check X clipping range, set 'sx' x-direction variable |
123 | if (dx == 0) { |
124 | // Is vertical line totally outside X clipping range? |
125 | if (x0 < xmin || x0 > xmax) |
126 | return; |
127 | sx = 0; |
128 | } else { |
129 | if (dx > 0) { |
130 | // x0 is leftmost coordinate |
131 | if (x0 > xmax) return; // Both points outside X clip range |
132 | |
133 | if (x0 < xmin) { |
134 | if (x1 < xmin) return; // Both points outside X clip range |
135 | y0 += GPU_FAST_DIV(((xmin - x0) * dy), dx); |
136 | x0 = xmin; |
137 | } |
138 | |
139 | if (x1 > xmax) { |
140 | y1 += GPU_FAST_DIV(((xmax - x1) * dy), dx); |
141 | x1 = xmax; |
142 | } |
143 | |
144 | sx = +1; |
145 | dx = x1 - x0; // Get final value, which should also be absolute value |
146 | } else { |
147 | // x1 is leftmost coordinate |
148 | if (x1 > xmax) return; // Both points outside X clip range |
149 | |
150 | if (x1 < xmin) { |
151 | if (x0 < xmin) return; // Both points outside X clip range |
152 | |
153 | y1 += GPU_FAST_DIV(((xmin - x1) * dy), dx); |
154 | x1 = xmin; |
155 | } |
156 | |
157 | if (x0 > xmax) { |
158 | y0 += GPU_FAST_DIV(((xmax - x0) * dy), dx); |
159 | x0 = xmax; |
160 | } |
161 | |
162 | sx = -1; |
163 | dx = x0 - x1; // Get final value, which should also be absolute value |
164 | } |
165 | |
166 | // Recompute in case clipping occurred: |
167 | dy = y1 - y0; |
168 | } |
169 | |
170 | // IMPORTANT: dx,dy should now contain their absolute values |
171 | |
172 | int min_length, // Minimum length of a pixel run |
173 | start_length, // Length of first run |
174 | end_length, // Length of last run |
175 | err_term, // Cumulative error to determine when to draw longer run |
176 | err_adjup, // Increment to err_term for each run drawn |
177 | err_adjdown; // Subract this from err_term after drawing longer run |
178 | |
179 | // Color to draw with (16 bits, highest of which is unset mask bit) |
180 | uintptr_t col16 = GPU_RGB16(packet.U4[0]); |
181 | |
182 | // We use u8 pointers even though PS1 has u16 framebuffer. |
183 | // This allows pixel-drawing functions to increment dst pointer |
184 | // directly by the passed 'incr' value, not having to shift it first. |
185 | u8 *dst = (u8*)gpu_senquack.vram + y0 * dst_stride + x0 * dst_depth; |
186 | |
187 | // SPECIAL CASE: Vertical line |
188 | if (dx == 0) { |
189 | gpuPixelSpanDriver(dst, col16, dst_stride, dy+1); |
190 | return; |
191 | } |
192 | |
193 | // SPECIAL CASE: Horizontal line |
194 | if (dy == 0) { |
195 | gpuPixelSpanDriver(dst, col16, sx * dst_depth, dx+1); |
196 | return; |
197 | } |
198 | |
199 | // SPECIAL CASE: Diagonal line |
200 | if (dx == dy) { |
201 | gpuPixelSpanDriver(dst, col16, dst_stride + (sx * dst_depth), dy+1); |
202 | return; |
203 | } |
204 | |
205 | int major, minor; // Major axis, minor axis |
206 | ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis |
207 | |
208 | if (dx > dy) { |
209 | major = dx; |
210 | minor = dy; |
211 | } else { |
212 | major = dy; |
213 | minor = dx; |
214 | } |
215 | |
216 | // Determine if diagonal or horizontal runs |
217 | if (major < (2 * minor)) { |
218 | // Diagonal runs, so perform half-octant transformation |
219 | minor = major - minor; |
220 | |
221 | // Advance diagonally when drawing runs |
222 | incr_major = dst_stride + (sx * dst_depth); |
223 | |
224 | // After drawing each run, correct for over-advance along minor axis |
225 | if (dx > dy) |
226 | incr_minor = -dst_stride; |
227 | else |
228 | incr_minor = -sx * dst_depth; |
229 | } else { |
230 | // Horizontal or vertical runs |
231 | if (dx > dy) { |
232 | incr_major = sx * dst_depth; |
233 | incr_minor = dst_stride; |
234 | } else { |
235 | incr_major = dst_stride; |
236 | incr_minor = sx * dst_depth; |
237 | } |
238 | } |
239 | |
240 | if (minor > 1) { |
241 | // Minimum number of pixels each run |
242 | min_length = major / minor; |
243 | |
244 | // Initial error term; reflects an initial step of 0.5 along minor axis |
245 | err_term = (major % minor) - (minor * 2); |
246 | |
247 | // Increment err_term this much each step along minor axis; when |
248 | // err_term crosses zero, draw longer pixel run. |
249 | err_adjup = (major % minor) * 2; |
250 | } else { |
251 | min_length = major; |
252 | err_term = 0; |
253 | err_adjup = 0; |
254 | } |
255 | |
256 | // Error term adjustment when err_term turns over; used to factor |
257 | // out the major-axis step made at that time |
258 | err_adjdown = minor * 2; |
259 | |
260 | // The initial and last runs are partial, because minor axis advances |
261 | // only 0.5 for these runs, rather than 1. Each is half a full run, |
262 | // plus the initial pixel. |
263 | start_length = end_length = (min_length / 2) + 1; |
264 | |
265 | if (min_length & 1) { |
266 | // If there're an odd number of pixels per run, we have 1 pixel that |
267 | // can't be allocated to either the initial or last partial run, so |
268 | // we'll add 0.5 to err_term so that this pixel will be handled |
269 | // by the normal full-run loop |
270 | err_term += minor; |
271 | } else { |
272 | // If the minimum run length is even and there's no fractional advance, |
273 | // we have one pixel that could go to either the initial or last |
274 | // partial run, which we arbitrarily allocate to the last run |
275 | if (err_adjup == 0) |
276 | start_length--; // Leave out the extra pixel at the start |
277 | } |
278 | |
279 | // First run of pixels |
280 | dst = gpuPixelSpanDriver(dst, col16, incr_major, start_length); |
281 | dst += incr_minor; |
282 | |
283 | // Middle runs of pixels |
284 | while (--minor > 0) { |
285 | int run_length = min_length; |
286 | err_term += err_adjup; |
287 | |
288 | // If err_term passed 0, reset it and draw longer run |
289 | if (err_term > 0) { |
290 | err_term -= err_adjdown; |
291 | run_length++; |
292 | } |
293 | |
294 | dst = gpuPixelSpanDriver(dst, col16, incr_major, run_length); |
295 | dst += incr_minor; |
296 | } |
297 | |
298 | // Final run of pixels |
299 | gpuPixelSpanDriver(dst, col16, incr_major, end_length); |
300 | } |
301 | |
302 | ///////////////////////// |
303 | // Gouraud-shaded line // |
304 | ///////////////////////// |
305 | void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver) |
306 | { |
307 | int x0, y0, x1, y1; |
308 | int dx, dy, dr, dg, db; |
309 | u32 r0, g0, b0, r1, g1, b1; |
310 | |
311 | // All three of these variables should be signed (so multiplication works) |
312 | ptrdiff_t sx; // Sign of x delta, positive when x0 < x1 |
313 | const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel |
314 | const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line |
315 | |
316 | // Clip region: xmax/ymax seem to normally be one *past* the rightmost/ |
317 | // bottommost pixels of the draw area. We'll render every pixel between |
318 | // and including both line endpoints, so subtract one from xmax/ymax. |
319 | const int xmin = gpu_senquack.DrawingArea[0]; |
320 | const int ymin = gpu_senquack.DrawingArea[1]; |
321 | const int xmax = gpu_senquack.DrawingArea[2] - 1; |
322 | const int ymax = gpu_senquack.DrawingArea[3] - 1; |
323 | |
324 | x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_senquack.DrawingOffset[0]; |
325 | y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_senquack.DrawingOffset[1]; |
326 | x1 = GPU_EXPANDSIGN(packet.S2[6]) + gpu_senquack.DrawingOffset[0]; |
327 | y1 = GPU_EXPANDSIGN(packet.S2[7]) + gpu_senquack.DrawingOffset[1]; |
328 | |
329 | u32 col0 = packet.U4[0]; |
330 | u32 col1 = packet.U4[2]; |
331 | |
332 | // Always draw top to bottom, so ensure y0 <= y1 |
333 | if (y0 > y1) { |
334 | SwapValues(y0, y1); |
335 | SwapValues(x0, x1); |
336 | SwapValues(col0, col1); |
337 | } |
338 | |
339 | // Is line totally outside Y clipping range? |
340 | if (y0 > ymax || y1 < ymin) return; |
341 | |
342 | // If defined, Gouraud colors are fixed-point 5.11, otherwise they are 8.16 |
343 | // (This is only beneficial if using SIMD-optimized pixel driver) |
344 | #ifdef GPU_GOURAUD_LOW_PRECISION |
345 | r0 = (col0 >> 3) & 0x1f; g0 = (col0 >> 11) & 0x1f; b0 = (col0 >> 19) & 0x1f; |
346 | r1 = (col1 >> 3) & 0x1f; g1 = (col1 >> 11) & 0x1f; b1 = (col1 >> 19) & 0x1f; |
347 | #else |
348 | r0 = col0 & 0xff; g0 = (col0 >> 8) & 0xff; b0 = (col0 >> 16) & 0xff; |
349 | r1 = col1 & 0xff; g1 = (col1 >> 8) & 0xff; b1 = (col1 >> 16) & 0xff; |
350 | #endif |
351 | |
352 | dx = x1 - x0; |
353 | dy = y1 - y0; |
354 | dr = r1 - r0; |
355 | dg = g1 - g0; |
356 | db = b1 - b0; |
357 | |
358 | // X-axis range check : max distance between any two X coords is 1023 |
359 | // (PSX hardware will not render anything violating this rule) |
360 | // NOTE: We'll check y coord range further below |
361 | if (dx >= CHKMAX_X || dx <= -CHKMAX_X) |
362 | return; |
363 | |
364 | // Y-axis range check and clipping |
365 | if (dy) { |
366 | // Y-axis range check : max distance between any two Y coords is 511 |
367 | // (PSX hardware will not render anything violating this rule) |
368 | if (dy >= CHKMAX_Y) |
369 | return; |
370 | |
371 | // We already know y0 < y1 |
372 | if (y0 < ymin) { |
373 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
374 | s32 factor = GPU_FAST_DIV(((ymin - y0) << GPU_LINE_FIXED_BITS), dy); |
375 | x0 += (dx * factor) >> GPU_LINE_FIXED_BITS; |
376 | r0 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
377 | g0 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
378 | b0 += (db * factor) >> GPU_LINE_FIXED_BITS; |
379 | #else |
380 | x0 += (ymin - y0) * dx / dy; |
381 | r0 += (ymin - y0) * dr / dy; |
382 | g0 += (ymin - y0) * dg / dy; |
383 | b0 += (ymin - y0) * db / dy; |
384 | #endif |
385 | y0 = ymin; |
386 | } |
387 | |
388 | if (y1 > ymax) { |
389 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
390 | s32 factor = GPU_FAST_DIV(((ymax - y1) << GPU_LINE_FIXED_BITS), dy); |
391 | x1 += (dx * factor) >> GPU_LINE_FIXED_BITS; |
392 | r1 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
393 | g1 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
394 | b1 += (db * factor) >> GPU_LINE_FIXED_BITS; |
395 | #else |
396 | x1 += (ymax - y1) * dx / dy; |
397 | r1 += (ymax - y1) * dr / dy; |
398 | g1 += (ymax - y1) * dg / dy; |
399 | b1 += (ymax - y1) * db / dy; |
400 | #endif |
401 | y1 = ymax; |
402 | } |
403 | |
404 | // Recompute in case clipping occurred: |
405 | dx = x1 - x0; |
406 | dy = y1 - y0; |
407 | dr = r1 - r0; |
408 | dg = g1 - g0; |
409 | db = b1 - b0; |
410 | } |
411 | |
412 | // Check X clipping range, set 'sx' x-direction variable |
413 | if (dx == 0) { |
414 | // Is vertical line totally outside X clipping range? |
415 | if (x0 < xmin || x0 > xmax) |
416 | return; |
417 | sx = 0; |
418 | } else { |
419 | if (dx > 0) { |
420 | // x0 is leftmost coordinate |
421 | if (x0 > xmax) return; // Both points outside X clip range |
422 | |
423 | if (x0 < xmin) { |
424 | if (x1 < xmin) return; // Both points outside X clip range |
425 | |
426 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
427 | s32 factor = GPU_FAST_DIV(((xmin - x0) << GPU_LINE_FIXED_BITS), dx); |
428 | y0 += (dy * factor) >> GPU_LINE_FIXED_BITS; |
429 | r0 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
430 | g0 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
431 | b0 += (db * factor) >> GPU_LINE_FIXED_BITS; |
432 | #else |
433 | y0 += (xmin - x0) * dy / dx; |
434 | r0 += (xmin - x0) * dr / dx; |
435 | g0 += (xmin - x0) * dg / dx; |
436 | b0 += (xmin - x0) * db / dx; |
437 | #endif |
438 | x0 = xmin; |
439 | } |
440 | |
441 | if (x1 > xmax) { |
442 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
443 | s32 factor = GPU_FAST_DIV(((xmax - x1) << GPU_LINE_FIXED_BITS), dx); |
444 | y1 += (dy * factor) >> GPU_LINE_FIXED_BITS; |
445 | r1 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
446 | g1 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
447 | b1 += (db * factor) >> GPU_LINE_FIXED_BITS; |
448 | #else |
449 | y1 += (xmax - x1) * dy / dx; |
450 | r1 += (xmax - x1) * dr / dx; |
451 | g1 += (xmax - x1) * dg / dx; |
452 | b1 += (xmax - x1) * db / dx; |
453 | #endif |
454 | x1 = xmax; |
455 | } |
456 | |
457 | sx = +1; |
458 | dx = x1 - x0; // Get final value, which should also be absolute value |
459 | } else { |
460 | // x1 is leftmost coordinate |
461 | if (x1 > xmax) return; // Both points outside X clip range |
462 | |
463 | if (x1 < xmin) { |
464 | if (x0 < xmin) return; // Both points outside X clip range |
465 | |
466 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
467 | s32 factor = GPU_FAST_DIV(((xmin - x1) << GPU_LINE_FIXED_BITS), dx); |
468 | y1 += (dy * factor) >> GPU_LINE_FIXED_BITS; |
469 | r1 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
470 | g1 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
471 | b1 += (db * factor) >> GPU_LINE_FIXED_BITS; |
472 | #else |
473 | y1 += (xmin - x1) * dy / dx; |
474 | r1 += (xmin - x1) * dr / dx; |
475 | g1 += (xmin - x1) * dg / dx; |
476 | b1 += (xmin - x1) * db / dx; |
477 | #endif |
478 | x1 = xmin; |
479 | } |
480 | |
481 | if (x0 > xmax) { |
482 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
483 | s32 factor = GPU_FAST_DIV(((xmax - x0) << GPU_LINE_FIXED_BITS), dx); |
484 | y0 += (dy * factor) >> GPU_LINE_FIXED_BITS; |
485 | r0 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
486 | g0 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
487 | b0 += (db * factor) >> GPU_LINE_FIXED_BITS; |
488 | #else |
489 | y0 += (xmax - x0) * dy / dx; |
490 | r0 += (xmax - x0) * dr / dx; |
491 | g0 += (xmax - x0) * dg / dx; |
492 | b0 += (xmax - x0) * db / dx; |
493 | #endif |
494 | x0 = xmax; |
495 | } |
496 | |
497 | sx = -1; |
498 | dx = x0 - x1; // Get final value, which should also be absolute value |
499 | } |
500 | |
501 | // Recompute in case clipping occurred: |
502 | dy = y1 - y0; |
503 | dr = r1 - r0; |
504 | dg = g1 - g0; |
505 | db = b1 - b0; |
506 | } |
507 | |
508 | // IMPORTANT: dx,dy should now contain their absolute values |
509 | |
510 | int min_length, // Minimum length of a pixel run |
511 | start_length, // Length of first run |
512 | end_length, // Length of last run |
513 | err_term, // Cumulative error to determine when to draw longer run |
514 | err_adjup, // Increment to err_term for each run drawn |
515 | err_adjdown; // Subract this from err_term after drawing longer run |
516 | |
517 | GouraudColor gcol; |
518 | gcol.r = r0 << GPU_GOURAUD_FIXED_BITS; |
519 | gcol.g = g0 << GPU_GOURAUD_FIXED_BITS; |
520 | gcol.b = b0 << GPU_GOURAUD_FIXED_BITS; |
521 | |
522 | // We use u8 pointers even though PS1 has u16 framebuffer. |
523 | // This allows pixel-drawing functions to increment dst pointer |
524 | // directly by the passed 'incr' value, not having to shift it first. |
525 | u8 *dst = (u8*)gpu_senquack.vram + y0 * dst_stride + x0 * dst_depth; |
526 | |
527 | // SPECIAL CASE: Vertical line |
528 | if (dx == 0) { |
529 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
530 | // Get dy fixed-point inverse |
531 | s32 inv_factor = 1 << GPU_GOURAUD_FIXED_BITS; |
532 | if (dy > 1) inv_factor = GPU_FAST_DIV(inv_factor, dy); |
533 | |
534 | // Simultaneously divide and convert integer to Gouraud fixed point: |
535 | gcol.r_incr = dr * inv_factor; |
536 | gcol.g_incr = dg * inv_factor; |
537 | gcol.b_incr = db * inv_factor; |
538 | #else |
539 | // First, convert to Gouraud fixed point |
540 | gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS; |
541 | gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS; |
542 | gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS; |
543 | |
544 | if (dy > 1) { |
545 | if (dr) gcol.r_incr /= dy; |
546 | if (dg) gcol.g_incr /= dy; |
547 | if (db) gcol.b_incr /= dy; |
548 | } |
549 | #endif |
550 | |
551 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride, dy+1); |
552 | return; |
553 | } |
554 | |
555 | // SPECIAL CASE: Horizontal line |
556 | if (dy == 0) { |
557 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
558 | // Get dx fixed-point inverse |
559 | s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS); |
560 | if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx); |
561 | |
562 | // Simultaneously divide and convert integer to Gouraud fixed point: |
563 | gcol.r_incr = dr * inv_factor; |
564 | gcol.g_incr = dg * inv_factor; |
565 | gcol.b_incr = db * inv_factor; |
566 | #else |
567 | gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS; |
568 | gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS; |
569 | gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS; |
570 | |
571 | if (dx > 1) { |
572 | if (dr) gcol.r_incr /= dx; |
573 | if (dg) gcol.g_incr /= dx; |
574 | if (db) gcol.b_incr /= dx; |
575 | } |
576 | #endif |
577 | |
578 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, sx * dst_depth, dx+1); |
579 | return; |
580 | } |
581 | |
582 | // SPECIAL CASE: Diagonal line |
583 | if (dx == dy) { |
584 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
585 | // Get dx fixed-point inverse |
586 | s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS); |
587 | if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx); |
588 | |
589 | // Simultaneously divide and convert integer to Gouraud fixed point: |
590 | gcol.r_incr = dr * inv_factor; |
591 | gcol.g_incr = dg * inv_factor; |
592 | gcol.b_incr = db * inv_factor; |
593 | #else |
594 | // First, convert to Gouraud fixed point |
595 | gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS; |
596 | gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS; |
597 | gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS; |
598 | |
599 | if (dx > 1) { |
600 | if (dr) gcol.r_incr /= dx; |
601 | if (dg) gcol.g_incr /= dx; |
602 | if (db) gcol.b_incr /= dx; |
603 | } |
604 | #endif |
605 | |
606 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride + (sx * dst_depth), dy+1); |
607 | return; |
608 | } |
609 | |
610 | int major, minor; // Absolute val of major,minor axis delta |
611 | ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis |
612 | |
613 | if (dx > dy) { |
614 | major = dx; |
615 | minor = dy; |
616 | } else { |
617 | major = dy; |
618 | minor = dx; |
619 | } |
620 | |
621 | // Determine if diagonal or horizontal runs |
622 | if (major < (2 * minor)) { |
623 | // Diagonal runs, so perform half-octant transformation |
624 | minor = major - minor; |
625 | |
626 | // Advance diagonally when drawing runs |
627 | incr_major = dst_stride + (sx * dst_depth); |
628 | |
629 | // After drawing each run, correct for over-advance along minor axis |
630 | if (dx > dy) |
631 | incr_minor = -dst_stride; |
632 | else |
633 | incr_minor = -sx * dst_depth; |
634 | } else { |
635 | // Horizontal or vertical runs |
636 | if (dx > dy) { |
637 | incr_major = sx * dst_depth; |
638 | incr_minor = dst_stride; |
639 | } else { |
640 | incr_major = dst_stride; |
641 | incr_minor = sx * dst_depth; |
642 | } |
643 | } |
644 | |
645 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
646 | s32 major_inv = GPU_FAST_DIV((1 << GPU_GOURAUD_FIXED_BITS), major); |
647 | |
648 | // Simultaneously divide and convert from integer to Gouraud fixed point: |
649 | gcol.r_incr = dr * major_inv; |
650 | gcol.g_incr = dg * major_inv; |
651 | gcol.b_incr = db * major_inv; |
652 | #else |
653 | gcol.r_incr = dr ? ((dr << GPU_GOURAUD_FIXED_BITS) / major) : 0; |
654 | gcol.g_incr = dg ? ((dg << GPU_GOURAUD_FIXED_BITS) / major) : 0; |
655 | gcol.b_incr = db ? ((db << GPU_GOURAUD_FIXED_BITS) / major) : 0; |
656 | #endif |
657 | |
658 | if (minor > 1) { |
659 | // Minimum number of pixels each run |
660 | min_length = major / minor; |
661 | |
662 | // Initial error term; reflects an initial step of 0.5 along minor axis |
663 | err_term = (major % minor) - (minor * 2); |
664 | |
665 | // Increment err_term this much each step along minor axis; when |
666 | // err_term crosses zero, draw longer pixel run. |
667 | err_adjup = (major % minor) * 2; |
668 | } else { |
669 | min_length = major; |
670 | err_term = 0; |
671 | err_adjup = 0; |
672 | } |
673 | |
674 | // Error term adjustment when err_term turns over; used to factor |
675 | // out the major-axis step made at that time |
676 | err_adjdown = minor * 2; |
677 | |
678 | // The initial and last runs are partial, because minor axis advances |
679 | // only 0.5 for these runs, rather than 1. Each is half a full run, |
680 | // plus the initial pixel. |
681 | start_length = end_length = (min_length / 2) + 1; |
682 | |
683 | if (min_length & 1) { |
684 | // If there're an odd number of pixels per run, we have 1 pixel that |
685 | // can't be allocated to either the initial or last partial run, so |
686 | // we'll add 0.5 to err_term so that this pixel will be handled |
687 | // by the normal full-run loop |
688 | err_term += minor; |
689 | } else { |
690 | // If the minimum run length is even and there's no fractional advance, |
691 | // we have one pixel that could go to either the initial or last |
692 | // partial run, which we'll arbitrarily allocate to the last run |
693 | if (err_adjup == 0) |
694 | start_length--; // Leave out the extra pixel at the start |
695 | } |
696 | |
697 | // First run of pixels |
698 | dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, start_length); |
699 | dst += incr_minor; |
700 | |
701 | // Middle runs of pixels |
702 | while (--minor > 0) { |
703 | int run_length = min_length; |
704 | err_term += err_adjup; |
705 | |
706 | // If err_term passed 0, reset it and draw longer run |
707 | if (err_term > 0) { |
708 | err_term -= err_adjdown; |
709 | run_length++; |
710 | } |
711 | |
712 | dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, run_length); |
713 | dst += incr_minor; |
714 | } |
715 | |
716 | // Final run of pixels |
717 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, end_length); |
718 | } |
719 | |
720 | #endif /* __GPU_UNAI_GPU_RASTER_LINE_H__ */ |