| 1 | /*************************************************************************** |
| 2 | * Copyright (C) 2010 PCSX4ALL Team * |
| 3 | * Copyright (C) 2010 Unai * |
| 4 | * Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com) * |
| 5 | * * |
| 6 | * This program is free software; you can redistribute it and/or modify * |
| 7 | * it under the terms of the GNU General Public License as published by * |
| 8 | * the Free Software Foundation; either version 2 of the License, or * |
| 9 | * (at your option) any later version. * |
| 10 | * * |
| 11 | * This program is distributed in the hope that it will be useful, * |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
| 14 | * GNU General Public License for more details. * |
| 15 | * * |
| 16 | * You should have received a copy of the GNU General Public License * |
| 17 | * along with this program; if not, write to the * |
| 18 | * Free Software Foundation, Inc., * |
| 19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * |
| 20 | ***************************************************************************/ |
| 21 | |
| 22 | #ifndef __GPU_UNAI_GPU_RASTER_LINE_H__ |
| 23 | #define __GPU_UNAI_GPU_RASTER_LINE_H__ |
| 24 | |
| 25 | /////////////////////////////////////////////////////////////////////////////// |
| 26 | // GPU internal line drawing functions |
| 27 | // |
| 28 | // Rewritten October 2016 by senquack: |
| 29 | // Instead of one pixel at a time, lines are now drawn in runs of pixels, |
| 30 | // whether vertical, horizontal, or diagonal. A new inner driver |
| 31 | // 'gpuPixelSpanFn' is used, as well as an enhanced Bresenham run-slice |
| 32 | // algorithm. For more information, see the following: |
| 33 | // |
| 34 | // Michael Abrash - Graphics Programming Black Book |
| 35 | // Chapters 35 - 36 (does not implement diagonal runs) |
| 36 | // http://www.drdobbs.com/parallel/graphics-programming-black-book/184404919 |
| 37 | // http://www.jagregory.com/abrash-black-book/ |
| 38 | // |
| 39 | // Article by Andrew Delong (does not implement diagonal runs) |
| 40 | // http://timetraces.ca/nw/drawline.htm |
| 41 | // |
| 42 | // 'Run-Based Multi-Point Line Drawing' by Eun Jae Lee & Larry F. Hodges |
| 43 | // https://smartech.gatech.edu/bitstream/handle/1853/3632/93-22.pdf |
| 44 | // Provided the idea of doing a half-octant transform allowing lines with |
| 45 | // slopes between 0.5 and 2.0 (diagonal runs of pixels) to be handled |
| 46 | // identically to the traditional horizontal/vertical run-slice method. |
| 47 | |
// Use 16.16 fixed point precision for line math.
// This is the number of fractional bits in the clip 'factor' values that
// gpuDrawLineG computes when sliding an endpoint to a clip boundary.
// NOTE: Gouraud colors used by gpuPixelSpanFn can use a different precision
//       (gpuDrawLineG scales them by GPU_GOURAUD_FIXED_BITS instead).
#define GPU_LINE_FIXED_BITS 16

// If defined, Gouraud lines will use fixed-point multiply-by-inverse to
// do most divisions. With enough accuracy, this should be OK.
// When not defined, gpuDrawLineG falls back to its plain integer-division
// code paths for clipping and for the per-pixel color increments.
#define USE_LINES_ALL_FIXED_PT_MATH
| 55 | |
| 56 | ////////////////////// |
| 57 | // Flat-shaded line // |
| 58 | ////////////////////// |
| 59 | void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver) |
| 60 | { |
| 61 | int x0, y0, x1, y1; |
| 62 | int dx, dy; |
| 63 | |
| 64 | // All three of these variables should be signed (so multiplication works) |
| 65 | ptrdiff_t sx; // Sign of x delta, positive when x0 < x1 |
| 66 | const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel |
| 67 | const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line |
| 68 | |
| 69 | // Clip region: xmax/ymax seem to normally be one *past* the rightmost/ |
| 70 | // bottommost pixels of the draw area. Since we render every pixel between |
| 71 | // and including both line endpoints, subtract one from xmax/ymax. |
| 72 | const int xmin = gpu_senquack.DrawingArea[0]; |
| 73 | const int ymin = gpu_senquack.DrawingArea[1]; |
| 74 | const int xmax = gpu_senquack.DrawingArea[2] - 1; |
| 75 | const int ymax = gpu_senquack.DrawingArea[3] - 1; |
| 76 | |
| 77 | x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_senquack.DrawingOffset[0]; |
| 78 | y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_senquack.DrawingOffset[1]; |
| 79 | x1 = GPU_EXPANDSIGN(packet.S2[4]) + gpu_senquack.DrawingOffset[0]; |
| 80 | y1 = GPU_EXPANDSIGN(packet.S2[5]) + gpu_senquack.DrawingOffset[1]; |
| 81 | |
| 82 | // Always draw top to bottom, so ensure y0 <= y1 |
| 83 | if (y0 > y1) { |
| 84 | SwapValues(y0, y1); |
| 85 | SwapValues(x0, x1); |
| 86 | } |
| 87 | |
| 88 | // Is line totally outside Y clipping range? |
| 89 | if (y0 > ymax || y1 < ymin) return; |
| 90 | |
| 91 | dx = x1 - x0; |
| 92 | dy = y1 - y0; |
| 93 | |
| 94 | // X-axis range check : max distance between any two X coords is 1023 |
| 95 | // (PSX hardware will not render anything violating this rule) |
| 96 | // NOTE: We'll check y coord range further below |
| 97 | if (dx >= CHKMAX_X || dx <= -CHKMAX_X) |
| 98 | return; |
| 99 | |
| 100 | // Y-axis range check and clipping |
| 101 | if (dy) { |
| 102 | // Y-axis range check : max distance between any two Y coords is 511 |
| 103 | // (PSX hardware will not render anything violating this rule) |
| 104 | if (dy >= CHKMAX_Y) |
| 105 | return; |
| 106 | |
| 107 | // We already know y0 < y1 |
| 108 | if (y0 < ymin) { |
| 109 | x0 += GPU_FAST_DIV(((ymin - y0) * dx), dy); |
| 110 | y0 = ymin; |
| 111 | } |
| 112 | if (y1 > ymax) { |
| 113 | x1 += GPU_FAST_DIV(((ymax - y1) * dx), dy); |
| 114 | y1 = ymax; |
| 115 | } |
| 116 | |
| 117 | // Recompute in case clipping occurred: |
| 118 | dx = x1 - x0; |
| 119 | dy = y1 - y0; |
| 120 | } |
| 121 | |
| 122 | // Check X clipping range, set 'sx' x-direction variable |
| 123 | if (dx == 0) { |
| 124 | // Is vertical line totally outside X clipping range? |
| 125 | if (x0 < xmin || x0 > xmax) |
| 126 | return; |
| 127 | sx = 0; |
| 128 | } else { |
| 129 | if (dx > 0) { |
| 130 | // x0 is leftmost coordinate |
| 131 | if (x0 > xmax) return; // Both points outside X clip range |
| 132 | |
| 133 | if (x0 < xmin) { |
| 134 | if (x1 < xmin) return; // Both points outside X clip range |
| 135 | y0 += GPU_FAST_DIV(((xmin - x0) * dy), dx); |
| 136 | x0 = xmin; |
| 137 | } |
| 138 | |
| 139 | if (x1 > xmax) { |
| 140 | y1 += GPU_FAST_DIV(((xmax - x1) * dy), dx); |
| 141 | x1 = xmax; |
| 142 | } |
| 143 | |
| 144 | sx = +1; |
| 145 | dx = x1 - x0; // Get final value, which should also be absolute value |
| 146 | } else { |
| 147 | // x1 is leftmost coordinate |
| 148 | if (x1 > xmax) return; // Both points outside X clip range |
| 149 | |
| 150 | if (x1 < xmin) { |
| 151 | if (x0 < xmin) return; // Both points outside X clip range |
| 152 | |
| 153 | y1 += GPU_FAST_DIV(((xmin - x1) * dy), dx); |
| 154 | x1 = xmin; |
| 155 | } |
| 156 | |
| 157 | if (x0 > xmax) { |
| 158 | y0 += GPU_FAST_DIV(((xmax - x0) * dy), dx); |
| 159 | x0 = xmax; |
| 160 | } |
| 161 | |
| 162 | sx = -1; |
| 163 | dx = x0 - x1; // Get final value, which should also be absolute value |
| 164 | } |
| 165 | |
| 166 | // Recompute in case clipping occurred: |
| 167 | dy = y1 - y0; |
| 168 | } |
| 169 | |
| 170 | // IMPORTANT: dx,dy should now contain their absolute values |
| 171 | |
| 172 | int min_length, // Minimum length of a pixel run |
| 173 | start_length, // Length of first run |
| 174 | end_length, // Length of last run |
| 175 | err_term, // Cumulative error to determine when to draw longer run |
| 176 | err_adjup, // Increment to err_term for each run drawn |
| 177 | err_adjdown; // Subract this from err_term after drawing longer run |
| 178 | |
| 179 | // Color to draw with (16 bits, highest of which is unset mask bit) |
| 180 | uintptr_t col16 = GPU_RGB16(packet.U4[0]); |
| 181 | |
| 182 | // We use u8 pointers even though PS1 has u16 framebuffer. |
| 183 | // This allows pixel-drawing functions to increment dst pointer |
| 184 | // directly by the passed 'incr' value, not having to shift it first. |
| 185 | u8 *dst = (u8*)gpu_senquack.vram + y0 * dst_stride + x0 * dst_depth; |
| 186 | |
| 187 | // SPECIAL CASE: Vertical line |
| 188 | if (dx == 0) { |
| 189 | gpuPixelSpanDriver(dst, col16, dst_stride, dy+1); |
| 190 | return; |
| 191 | } |
| 192 | |
| 193 | // SPECIAL CASE: Horizontal line |
| 194 | if (dy == 0) { |
| 195 | gpuPixelSpanDriver(dst, col16, sx * dst_depth, dx+1); |
| 196 | return; |
| 197 | } |
| 198 | |
| 199 | // SPECIAL CASE: Diagonal line |
| 200 | if (dx == dy) { |
| 201 | gpuPixelSpanDriver(dst, col16, dst_stride + (sx * dst_depth), dy+1); |
| 202 | return; |
| 203 | } |
| 204 | |
| 205 | int major, minor; // Major axis, minor axis |
| 206 | ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis |
| 207 | |
| 208 | if (dx > dy) { |
| 209 | major = dx; |
| 210 | minor = dy; |
| 211 | } else { |
| 212 | major = dy; |
| 213 | minor = dx; |
| 214 | } |
| 215 | |
| 216 | // Determine if diagonal or horizontal runs |
| 217 | if (major < (2 * minor)) { |
| 218 | // Diagonal runs, so perform half-octant transformation |
| 219 | minor = major - minor; |
| 220 | |
| 221 | // Advance diagonally when drawing runs |
| 222 | incr_major = dst_stride + (sx * dst_depth); |
| 223 | |
| 224 | // After drawing each run, correct for over-advance along minor axis |
| 225 | if (dx > dy) |
| 226 | incr_minor = -dst_stride; |
| 227 | else |
| 228 | incr_minor = -sx * dst_depth; |
| 229 | } else { |
| 230 | // Horizontal or vertical runs |
| 231 | if (dx > dy) { |
| 232 | incr_major = sx * dst_depth; |
| 233 | incr_minor = dst_stride; |
| 234 | } else { |
| 235 | incr_major = dst_stride; |
| 236 | incr_minor = sx * dst_depth; |
| 237 | } |
| 238 | } |
| 239 | |
| 240 | if (minor > 1) { |
| 241 | // Minimum number of pixels each run |
| 242 | min_length = major / minor; |
| 243 | |
| 244 | // Initial error term; reflects an initial step of 0.5 along minor axis |
| 245 | err_term = (major % minor) - (minor * 2); |
| 246 | |
| 247 | // Increment err_term this much each step along minor axis; when |
| 248 | // err_term crosses zero, draw longer pixel run. |
| 249 | err_adjup = (major % minor) * 2; |
| 250 | } else { |
| 251 | min_length = major; |
| 252 | err_term = 0; |
| 253 | err_adjup = 0; |
| 254 | } |
| 255 | |
| 256 | // Error term adjustment when err_term turns over; used to factor |
| 257 | // out the major-axis step made at that time |
| 258 | err_adjdown = minor * 2; |
| 259 | |
| 260 | // The initial and last runs are partial, because minor axis advances |
| 261 | // only 0.5 for these runs, rather than 1. Each is half a full run, |
| 262 | // plus the initial pixel. |
| 263 | start_length = end_length = (min_length / 2) + 1; |
| 264 | |
| 265 | if (min_length & 1) { |
| 266 | // If there're an odd number of pixels per run, we have 1 pixel that |
| 267 | // can't be allocated to either the initial or last partial run, so |
| 268 | // we'll add 0.5 to err_term so that this pixel will be handled |
| 269 | // by the normal full-run loop |
| 270 | err_term += minor; |
| 271 | } else { |
| 272 | // If the minimum run length is even and there's no fractional advance, |
| 273 | // we have one pixel that could go to either the initial or last |
| 274 | // partial run, which we arbitrarily allocate to the last run |
| 275 | if (err_adjup == 0) |
| 276 | start_length--; // Leave out the extra pixel at the start |
| 277 | } |
| 278 | |
| 279 | // First run of pixels |
| 280 | dst = gpuPixelSpanDriver(dst, col16, incr_major, start_length); |
| 281 | dst += incr_minor; |
| 282 | |
| 283 | // Middle runs of pixels |
| 284 | while (--minor > 0) { |
| 285 | int run_length = min_length; |
| 286 | err_term += err_adjup; |
| 287 | |
| 288 | // If err_term passed 0, reset it and draw longer run |
| 289 | if (err_term > 0) { |
| 290 | err_term -= err_adjdown; |
| 291 | run_length++; |
| 292 | } |
| 293 | |
| 294 | dst = gpuPixelSpanDriver(dst, col16, incr_major, run_length); |
| 295 | dst += incr_minor; |
| 296 | } |
| 297 | |
| 298 | // Final run of pixels |
| 299 | gpuPixelSpanDriver(dst, col16, incr_major, end_length); |
| 300 | } |
| 301 | |
| 302 | ///////////////////////// |
| 303 | // Gouraud-shaded line // |
| 304 | ///////////////////////// |
| 305 | void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver) |
| 306 | { |
| 307 | int x0, y0, x1, y1; |
| 308 | int dx, dy, dr, dg, db; |
| 309 | u32 r0, g0, b0, r1, g1, b1; |
| 310 | |
| 311 | // All three of these variables should be signed (so multiplication works) |
| 312 | ptrdiff_t sx; // Sign of x delta, positive when x0 < x1 |
| 313 | const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel |
| 314 | const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line |
| 315 | |
| 316 | // Clip region: xmax/ymax seem to normally be one *past* the rightmost/ |
| 317 | // bottommost pixels of the draw area. We'll render every pixel between |
| 318 | // and including both line endpoints, so subtract one from xmax/ymax. |
| 319 | const int xmin = gpu_senquack.DrawingArea[0]; |
| 320 | const int ymin = gpu_senquack.DrawingArea[1]; |
| 321 | const int xmax = gpu_senquack.DrawingArea[2] - 1; |
| 322 | const int ymax = gpu_senquack.DrawingArea[3] - 1; |
| 323 | |
| 324 | x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_senquack.DrawingOffset[0]; |
| 325 | y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_senquack.DrawingOffset[1]; |
| 326 | x1 = GPU_EXPANDSIGN(packet.S2[6]) + gpu_senquack.DrawingOffset[0]; |
| 327 | y1 = GPU_EXPANDSIGN(packet.S2[7]) + gpu_senquack.DrawingOffset[1]; |
| 328 | |
| 329 | u32 col0 = packet.U4[0]; |
| 330 | u32 col1 = packet.U4[2]; |
| 331 | |
| 332 | // Always draw top to bottom, so ensure y0 <= y1 |
| 333 | if (y0 > y1) { |
| 334 | SwapValues(y0, y1); |
| 335 | SwapValues(x0, x1); |
| 336 | SwapValues(col0, col1); |
| 337 | } |
| 338 | |
| 339 | // Is line totally outside Y clipping range? |
| 340 | if (y0 > ymax || y1 < ymin) return; |
| 341 | |
| 342 | // If defined, Gouraud colors are fixed-point 5.11, otherwise they are 8.16 |
| 343 | // (This is only beneficial if using SIMD-optimized pixel driver) |
| 344 | #ifdef GPU_GOURAUD_LOW_PRECISION |
| 345 | r0 = (col0 >> 3) & 0x1f; g0 = (col0 >> 11) & 0x1f; b0 = (col0 >> 19) & 0x1f; |
| 346 | r1 = (col1 >> 3) & 0x1f; g1 = (col1 >> 11) & 0x1f; b1 = (col1 >> 19) & 0x1f; |
| 347 | #else |
| 348 | r0 = col0 & 0xff; g0 = (col0 >> 8) & 0xff; b0 = (col0 >> 16) & 0xff; |
| 349 | r1 = col1 & 0xff; g1 = (col1 >> 8) & 0xff; b1 = (col1 >> 16) & 0xff; |
| 350 | #endif |
| 351 | |
| 352 | dx = x1 - x0; |
| 353 | dy = y1 - y0; |
| 354 | dr = r1 - r0; |
| 355 | dg = g1 - g0; |
| 356 | db = b1 - b0; |
| 357 | |
| 358 | // X-axis range check : max distance between any two X coords is 1023 |
| 359 | // (PSX hardware will not render anything violating this rule) |
| 360 | // NOTE: We'll check y coord range further below |
| 361 | if (dx >= CHKMAX_X || dx <= -CHKMAX_X) |
| 362 | return; |
| 363 | |
| 364 | // Y-axis range check and clipping |
| 365 | if (dy) { |
| 366 | // Y-axis range check : max distance between any two Y coords is 511 |
| 367 | // (PSX hardware will not render anything violating this rule) |
| 368 | if (dy >= CHKMAX_Y) |
| 369 | return; |
| 370 | |
| 371 | // We already know y0 < y1 |
| 372 | if (y0 < ymin) { |
| 373 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
| 374 | s32 factor = GPU_FAST_DIV(((ymin - y0) << GPU_LINE_FIXED_BITS), dy); |
| 375 | x0 += (dx * factor) >> GPU_LINE_FIXED_BITS; |
| 376 | r0 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
| 377 | g0 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
| 378 | b0 += (db * factor) >> GPU_LINE_FIXED_BITS; |
| 379 | #else |
| 380 | x0 += (ymin - y0) * dx / dy; |
| 381 | r0 += (ymin - y0) * dr / dy; |
| 382 | g0 += (ymin - y0) * dg / dy; |
| 383 | b0 += (ymin - y0) * db / dy; |
| 384 | #endif |
| 385 | y0 = ymin; |
| 386 | } |
| 387 | |
| 388 | if (y1 > ymax) { |
| 389 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
| 390 | s32 factor = GPU_FAST_DIV(((ymax - y1) << GPU_LINE_FIXED_BITS), dy); |
| 391 | x1 += (dx * factor) >> GPU_LINE_FIXED_BITS; |
| 392 | r1 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
| 393 | g1 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
| 394 | b1 += (db * factor) >> GPU_LINE_FIXED_BITS; |
| 395 | #else |
| 396 | x1 += (ymax - y1) * dx / dy; |
| 397 | r1 += (ymax - y1) * dr / dy; |
| 398 | g1 += (ymax - y1) * dg / dy; |
| 399 | b1 += (ymax - y1) * db / dy; |
| 400 | #endif |
| 401 | y1 = ymax; |
| 402 | } |
| 403 | |
| 404 | // Recompute in case clipping occurred: |
| 405 | dx = x1 - x0; |
| 406 | dy = y1 - y0; |
| 407 | dr = r1 - r0; |
| 408 | dg = g1 - g0; |
| 409 | db = b1 - b0; |
| 410 | } |
| 411 | |
| 412 | // Check X clipping range, set 'sx' x-direction variable |
| 413 | if (dx == 0) { |
| 414 | // Is vertical line totally outside X clipping range? |
| 415 | if (x0 < xmin || x0 > xmax) |
| 416 | return; |
| 417 | sx = 0; |
| 418 | } else { |
| 419 | if (dx > 0) { |
| 420 | // x0 is leftmost coordinate |
| 421 | if (x0 > xmax) return; // Both points outside X clip range |
| 422 | |
| 423 | if (x0 < xmin) { |
| 424 | if (x1 < xmin) return; // Both points outside X clip range |
| 425 | |
| 426 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
| 427 | s32 factor = GPU_FAST_DIV(((xmin - x0) << GPU_LINE_FIXED_BITS), dx); |
| 428 | y0 += (dy * factor) >> GPU_LINE_FIXED_BITS; |
| 429 | r0 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
| 430 | g0 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
| 431 | b0 += (db * factor) >> GPU_LINE_FIXED_BITS; |
| 432 | #else |
| 433 | y0 += (xmin - x0) * dy / dx; |
| 434 | r0 += (xmin - x0) * dr / dx; |
| 435 | g0 += (xmin - x0) * dg / dx; |
| 436 | b0 += (xmin - x0) * db / dx; |
| 437 | #endif |
| 438 | x0 = xmin; |
| 439 | } |
| 440 | |
| 441 | if (x1 > xmax) { |
| 442 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
| 443 | s32 factor = GPU_FAST_DIV(((xmax - x1) << GPU_LINE_FIXED_BITS), dx); |
| 444 | y1 += (dy * factor) >> GPU_LINE_FIXED_BITS; |
| 445 | r1 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
| 446 | g1 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
| 447 | b1 += (db * factor) >> GPU_LINE_FIXED_BITS; |
| 448 | #else |
| 449 | y1 += (xmax - x1) * dy / dx; |
| 450 | r1 += (xmax - x1) * dr / dx; |
| 451 | g1 += (xmax - x1) * dg / dx; |
| 452 | b1 += (xmax - x1) * db / dx; |
| 453 | #endif |
| 454 | x1 = xmax; |
| 455 | } |
| 456 | |
| 457 | sx = +1; |
| 458 | dx = x1 - x0; // Get final value, which should also be absolute value |
| 459 | } else { |
| 460 | // x1 is leftmost coordinate |
| 461 | if (x1 > xmax) return; // Both points outside X clip range |
| 462 | |
| 463 | if (x1 < xmin) { |
| 464 | if (x0 < xmin) return; // Both points outside X clip range |
| 465 | |
| 466 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
| 467 | s32 factor = GPU_FAST_DIV(((xmin - x1) << GPU_LINE_FIXED_BITS), dx); |
| 468 | y1 += (dy * factor) >> GPU_LINE_FIXED_BITS; |
| 469 | r1 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
| 470 | g1 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
| 471 | b1 += (db * factor) >> GPU_LINE_FIXED_BITS; |
| 472 | #else |
| 473 | y1 += (xmin - x1) * dy / dx; |
| 474 | r1 += (xmin - x1) * dr / dx; |
| 475 | g1 += (xmin - x1) * dg / dx; |
| 476 | b1 += (xmin - x1) * db / dx; |
| 477 | #endif |
| 478 | x1 = xmin; |
| 479 | } |
| 480 | |
| 481 | if (x0 > xmax) { |
| 482 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
| 483 | s32 factor = GPU_FAST_DIV(((xmax - x0) << GPU_LINE_FIXED_BITS), dx); |
| 484 | y0 += (dy * factor) >> GPU_LINE_FIXED_BITS; |
| 485 | r0 += (dr * factor) >> GPU_LINE_FIXED_BITS; |
| 486 | g0 += (dg * factor) >> GPU_LINE_FIXED_BITS; |
| 487 | b0 += (db * factor) >> GPU_LINE_FIXED_BITS; |
| 488 | #else |
| 489 | y0 += (xmax - x0) * dy / dx; |
| 490 | r0 += (xmax - x0) * dr / dx; |
| 491 | g0 += (xmax - x0) * dg / dx; |
| 492 | b0 += (xmax - x0) * db / dx; |
| 493 | #endif |
| 494 | x0 = xmax; |
| 495 | } |
| 496 | |
| 497 | sx = -1; |
| 498 | dx = x0 - x1; // Get final value, which should also be absolute value |
| 499 | } |
| 500 | |
| 501 | // Recompute in case clipping occurred: |
| 502 | dy = y1 - y0; |
| 503 | dr = r1 - r0; |
| 504 | dg = g1 - g0; |
| 505 | db = b1 - b0; |
| 506 | } |
| 507 | |
| 508 | // IMPORTANT: dx,dy should now contain their absolute values |
| 509 | |
| 510 | int min_length, // Minimum length of a pixel run |
| 511 | start_length, // Length of first run |
| 512 | end_length, // Length of last run |
| 513 | err_term, // Cumulative error to determine when to draw longer run |
| 514 | err_adjup, // Increment to err_term for each run drawn |
| 515 | err_adjdown; // Subract this from err_term after drawing longer run |
| 516 | |
| 517 | GouraudColor gcol; |
| 518 | gcol.r = r0 << GPU_GOURAUD_FIXED_BITS; |
| 519 | gcol.g = g0 << GPU_GOURAUD_FIXED_BITS; |
| 520 | gcol.b = b0 << GPU_GOURAUD_FIXED_BITS; |
| 521 | |
| 522 | // We use u8 pointers even though PS1 has u16 framebuffer. |
| 523 | // This allows pixel-drawing functions to increment dst pointer |
| 524 | // directly by the passed 'incr' value, not having to shift it first. |
| 525 | u8 *dst = (u8*)gpu_senquack.vram + y0 * dst_stride + x0 * dst_depth; |
| 526 | |
| 527 | // SPECIAL CASE: Vertical line |
| 528 | if (dx == 0) { |
| 529 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
| 530 | // Get dy fixed-point inverse |
| 531 | s32 inv_factor = 1 << GPU_GOURAUD_FIXED_BITS; |
| 532 | if (dy > 1) inv_factor = GPU_FAST_DIV(inv_factor, dy); |
| 533 | |
| 534 | // Simultaneously divide and convert integer to Gouraud fixed point: |
| 535 | gcol.r_incr = dr * inv_factor; |
| 536 | gcol.g_incr = dg * inv_factor; |
| 537 | gcol.b_incr = db * inv_factor; |
| 538 | #else |
| 539 | // First, convert to Gouraud fixed point |
| 540 | gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS; |
| 541 | gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS; |
| 542 | gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS; |
| 543 | |
| 544 | if (dy > 1) { |
| 545 | if (dr) gcol.r_incr /= dy; |
| 546 | if (dg) gcol.g_incr /= dy; |
| 547 | if (db) gcol.b_incr /= dy; |
| 548 | } |
| 549 | #endif |
| 550 | |
| 551 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride, dy+1); |
| 552 | return; |
| 553 | } |
| 554 | |
| 555 | // SPECIAL CASE: Horizontal line |
| 556 | if (dy == 0) { |
| 557 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
| 558 | // Get dx fixed-point inverse |
| 559 | s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS); |
| 560 | if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx); |
| 561 | |
| 562 | // Simultaneously divide and convert integer to Gouraud fixed point: |
| 563 | gcol.r_incr = dr * inv_factor; |
| 564 | gcol.g_incr = dg * inv_factor; |
| 565 | gcol.b_incr = db * inv_factor; |
| 566 | #else |
| 567 | gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS; |
| 568 | gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS; |
| 569 | gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS; |
| 570 | |
| 571 | if (dx > 1) { |
| 572 | if (dr) gcol.r_incr /= dx; |
| 573 | if (dg) gcol.g_incr /= dx; |
| 574 | if (db) gcol.b_incr /= dx; |
| 575 | } |
| 576 | #endif |
| 577 | |
| 578 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, sx * dst_depth, dx+1); |
| 579 | return; |
| 580 | } |
| 581 | |
| 582 | // SPECIAL CASE: Diagonal line |
| 583 | if (dx == dy) { |
| 584 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
| 585 | // Get dx fixed-point inverse |
| 586 | s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS); |
| 587 | if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx); |
| 588 | |
| 589 | // Simultaneously divide and convert integer to Gouraud fixed point: |
| 590 | gcol.r_incr = dr * inv_factor; |
| 591 | gcol.g_incr = dg * inv_factor; |
| 592 | gcol.b_incr = db * inv_factor; |
| 593 | #else |
| 594 | // First, convert to Gouraud fixed point |
| 595 | gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS; |
| 596 | gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS; |
| 597 | gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS; |
| 598 | |
| 599 | if (dx > 1) { |
| 600 | if (dr) gcol.r_incr /= dx; |
| 601 | if (dg) gcol.g_incr /= dx; |
| 602 | if (db) gcol.b_incr /= dx; |
| 603 | } |
| 604 | #endif |
| 605 | |
| 606 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride + (sx * dst_depth), dy+1); |
| 607 | return; |
| 608 | } |
| 609 | |
| 610 | int major, minor; // Absolute val of major,minor axis delta |
| 611 | ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis |
| 612 | |
| 613 | if (dx > dy) { |
| 614 | major = dx; |
| 615 | minor = dy; |
| 616 | } else { |
| 617 | major = dy; |
| 618 | minor = dx; |
| 619 | } |
| 620 | |
| 621 | // Determine if diagonal or horizontal runs |
| 622 | if (major < (2 * minor)) { |
| 623 | // Diagonal runs, so perform half-octant transformation |
| 624 | minor = major - minor; |
| 625 | |
| 626 | // Advance diagonally when drawing runs |
| 627 | incr_major = dst_stride + (sx * dst_depth); |
| 628 | |
| 629 | // After drawing each run, correct for over-advance along minor axis |
| 630 | if (dx > dy) |
| 631 | incr_minor = -dst_stride; |
| 632 | else |
| 633 | incr_minor = -sx * dst_depth; |
| 634 | } else { |
| 635 | // Horizontal or vertical runs |
| 636 | if (dx > dy) { |
| 637 | incr_major = sx * dst_depth; |
| 638 | incr_minor = dst_stride; |
| 639 | } else { |
| 640 | incr_major = dst_stride; |
| 641 | incr_minor = sx * dst_depth; |
| 642 | } |
| 643 | } |
| 644 | |
| 645 | #ifdef USE_LINES_ALL_FIXED_PT_MATH |
| 646 | s32 major_inv = GPU_FAST_DIV((1 << GPU_GOURAUD_FIXED_BITS), major); |
| 647 | |
| 648 | // Simultaneously divide and convert from integer to Gouraud fixed point: |
| 649 | gcol.r_incr = dr * major_inv; |
| 650 | gcol.g_incr = dg * major_inv; |
| 651 | gcol.b_incr = db * major_inv; |
| 652 | #else |
| 653 | gcol.r_incr = dr ? ((dr << GPU_GOURAUD_FIXED_BITS) / major) : 0; |
| 654 | gcol.g_incr = dg ? ((dg << GPU_GOURAUD_FIXED_BITS) / major) : 0; |
| 655 | gcol.b_incr = db ? ((db << GPU_GOURAUD_FIXED_BITS) / major) : 0; |
| 656 | #endif |
| 657 | |
| 658 | if (minor > 1) { |
| 659 | // Minimum number of pixels each run |
| 660 | min_length = major / minor; |
| 661 | |
| 662 | // Initial error term; reflects an initial step of 0.5 along minor axis |
| 663 | err_term = (major % minor) - (minor * 2); |
| 664 | |
| 665 | // Increment err_term this much each step along minor axis; when |
| 666 | // err_term crosses zero, draw longer pixel run. |
| 667 | err_adjup = (major % minor) * 2; |
| 668 | } else { |
| 669 | min_length = major; |
| 670 | err_term = 0; |
| 671 | err_adjup = 0; |
| 672 | } |
| 673 | |
| 674 | // Error term adjustment when err_term turns over; used to factor |
| 675 | // out the major-axis step made at that time |
| 676 | err_adjdown = minor * 2; |
| 677 | |
| 678 | // The initial and last runs are partial, because minor axis advances |
| 679 | // only 0.5 for these runs, rather than 1. Each is half a full run, |
| 680 | // plus the initial pixel. |
| 681 | start_length = end_length = (min_length / 2) + 1; |
| 682 | |
| 683 | if (min_length & 1) { |
| 684 | // If there're an odd number of pixels per run, we have 1 pixel that |
| 685 | // can't be allocated to either the initial or last partial run, so |
| 686 | // we'll add 0.5 to err_term so that this pixel will be handled |
| 687 | // by the normal full-run loop |
| 688 | err_term += minor; |
| 689 | } else { |
| 690 | // If the minimum run length is even and there's no fractional advance, |
| 691 | // we have one pixel that could go to either the initial or last |
| 692 | // partial run, which we'll arbitrarily allocate to the last run |
| 693 | if (err_adjup == 0) |
| 694 | start_length--; // Leave out the extra pixel at the start |
| 695 | } |
| 696 | |
| 697 | // First run of pixels |
| 698 | dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, start_length); |
| 699 | dst += incr_minor; |
| 700 | |
| 701 | // Middle runs of pixels |
| 702 | while (--minor > 0) { |
| 703 | int run_length = min_length; |
| 704 | err_term += err_adjup; |
| 705 | |
| 706 | // If err_term passed 0, reset it and draw longer run |
| 707 | if (err_term > 0) { |
| 708 | err_term -= err_adjdown; |
| 709 | run_length++; |
| 710 | } |
| 711 | |
| 712 | dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, run_length); |
| 713 | dst += incr_minor; |
| 714 | } |
| 715 | |
| 716 | // Final run of pixels |
| 717 | gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, end_length); |
| 718 | } |
| 719 | |
| 720 | #endif /* __GPU_UNAI_GPU_RASTER_LINE_H__ */ |